Example #1
0
    def test_34_load_to_dwh_duplicate_insert(self):
        """Check that loading the transformed frame leaves covid.daily_stats unchanged.

        The table is queried before and after the ``load_to_dwh`` call and
        compared against the same expectations both times: 253 rows by 4
        columns, with the newest row dated 2020-09-30.

        NOTE(review): the pre-existing rows are presumably left behind by an
        earlier test in this suite -- confirm the ordering dependency.
        """
        query = "select * from covid.daily_stats ds  order by ds.rep_date desc"
        table_shape = (253, 4)
        newest_row = [datetime.date(2020, 9, 30), 7262695, 206852, 2840688]

        # Baseline state: the newest record is checked first, then the shape.
        before = db_utils.get_dwh_result_as_df(self.conn, query, "")
        self.assertListEqual(list(before.iloc[0]), newest_row)
        self.assertTupleEqual(before.shape, table_shape)

        load_to_dwh(self._df_transformed, self.conn)

        # State after the load: the shape is checked first, then the record.
        after = db_utils.get_dwh_result_as_df(self.conn, query, "")
        self.assertTupleEqual(after.shape, table_shape)
        self.assertListEqual(list(after.iloc[0]), newest_row)
Example #2
0
 def test_30_load_to_dwh(self):
     """Load the transformed frame and verify the contents of covid.daily_stats.

     After ``load_to_dwh`` runs, the table (newest ``rep_date`` first) is
     expected to be 253 rows by 4 columns, to expose the columns
     ``rep_date``, ``cases``, ``deaths`` and ``recovered``, and to have a
     newest row dated 2020-09-30.
     """
     load_to_dwh(self._df_transformed, self.conn)

     query = "select * from covid.daily_stats ds  order by ds.rep_date desc"
     loaded = db_utils.get_dwh_result_as_df(self.conn, query, "")

     self.assertTupleEqual(loaded.shape, (253, 4))
     self.assertListEqual(
         list(loaded.columns),
         ['rep_date', 'cases', 'deaths', 'recovered'],
     )
     self.assertListEqual(
         list(loaded.iloc[0]),
         [datetime.date(2020, 9, 30), 7262695, 206852, 2840688],
     )
Example #3
0
 def test_32_load_to_dwh_empty_dataframe(self):
     """Check the state of covid.daily_stats after loading an empty DataFrame.

     A frame with the columns ``date``, ``cases``, ``deaths`` and
     ``recovered`` but no rows is passed to ``load_to_dwh``; afterwards the
     table is expected to be 253 rows by 4 columns with a newest row dated
     2020-09-30.
     """
     empty_frame = pd.DataFrame(columns=['date', 'cases', 'deaths', 'recovered'])
     load_to_dwh(empty_frame, self.conn)

     query = "select * from covid.daily_stats ds  order by ds.rep_date desc"
     stored = db_utils.get_dwh_result_as_df(self.conn, query, "")

     self.assertTupleEqual(stored.shape, (253, 4))
     self.assertListEqual(
         list(stored.iloc[0]),
         [datetime.date(2020, 9, 30), 7262695, 206852, 2840688],
     )
Example #4
0
    def test_42_load_incremental_duplicate_insert(self):
        """Check that loading the latest NYT extract leaves covid.daily_stats unchanged.

        The table is verified (257 rows by 4 columns, newest row dated
        2020-10-04), then ``testdata/nyt_data_latest.csv`` is extracted,
        transformed and loaded, and the same expectations are asserted again.

        NOTE(review): the initial 257 rows are presumably produced by an
        earlier incremental-load test -- confirm the ordering dependency.
        """
        query = "select * from covid.daily_stats ds  order by ds.rep_date desc"
        table_shape = (257, 4)
        newest_row = [datetime.date(2020, 10, 4), 7444705, 209603, 2911699]

        # Baseline state before the load.
        before = db_utils.get_dwh_result_as_df(self.conn, query, "")
        self.assertTupleEqual(before.shape, table_shape)
        self.assertListEqual(list(before.iloc[0]), newest_row)

        # Run the extract -> transform -> load pipeline on the latest NYT file.
        latest_nyt_csv = os.path.join(sys.path[0], 'testdata/nyt_data_latest.csv')
        nyt_frame, jh_frame = extract_covid_data(latest_nyt_csv, self._test_covid_jh_data)
        load_to_dwh(transform(nyt_frame, jh_frame), self.conn)

        # The table must look exactly as it did before the load.
        after = db_utils.get_dwh_result_as_df(self.conn, query, "")
        self.assertTupleEqual(after.shape, table_shape)
        self.assertListEqual(list(after.iloc[0]), newest_row)