Example no. 1
0
class TestDatabase(unittest.TestCase):
    @classmethod
    def setUpClass(cls) -> None:
        """Create a fresh test database and load the dummy CSV data once per class."""
        db_name = 'Testing.db'
        # Bug fix: os._exists is a private os-module helper that checks whether
        # a *name* exists in os's own globals -- it never touches the
        # filesystem, so a stale database file was never removed.
        if os.path.exists(db_name):
            os.remove(db_name)
        cls.db = Database(db_name)
        cls.csv_df = testutils.get_dummy_data()

        # Table names shared by all tests below.
        cls.total_deaths_table = 'deaths_total'
        cls.death_change_python_table = 'deaths_change_python'

    def setUp(self) -> None:
        """Recompute the input DataFrame and drop both test tables for a clean slate."""
        self.total_deaths_df = DataHandler(
        ).get_total_deaths_per_country_and_day(self.csv_df)

        with self.db.create_connection() as con:
            cursor = con.cursor()
            # Drop the tables in the same order every run.
            for table in (self.total_deaths_table,
                          self.death_change_python_table):
                cursor.execute('DROP TABLE IF EXISTS ' + table + ';')
            con.commit()

    def test_create_connection(self):
        """Opening a connection must always yield a non-None object."""
        connection = self.db.create_connection()
        self.assertIsNotNone(connection, "Database connection")

    def test_no_data_duplicates(self):
        """Inserting identical data twice must not grow the table."""
        self.db.create_total_deaths_table()

        self.db._insert_deaths_data(self.total_deaths_df,
                                    self.total_deaths_table)
        with self.db.create_connection() as con:
            cursor = con.cursor()
            cursor.execute('SELECT COUNT(*) FROM ' + self.total_deaths_table +
                           ';')
            row_count = cursor.fetchone()[0]

        # Try to add the same data again.
        self.db._insert_deaths_data(self.total_deaths_df,
                                    self.total_deaths_table)

        with self.db.create_connection() as con:
            # Bug fix: the original reused the cursor from the first
            # connection here, leaving this new connection unused.
            cursor = con.cursor()
            cursor.execute('SELECT COUNT(*) FROM ' + self.total_deaths_table +
                           ';')
            new_row_count = cursor.fetchone()[0]

        self.assertEqual(row_count, new_row_count)

    def test_function_output_and_table_rows(self):
        """The reported changed-row count must equal the table's row count."""
        self.db.create_total_deaths_table()
        count_sql = 'SELECT COUNT(*) FROM ' + self.total_deaths_table + ';'
        with self.db.create_connection() as con:
            cursor = con.cursor()
            changed_rows = self.db._insert_deaths_data(self.total_deaths_df,
                                                       self.total_deaths_table)
            cursor.execute(count_sql)
            row_count = cursor.fetchone()[0]
        self.assertEqual(
            changed_rows, row_count,
            "Number of rows in table needs to match add_new_death_data function's changed rows count"
        )

    def test_adding_new_rows_to_table(self):
        """Re-inserting a superset of the data adds only the missing rows."""
        self.db.create_total_deaths_table()

        # Starting state: the table is missing all rows for 2/13/2020.
        starting_df = self.total_deaths_df[
            self.total_deaths_df['date'] != pd.to_datetime('2/13/2020')]
        self.db._insert_deaths_data(starting_df, self.total_deaths_table)

        # Mimic a new update: total_deaths_df has extra rows (latest stats).
        # The returned changed-row count is not needed by this test.
        self.db._insert_deaths_data(self.total_deaths_df,
                                    self.total_deaths_table)

        table_len = self.db.execute_query('SELECT COUNT(*) FROM ' +
                                          self.total_deaths_table + ';')[0][0]

        self.assertEqual(
            table_len, len(self.total_deaths_df),
            "Only missing data should be added and existing data should not "
            f"duplicate. Expected row count {len(self.total_deaths_df)} "
            f"actual row count {table_len}")

    def test_updating_table_with_modified_rows(self):
        """Modified rows count as changes but must not add extra table rows."""
        self.db.create_total_deaths_table()

        # Starting state: the table is missing all rows for 2/13/2020.
        starting_df = self.total_deaths_df[
            self.total_deaths_df['date'] != pd.to_datetime('2/13/2020')]
        self.db.insert_to_deaths_total_table(starting_df)

        # Mimic a new update: extra rows (latest stats) plus two
        # retrospectively modified rows.
        new_df = self.total_deaths_df.copy()
        new_df.loc[0, 'deaths'] = new_df.loc[0, 'deaths'] + 1
        new_df.loc[7, 'deaths'] = new_df.loc[7, 'deaths'] - 2

        changed_rows = self.db.insert_to_deaths_total_table(new_df)

        # Bug fix: DataFrame.append was removed in pandas 2.0; pd.concat is
        # the supported equivalent.
        updates = pd.concat([
            self.total_deaths_df[self.total_deaths_df['date'] ==
                                 pd.to_datetime('2/13/2020')],
            new_df.iloc[[0, 7], ]
        ])

        # Bug fix: assertIs checks object identity and only accidentally
        # passes for small cached ints -- assertEqual is the correct check.
        self.assertEqual(
            changed_rows, len(updates), 'Number of rows changed in the ' +
            self.total_deaths_table + ' must be the same as updated data \
                      rows')

        total_rows = self.db.execute_query('SELECT COUNT(*) FROM ' +
                                           self.total_deaths_table)[0][0]
        self.assertEqual(
            total_rows, len(self.total_deaths_df),
            'The number of total rows in ' + self.total_deaths_table +
            ' must be the same as \
                                      total rows in data from the repo')

    def test_updating_table_to_correct_values(self):
        """Subsequent runs must leave the table holding the corrected values."""
        self.db.create_total_deaths_table()

        # Starting state: the table is missing all rows for 2/13/2020.
        starting_df = self.total_deaths_df[
            self.total_deaths_df['date'] != pd.to_datetime('2/13/2020')]
        self.db.insert_to_deaths_total_table(starting_df)

        # Mimic a new update: extra rows (latest stats) plus two
        # retrospectively modified rows.
        new_df = self.total_deaths_df.copy()
        new_df.loc[1, 'deaths'] = new_df.loc[1, 'deaths'] + 1
        new_df.loc[6, 'deaths'] = new_df.loc[6, 'deaths'] - 2

        self.db.insert_to_deaths_total_table(new_df)

        # Expected rows: everything for 2/13/2020 plus the two edited rows.
        # Bug fix: DataFrame.append was removed in pandas 2.0; use pd.concat.
        updates = pd.concat([
            self.total_deaths_df[self.total_deaths_df['date'] ==
                                 pd.to_datetime('2/13/2020')],
            new_df.iloc[[1, 6], ]
        ])
        updates = updates.sort_values(['date', 'country'])
        updates.loc[:, 'date'] = updates.loc[:, 'date'].dt.strftime(
            '%Y-%m-%d %H:%M:%S')
        updates.reset_index(drop=True, inplace=True)

        # Parameterized query instead of string concatenation: safer and far
        # easier to read than the backslash-continued literal it replaces.
        query = ('SELECT * FROM ' + self.total_deaths_table + ' '
                 'WHERE date = ? '
                 'OR (country = ? AND date = ?) '
                 'OR (country = ? AND date = ?) '
                 'ORDER BY date, country;')
        params = (
            pd.to_datetime('2020-02-13').strftime('%Y-%m-%d %H:%M:%S'),
            new_df.loc[1, 'country'],
            new_df.loc[1, 'date'].strftime('%Y-%m-%d %H:%M:%S'),
            new_df.loc[6, 'country'],
            new_df.loc[6, 'date'].strftime('%Y-%m-%d %H:%M:%S'),
        )
        with self.db.create_connection() as con:
            result = pd.read_sql(query, con=con, params=params)

        # Bug fix: assertIs compares identity; assertEqual is the correct
        # comparison for integer lengths.
        self.assertEqual(
            len(result), len(updates),
            "The number of updated rows in db table needs to match the data rows"
        )
        self.assertTrue(
            updates.equals(result),
            " Updated data in the database table must match the changed rows in repo"
        )

    def test_daily_change_sql_query(self):
        """The LAG-based SQL query must reproduce the expected daily change data."""
        # Create and populate the deaths_total table, then rebuild the
        # derived deaths_change_sql table from scratch.
        self.db.create_total_deaths_table()
        self.db._insert_deaths_data(self.total_deaths_df,
                                    self.total_deaths_table)
        self.db.execute_query("DROP TABLE IF EXISTS deaths_change_sql;")

        sql = "CREATE TABLE deaths_change_sql AS \
                        SELECT country, date, \
                        deaths - LAG(deaths,1,0) OVER (PARTITION BY country ORDER BY date) AS deaths_change \
                        FROM deaths_total;"
        self.db.execute_query(sql)

        with self.db.create_connection() as con:
            change_df = pd.read_sql('SELECT * FROM deaths_change_sql', con=con)

        # Expected deaths change data, with dates rendered as strings to
        # match what comes back from the database.
        expected_df = testutils.get_dummy_change_data()
        expected_df.loc[:, 'date'] = expected_df.loc[:, 'date'].dt.strftime(
            '%Y-%m-%d %H:%M:%S')
        self.assertTrue(change_df.equals(expected_df))

    def test_db_insert_daily_change_of_deaths(self):
        """Rows inserted into the change table must equal the reported count."""
        self.db.create_deaths_change_python_table()
        daily_change = DataHandler().get_daily_change_of_deaths(
            self.total_deaths_df)

        changed_rows = self.db.insert_to_deaths_change_python_table(
            daily_change)

        count_query = "SELECT COUNT(*) FROM " + self.death_change_python_table
        rows_in_table = self.db.execute_query(count_query)[0][0]

        self.assertEqual(
            rows_in_table, changed_rows,
            "Data rows in deaths_change_python table and the number of changed_rows must match"
        )

    def test_no_db_update_on_subsequent_daily_changes_calculations(self):
        """A second, identical daily-change insert must not modify the table."""
        # Calculate daily change with current data and insert it into the
        # deaths_change_python table.
        self.db.create_deaths_change_python_table()
        daily_change = DataHandler().get_daily_change_of_deaths(
            self.total_deaths_df)
        self.db.insert_to_deaths_change_python_table(daily_change)

        # Recomputing from the same source data must yield identical results.
        new_daily_change = DataHandler().get_daily_change_of_deaths(
            self.total_deaths_df)
        self.assertTrue(daily_change.equals(new_daily_change))

        new_changed_rows = self.db.insert_to_deaths_change_python_table(
            new_daily_change)

        # Bug fix: assertIs checks identity, which only passed because
        # CPython caches small ints -- assertEqual is the correct comparison.
        self.assertEqual(
            new_changed_rows, 0,
            "There should not be any changes to deaths_change_python table in the second run"
        )