def run(self):
        # We can also create a backup of the just downloaded PDF here

        df = scrape_mumbai_pdf(self.input().path)

        df.to_csv(self.output().path, index=False)

        # cleanup
        self.input().remove()
    def test_extract_sample_2020_01_02(self):
        expected = (24, 10)
        expected_first = ['RC', '21197', '19981', '600', '586', 0, np.nan, pd.to_datetime('2021-01-01 00:00:00'),
                          'Mumbai', 'MH']
        expected_last = ['B', '2153', '1942', '144', '60', 0, np.nan, pd.to_datetime('2021-01-01 00:00:00'), 'Mumbai',
                         'MH']

        sample = os.path.join(THIS_DIR, 'samples/mumbai_dashboard_2020_01_02.pdf')
        mumbai_output = scrape_mumbai_pdf(sample)
        
        result = mumbai_output[['ward', 'total.confirmed', 'total.recovered', 'total.deceased', 'total.active', 'total.other', 'total.tested', 'date', 'district', 'state']]
            
        result_first = result.iloc[0].values
        result_last = result.iloc[-1].values

        self.assertIsNotNone(result)

        self.assertEqual(expected, result.shape)

        np.array_equal(result_first, expected_first)
        np.array_equal(result_last, expected_last)
    def test_extract_sample_2020_02_27(self):
        # alignment issues in this pdf
        expected = (24, 10)
        expected_first = ['RC', '22879', '21544', '648', '649', 0, np.nan, pd.to_datetime('2021-02-25 00:00:00'),
                          'Mumbai', 'MH']
        expected_last = ['B', '2302', '2084', '146', '65', 0, np.nan, pd.to_datetime('2021-02-25 00:00:00'), 'Mumbai',
                         'MH']
        sample = os.path.join(THIS_DIR, 'samples/mumbai_dashboard_2021_02_27.pdf')
        
        mumbai_output = scrape_mumbai_pdf(sample)
        
        result = mumbai_output[['ward', 'total.confirmed', 'total.recovered', 'total.deceased', 'total.active', 'total.other', 'total.tested', 'date', 'district', 'state']]
            
        result_first = result.iloc[0].values
        result_last = result.iloc[-1].values

        self.assertIsNotNone(result)

        self.assertEqual(expected, result.shape)

        np.array_equal(result_first, expected_first)
        np.array_equal(result_last, expected_last)
    def test_extract_sample_2020_01_20(self):
        # alignment issues in this pdf
        expected = (24, 10)
        expected_first = [
            'RC', '21813', '20679', '615', '488', 0, np.nan,
            pd.to_datetime('2021-01-18 00:00:00'), 'Mumbai', 'MH'
        ]
        expected_last = [
            'B', '2195', '1995', '144', '49', 0, np.nan,
            pd.to_datetime('2021-01-18 00:00:00'), 'Mumbai', 'MH'
        ]
        sample = os.path.join(THIS_DIR,
                              'samples/mumbai_dashboard_2021_01_20.pdf')
        result = scrape_mumbai_pdf(sample)
        result_first = result.iloc[0].values
        result_last = result.iloc[-1].values

        self.assertIsNotNone(result)

        self.assertEqual(expected, result.shape)

        np.array_equal(result_first, expected_first)
        np.array_equal(result_last, expected_last)
    def test_extract_sample_2020_01_04(self):
        expected = (24, 10)
        expected_first = [
            'RC', '21264', '20064', '601', '568', 0, np.nan,
            pd.to_datetime('2021-01-03 00:00:00'), 'Mumbai', 'MH'
        ]
        expected_last = [
            'B', '2158', '1949', '144', '58', 0, np.nan,
            pd.to_datetime('2021-01-03 00:00:00'), 'Mumbai', 'MH'
        ]

        sample = os.path.join(THIS_DIR,
                              'samples/mumbai_dashboard_2020_01_04.pdf')
        result = scrape_mumbai_pdf(sample)
        result_first = result.iloc[0].values
        result_last = result.iloc[-1].values

        self.assertIsNotNone(result)

        self.assertEqual(expected, result.shape)

        np.array_equal(result_first, expected_first)
        np.array_equal(result_last, expected_last)