def test_alt_regexp(self):
    """Parser can extract data from a bill with alternate set of regular expressions.

    Three fixtures are parsed in turn; each one must yield exactly one
    billing period matching the values in the case table below.
    """
    fixture_template = "datafeeds/scrapers/tests/fixtures/ladwp-%s.pdf"
    # Each case: (fixture suffix, meter number, the single bill expected).
    cases = [
        (
            "202010",
            "PMY00219-00010473",
            BillingDatum(
                start=date(2020, 9, 2),
                end=date(2020, 10, 1),
                statement=date(2020, 10, 5),
                cost=386.52,
                used=840,
                peak=6,
                items=None,
                attachments=None,
                utility_code=None,
            ),
        ),
        (
            "202007",
            "PMY00219-00010473",
            BillingDatum(
                start=date(2020, 6, 5),
                end=date(2020, 7, 5),
                statement=date(2020, 7, 7),
                cost=357.98,
                used=720,
                peak=4.8,
                items=None,
                attachments=None,
                utility_code=None,
            ),
        ),
        (
            "202001",
            "00106-00095149",
            BillingDatum(
                start=date(2019, 12, 12),
                end=date(2020, 1, 13),
                statement=date(2020, 1, 14),
                cost=47.48,
                used=0,
                peak=None,
                items=None,
                attachments=None,
                utility_code=None,
            ),
        ),
    ]
    for suffix, meter_number, bill in cases:
        self.assertEqual(
            [bill],
            parse_pdf(fixture_template % suffix, meter_number, "kw"),
        )
def test_multi_bill_electric_cost_only(self):
    """Parser returns two cost-only billing periods from the multi-bill fixture.

    Both expected periods share one statement date and carry a cost but no
    usage or peak value.
    """
    statement_date = date(2021, 2, 18)
    # Each entry: (period start, period end, cost).
    periods = [
        (date(2020, 12, 3), date(2021, 1, 4), 132444.44),
        (date(2021, 1, 5), date(2021, 2, 2), 122005.01),
    ]
    expected = [
        BillingDatum(
            start=first_day,
            end=last_day,
            statement=statement_date,
            cost=amount,
            used=None,
            peak=None,
            items=None,
            attachments=None,
            utility_code=None,
        )
        for first_day, last_day, amount in periods
    ]
    actual = parse_pdf(
        "datafeeds/scrapers/tests/fixtures/ladwp-2020-12-multi.pdf",
        "1BPMYVL000231",
        "kw",
    )
    self.assertEqual(expected, actual)
def test_past_due(self):
    """Parser doesn't crash on a statement with no usage data.

    The past-due fixture is expected to produce an empty list of bills.
    """
    fixture = "datafeeds/scrapers/tests/fixtures/ladwp-past-due-202008.pdf"
    bills = parse_pdf(fixture, "3770151783", "ccf")
    self.assertEqual([], bills)
def test_single_account(self, _notify):
    """Parser can extract data from a single-account bill.

    ``_notify`` is accepted but never used by this test.
    """
    fields = {
        "start": date(2020, 9, 10),
        "end": date(2020, 10, 12),
        "statement": date(2020, 10, 13),
        "cost": 115955.98,
        "used": 571680,
        "peak": 1215.36,
        "items": None,
        "attachments": None,
        "utility_code": None,
    }
    only_bill = BillingDatum(**fields)
    actual = parse_pdf(
        "datafeeds/scrapers/tests/fixtures/ladwp-single.pdf",
        "PMY2V00231-00001054",
        "kw",
    )
    self.assertEqual([only_bill], actual)
def test_single_meter_cost(self, _notify):
    """Parser can extract the cost for a single meter from a multi-meter bill.

    ``_notify`` is accepted but never used by this test.
    """
    # Show the complete diff if the comparison below fails.
    self.maxDiff = None
    fixture = "datafeeds/scrapers/tests/fixtures/ladwp-4030.pdf"
    meter_bill = BillingDatum(
        start=date(2021, 3, 9),
        end=date(2021, 4, 8),
        statement=date(2021, 4, 9),
        # TODO: this is not extracted correctly
        cost=883.85,
        used=4464.0,
        peak=14.4,
        items=None,
        attachments=None,
        utility_code=None,
    )
    actual = parse_pdf(fixture, "PMY00209-00014118", "kw")
    self.assertEqual([meter_bill], actual)
def test_multiline_service_water(self):
    """Parser can extract water service data from a bill with multiple lines.

    Exactly one billing period, with usage and no peak, is expected.
    """
    fixture = "datafeeds/scrapers/tests/fixtures/ladwp-water-202001.pdf"
    water_bill = BillingDatum(
        start=date(2019, 12, 12),
        end=date(2020, 1, 13),
        statement=date(2020, 1, 15),
        cost=836.08,
        used=122,
        peak=None,
        items=None,
        attachments=None,
        utility_code=None,
    )
    actual = parse_pdf(fixture, "2112228930", "ccf")
    self.assertEqual([water_bill], actual)
def test_multi_service_fire(self):
    """Parser can extract fire service data from a bill with multiple fire service sections.

    Exactly one billing period, with zero usage and no peak, is expected.
    """
    fields = dict(
        start=date(2020, 9, 23),
        end=date(2020, 10, 22),
        statement=date(2020, 10, 23),
        cost=118.72,
        used=0,
        peak=None,
        items=None,
        attachments=None,
        utility_code=None,
    )
    expected = [BillingDatum(**fields)]
    actual = parse_pdf(
        "datafeeds/scrapers/tests/fixtures/ladwp-fire-202010.pdf",
        "6293005254",
        "ccf",
    )
    self.assertEqual(expected, actual)
def test_electric_and_fire(self):
    """Parser can extract fire service data from a bill that also contains electricity usage.

    Exactly one billing period, with zero usage and no peak, is expected.
    """
    fixture = "datafeeds/scrapers/tests/fixtures/ladwp-fire-202011.pdf"
    fire_bill = BillingDatum(
        start=date(2020, 10, 13),
        end=date(2020, 11, 15),
        statement=date(2020, 11, 16),
        cost=118.72,
        used=0,
        peak=None,
        items=None,
        attachments=None,
        utility_code=None,
    )
    actual = parse_pdf(fixture, "4770151943", "ccf")
    self.assertEqual([fire_bill], actual)
def test_fire_only(self):
    """Parser can extract fire service data from a bill with only fire service data.

    Exactly one billing period, with zero usage and no peak, is expected.
    """
    period_start = date(2020, 8, 28)
    period_end = date(2020, 9, 28)
    statement_date = date(2020, 9, 29)
    expected = [
        BillingDatum(
            start=period_start,
            end=period_end,
            statement=statement_date,
            cost=118.72,
            used=0,
            peak=None,
            items=None,
            attachments=None,
            utility_code=None,
        )
    ]
    actual = parse_pdf(
        "datafeeds/scrapers/tests/fixtures/ladwp-fire-202009.pdf",
        "3631146704",
        "ccf",
    )
    self.assertEqual(expected, actual)
def test_water_sewer(self):
    """Parser can extract water data from a bill with water and sewer data.

    Exactly one billing period, with usage and no peak, is expected.
    """
    fixture = "datafeeds/scrapers/tests/fixtures/ladwp-water-202010.pdf"
    water_bill = BillingDatum(
        start=date(2020, 9, 28),
        end=date(2020, 10, 28),
        statement=date(2020, 10, 29),
        cost=2523.12,
        used=411,
        peak=None,
        items=None,
        attachments=None,
        utility_code=None,
    )
    parsed = parse_pdf(fixture, "9479723015", "ccf")
    self.assertEqual([water_bill], parsed)
def test_multi_account(self, _notify):
    """Parser can extract data from a multi-account bill.

    Every meter listed for the multi-account fixture is parsed separately
    and must yield exactly one billing period; all of them share the same
    start, end and statement dates. A second fixture is then checked for a
    single meter. ``_notify`` is accepted but never used by this test.
    """
    multi_pdf = "datafeeds/scrapers/tests/fixtures/ladwp-multi.pdf"
    period_start = date(2020, 9, 2)
    period_end = date(2020, 10, 1)
    statement_date = date(2020, 10, 5)
    # Each row: (meter number, cost, used, peak).
    meters = [
        ("PMY00209-00014118", 1175.67, 4788, 13.68),
        ("APMYD00209-00069098", 1223.64, 5467, 13.03),
        ("APMYD00209-00064175", 882.04, 3520, 10.22),
        ("APMYD00209-00064176", 2174.42, 9498, 27.83),
        ("PMY00209-00014142", 900.52, 2772, 7.92),
        ("APMYD00209-00069100", 1410.76, 6217, 11.92),
        ("APMYD00209-00064174", 384.29, 1062, 2.03),
        ("PMY00209-00028877", 621.40, 2316, 11.38),
        ("1APMYV00277-00006259", 27440.74, 111744, 426.24),
        ("PMY00209-00016347", 1221.66, 5220, 12.24),
        ("PMY00209-00014123", 1004.06, 3888, 18),
    ]
    for meter_number, amount, usage, demand in meters:
        bill = BillingDatum(
            start=period_start,
            end=period_end,
            statement=statement_date,
            cost=amount,
            used=usage,
            peak=demand,
            items=None,
            attachments=None,
            utility_code=None,
        )
        self.assertEqual([bill], parse_pdf(multi_pdf, meter_number, "kw"))

    # uses peaks_2 pattern
    april_pdf = "datafeeds/scrapers/tests/fixtures/ladwp-202004.pdf"
    april_bill = BillingDatum(
        start=date(2020, 3, 6),
        end=date(2020, 4, 6),
        statement=date(2020, 4, 7),
        cost=2566.24,
        used=12505,
        peak=37.72,
        items=None,
        attachments=None,
        utility_code=None,
    )
    self.assertEqual(
        [april_bill], parse_pdf(april_pdf, "APMYD00209-00063954", "kw")
    )
def test_rebill(self, notify):
    """Parser can extract data from a bill with corrections.

    The same fixture is parsed for three meters (electricity, water, fire).
    After the electricity parse, ``notify`` must have been called exactly
    once with that meter number and the statement date.
    """
    # Show the complete diff if any comparison below fails.
    self.maxDiff = None
    rebill_pdf = "datafeeds/scrapers/tests/fixtures/ladwp-rebill.pdf"
    statement_date = date(2020, 9, 24)

    def billed(first_day, last_day, amount, usage, demand=None):
        # Every period in this fixture shares the same statement date.
        return BillingDatum(
            start=first_day,
            end=last_day,
            statement=statement_date,
            cost=amount,
            used=usage,
            peak=demand,
            items=None,
            attachments=None,
            utility_code=None,
        )

    # electricity
    electric_meter = "APMV00477-00001024"
    electric_bills = [
        billed(date(2020, 3, 25), date(2020, 7, 21), 70308.37, 297840, 338.4),
        billed(date(2020, 7, 23), date(2020, 9, 20), 46004.04, 200640.0, 295.2),
    ]
    self.assertEqual(electric_bills, parse_pdf(rebill_pdf, electric_meter, "kw"))
    notify.assert_called_once_with("APMV00477-00001024", date(2020, 9, 24))

    # water
    water_bills = [
        billed(date(2020, 5, 27), date(2020, 6, 23), 534.08, 95),
        billed(date(2020, 6, 24), date(2020, 6, 30), 134.16, 23.89655),
        billed(date(2020, 7, 1), date(2020, 7, 22), 469.08, 75.10345),
        billed(date(2020, 7, 23), date(2020, 8, 20), 686.89, 107),
        billed(date(2020, 8, 21), date(2020, 9, 20), 1031.67, 150),
    ]
    self.assertEqual(water_bills, parse_pdf(rebill_pdf, "2463041637", "ccf"))

    # fire
    fire_bills = [billed(date(2020, 5, 26), date(2020, 9, 20), 466.97, 0)]
    self.assertEqual(fire_bills, parse_pdf(rebill_pdf, "2463041281", "ccf"))
configuration = LADWPBillPdfConfiguration( meter_number=meter_number, utility_account_id=meter_number, commodity="False", account_name=None, ) credentials = Credentials(username, password) scraper = LADWPBillPdfScraper( credentials, DateRange(start_date, end_date), configuration ) scraper.start() scraper.scrape( bills_handler=ft.partial( test_upload_bills, -1, meter_number, None, "ladwp-bill-pdf" ), partial_bills_handler=None, readings_handler=None, pdfs_handler=None, ) scraper.stop() if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("filename", type=str) parser.add_argument("meter_number", type=str) parser.add_argument("commodity", type=str) args = parser.parse_args() bills = parse_pdf(args.filename, args.meter_number, args.commodity) print(bills)