def download_and_attach_pdf(
    self, bill_data: BillingDatum, billing_row: sce_pages.BillingDataRow
) -> BillingDatum:
    """Download the PDF for a billing row and attach it to the bill.

    Clears earlier PDF downloads, downloads the PDF for ``billing_row`` and
    passes the open file to ``bill_upload.upload_bill_to_s3`` under a key
    built from the bill hash plus a ``.pdf`` suffix.

    Args:
        bill_data: the bill that the PDF belongs to.
        billing_row: the billing page row to download the PDF from.

    Returns:
        A copy of ``bill_data`` whose attachments hold the uploaded entry, or
        ``bill_data`` unchanged when no PDF was downloaded or the upload
        produced no attachment entry.
    """
    self.clear_pdf_downloads()
    bill_path = self.download_pdf_for_billing_row(billing_row)
    if not bill_path:
        log.info(
            "No pdf bill was available for this period: %s to %s",
            bill_data.start,
            bill_data.end,
        )
        return bill_data

    with open(bill_path, "rb") as bill_file:
        key = bill_upload.hash_bill_datum(self.service_id, bill_data) + ".pdf"
        attachment_entry = bill_upload.upload_bill_to_s3(
            bill_file,
            key,
            statement=bill_data.statement,
            source="sce.com",
            utility=self.utility,
            utility_account_id=self.utility_account_id,
        )

    # NOTE(review): the upload is assumed to be able to return a falsy value
    # (confirm against bill_upload); never store [None] as the attachments.
    if not attachment_entry:
        return bill_data
    return bill_data._replace(attachments=[attachment_entry])
def _execute(self):
    """Log in, gather bill history files and turn them into bills.

    Each history entry is a ``(pdf, xls)`` pair. The excel file is parsed
    when present, otherwise the PDF is parsed. When a PDF is present and
    parsing produced bills, the PDF is uploaded once per bill and attached
    to it. The collected bills are passed through ``adjust_bill_dates``
    and returned in a ``Results`` object.
    """
    home_page = LoginPage(self._driver).login(self.username, self.password)
    self.screenshot("home_page")

    bill_history_page = home_page.to_bill_history()
    bill_history_page.set_dates(self.start_date, self.end_date)
    self.screenshot("bill_history")

    history = bill_history_page.gather_data()

    # Summary statistics for the log only.
    pdf_byte_total = sum(len(entry[0]) for entry in history if entry[0])
    xls_byte_total = sum(len(entry[1]) for entry in history if entry[1])
    pdf_count = sum(1 for entry in history if entry[0])
    xls_count = sum(1 for entry in history if entry[1])
    log.info(
        "Acquired %s pdfs (%s bytes) and %s excel files (%s bytes)."
        % (pdf_count, pdf_byte_total, xls_count, xls_byte_total)
    )

    bills = []
    for pdf_content, xls_content in history:
        # Prefer the excel file for parsing; fall back to the PDF.
        if xls_content is not None:
            parsed = bill_data_from_xls(xls_content, self.service_account)
        elif pdf_content is not None:
            parsed = bill_data_from_pdf(
                pdf_content, self.service_account, self.meter_serial
            )
        else:
            parsed = []

        if pdf_content is not None and parsed:
            with_attachments = []
            for datum in parsed:
                key = bill_upload.hash_bill_datum(self.service_account, datum)
                # statement date is not visible in the bill PDF text; use end date
                entry = bill_upload.upload_bill_to_s3(
                    BytesIO(pdf_content),
                    key,
                    source="atmosenergy.com",
                    statement=datum.end,
                    utility=self.utility,
                    utility_account_id=self.utility_account_id,
                )
                if entry:
                    datum = datum._replace(attachments=[entry])
                with_attachments.append(datum)
            parsed = with_attachments

        if parsed:
            bills += parsed

    return Results(bills=adjust_bill_dates(bills))
def _execute(self):
    """Log in, gather bill PDFs, parse and upload them, and return bills.

    PDFs that ``parse_bill_pdf`` cannot parse (returns ``None``) are
    skipped. Every parsed bill is rebuilt with its start date moved one
    day later before ``adjust_bill_dates`` is applied.
    """
    home_page = LoginPage(self._driver).login(self.keller_id, self.password)
    self.screenshot("home_page")

    bill_history_page = home_page.to_bill_history()
    self.screenshot("bill_history_page")

    pdfs = bill_history_page.gather_data(
        self.keller_id, self.start_date, self.end_date
    )
    total_bytes = sum(len(pdf) for pdf in pdfs)
    log.info("Acquired %d bills (%s bytes total)." % (len(pdfs), total_bytes))

    parsed_bills = []
    for pdf in pdfs:
        datum = parse_bill_pdf(BytesIO(pdf))
        if datum is None:
            continue

        key = bill_upload.hash_bill_datum(self.account_number, datum)
        # bill doesn't have a statement date; use end date
        entry = bill_upload.upload_bill_to_s3(
            BytesIO(pdf),
            key,
            statement=datum.end,
            source="cityofkeller.com",
            utility=self.utility,
            utility_account_id=self.account_number,
        )
        parsed_bills.append(
            datum._replace(attachments=[entry]) if entry else datum
        )

    # bill periods overlap; adjust start dates
    shifted_bills = [
        BillingDatum(
            start=bill.start + timedelta(days=1),
            end=bill.end,
            statement=bill.statement,
            cost=bill.cost,
            used=bill.used,
            peak=bill.peak,
            items=bill.items,
            attachments=bill.attachments,
            utility_code=None,
        )
        for bill in parsed_bills
    ]

    final_bills = adjust_bill_dates(shifted_bills)
    show_bill_summary(final_bills, "Final Bill Summary")
    return Results(bills=final_bills)
def make_billing_datum(self, bill_detail: BillPeriodDetails) -> BillingDatum:
    """Build a BillingDatum from a billing detail summary from the website.

    The statement date is read from a ``Date=yyyy-mm-dd`` fragment in the
    download link. When the link is missing, has no such fragment, or the
    date fails to parse, the bill end date is used instead. The bill PDF is
    then downloaded and, if an upload entry is produced, attached.
    """
    statement = None
    link = bill_detail.download_link
    # get statement date from link: Date=yyyy-mm-dd
    match = re.search(r"Date=(\d\d\d\d-\d\d-\d\d)", link) if link else None
    if match:
        date_text = match.group(1)
        try:
            statement = parse_date(date_text).date()
        except Exception as exc:
            log.warning("error parsing date %s: %s", date_text, exc)
    statement = statement or bill_detail.end

    bill_datum = BillingDatum(
        start=bill_detail.start,
        end=bill_detail.end,
        statement=statement,
        cost=bill_detail.total_charges,
        used=bill_detail.total_kwh,
        peak=bill_detail.max_kw,
        items=None,
        attachments=None,
        utility_code=bill_detail.utility_code,
    )

    pdf_bytes = self.download_pdf(bill_detail)
    if not pdf_bytes:
        return bill_datum

    key = bill_upload.hash_bill_datum(self.account_id, bill_datum)
    attachment_entry = bill_upload.upload_bill_to_s3(
        BytesIO(pdf_bytes),
        key,
        source="smud.org",
        statement=statement,
        utility=self.utility,
        utility_account_id=self.account_id,
    )
    if not attachment_entry:
        return bill_datum
    return bill_datum._replace(attachments=[attachment_entry])
def _execute(self):
    """Log in, select the account, gather bills and attach their PDFs.

    If the requested window is shorter than 90 days, ``self.start_date`` is
    moved back so the window spans 90 days. Each gathered record is a
    ``(bill, pdf_bytes)`` pair; records without a PDF are kept as they are.
    """
    minimum_window = timedelta(days=90)
    if self.end_date - self.start_date < minimum_window:
        self.start_date = self.end_date - minimum_window
        log.info(
            "Initial time window was too narrow for this utility. Expanding time window to: %s - %s"
            % (self.start_date, self.end_date)
        )

    home_page = LoginPage(self._driver).login(self.username, self.password)
    log.info("Login successful. Loading bill history.")
    self.screenshot("post_login")

    bill_history_page = home_page.select_account(self.account_number)
    log.info("Loaded bill history page.")
    self.screenshot("bill_history")

    results = bill_history_page.gather_data(self.start_date, self.end_date)
    pdf_count = sum(1 for _, content in results if content is not None)
    log.info(
        "Obtained %s bill records and %s PDFs." % (len(results), pdf_count)
    )

    bills = []
    for bill, pdf_bytes in results:
        entry = None
        if pdf_bytes is not None:
            key = bill_upload.hash_bill_datum(self.account_number, bill)
            entry = bill_upload.upload_bill_to_s3(
                BytesIO(pdf_bytes),
                key,
                statement=bill.statement,
                source="hudsonenergy.net",
                utility=self.utility,
                utility_account_id=self.account_number,
            )
        bills.append(bill._replace(attachments=[entry]) if entry else bill)

    return Results(bills=adjust_bill_dates(bills))
def get_bills(self, account_id: str, start: date, end: date) -> List[BillingDatum]:
    """Get bills from the usage table, downloading the PDF for each one.

    For each table row (hidden cells are ignored):
      - statement date comes from column 1 (Bill date)
      - end date comes from column 2 (Read date); the year is taken from
        the statement date, minus one when the statement is in January
        and the read date starts with "12"
      - start date is end date - (column 7 (Days) - 1)
      - rows whose period does not overlap start / end are skipped
      - peak comes from column 3 (On-peak Billed kW)
      - used is the sum of columns 4, 5 and 6 (the kWh columns)
      - cost comes from column 8 (New charges)
      - the link in column 0 is clicked to download the PDF, then we wait
        for the file to appear in self.driver.download_dir

    Args:
        account_id: account id used for the bill hash and the upload.
        start: start of the requested date range.
        end: end of the requested date range.

    Returns:
        One BillingDatum per overlapping row, with the uploaded PDF
        attached when an attachment entry was produced.

    Raises:
        Exception: when the PDF link cannot be clicked or the download does
            not appear in time; the original error is chained as the cause.
    """

    def to_float(text):
        # Keep only digits and dots (drops "$", "," and similar); empty -> 0.
        digits = "".join(ch for ch in text if ch.isdigit() or ch == ".")
        return float(digits) if digits else 0

    WebDriverWait(self.driver, 10).until(
        EC.presence_of_element_located(self.UsageTableBodyLocator)
    )
    usage_table_rows = self.driver.find_elements(*self.UsageTableRowsLocator)

    bill_data: List[BillingDatum] = []
    self.driver.screenshot(BaseWebScraper.screenshot_path("bill table"))

    for row in usage_table_rows:
        # Skip hidden cells; a missing style attribute counts as visible.
        cols = [
            c
            for c in row.find_elements_by_tag_name("td")
            if "display: none" not in (c.get_attribute("style") or "")
        ]
        # Read each cell's text once instead of on every access.
        texts = [c.text for c in cols]

        log.debug(f"statement={texts[1]} end={texts[2]} days={texts[7]}")

        # statement date
        statement_date = date_parser.parse(texts[1]).date()

        # bill end: the read date has no year; a December read on a
        # January statement belongs to the previous year
        period_year = statement_date.year
        if statement_date.month == 1 and texts[2].startswith("12"):
            period_year = statement_date.year - 1
        end_str = f"{texts[2]}/{period_year}"
        bill_end = date_parser.parse(end_str).date()

        # bill start
        bill_start = bill_end - timedelta(days=int(to_float(texts[7])) - 1)
        log.debug(f"start={bill_start} end={bill_end}")

        if not self._overlap(start, end, bill_start, bill_end):
            log.info(
                f"skipping bill {bill_start} - {bill_end}: does not overlap requested range {start} - {end}"
            )
            continue

        bill_datum = BillingDatum(
            start=bill_start,
            end=bill_end,
            statement=statement_date,
            cost=to_float(texts[8]),
            used=to_float(texts[4]) + to_float(texts[5]) + to_float(texts[6]),
            peak=to_float(texts[3]),
            items=None,
            attachments=None,
            utility_code=None,
        )

        # Built before the try block so the error message below can always
        # reference it.
        bill_pdf_name = "SRPbill{}{}.pdf".format(
            statement_date.strftime("%B"), statement_date.year
        )
        try:
            pdf_download_link = cols[0].find_element_by_tag_name("a")
            scroll_to(self.driver, pdf_download_link)
            pdf_download_link.click()
            log.info("looking for %s in %s", bill_pdf_name, self.driver.download_dir)
            self.driver.wait(60).until(
                file_exists_in_dir(self.driver.download_dir, bill_pdf_name)
            )
        except Exception as e:
            raise Exception(
                f"Failed to download bill {bill_pdf_name} for statement date {statement_date}:\n {e}"
            ) from e
        log.info(
            f"Bill {bill_pdf_name} for statement date {statement_date} downloaded successfully"
        )

        attachment_entry = None
        # open downloaded PDF and upload
        if config.enabled("S3_BILL_UPLOAD"):
            key = hash_bill_datum(account_id, bill_datum)
            with open(f"{self.driver.download_dir}/{bill_pdf_name}", "rb") as pdf_data:
                attachment_entry = upload_bill_to_s3(
                    BytesIO(pdf_data.read()),
                    key,
                    source="myaccount.srpnet.com",
                    statement=bill_datum.statement,
                    utility="utility:salt-river-project",
                    utility_account_id=account_id,
                )

        if attachment_entry:
            bill_data.append(bill_datum._replace(attachments=[attachment_entry]))
        else:
            bill_data.append(bill_datum)

    return bill_data
def _execute(self):
    """Log in, pick the account and date range, then parse and upload bills.

    The requested window is widened to at least ``MINIMUM_BILL_DAYS`` days
    (updating ``self.start_date``), then clamped to no earlier than ten
    years ago and no later than today. PDFs that fail to parse are logged
    and skipped. Uploads happen only when ``S3_BILL_UPLOAD`` is enabled.
    """
    minimum_span = timedelta(days=MINIMUM_BILL_DAYS)
    if self.end_date - self.start_date < minimum_span:
        log.info(
            f"Expanding date range to a minimum of {MINIMUM_BILL_DAYS} days."
        )
        self.start_date = self.end_date - minimum_span

    earliest_allowed = (datetime.now() - relativedelta(years=10)).date()
    start_date = max(self.start_date, earliest_allowed)
    today = datetime.now().date()
    end_date = min(self.end_date, today)
    log.info("Final date range to search: %s - %s" % (start_date, end_date))

    home_page = LoginPage(self._driver).login(self.username, self.password)
    self.screenshot("home_screen")
    log.info("Login successful.")

    bill_history_page = home_page.to_bill_history()
    self.screenshot("bill_history_page")
    log.info("Loaded bill history.")

    bill_history_page.select_account(self.account_number)
    self.screenshot("account_selected")
    log.info("Selected account.")

    bill_history_page.set_dates(start_date, end_date)
    self.screenshot("dates_selected")
    log.info("Selected dates.")

    raw_pdfs = bill_history_page.gather_data()
    log.info("PDF bills captured: %s" % len(raw_pdfs))
    log.info(
        "Net bill pdf bytes captured: %s" % (sum(len(pdf) for pdf in raw_pdfs))
    )

    bill_data = []
    for position, pdf in enumerate(raw_pdfs, start=1):
        datum = parse_bill_pdf(BytesIO(pdf), self.meter_number)
        if datum is None:
            log.info("There was a problem parsing a bill PDF #%d." % position)
            continue

        entry = None
        if config.enabled("S3_BILL_UPLOAD"):
            key = bill_upload.hash_bill_datum(self.meter_number, datum)
            entry = bill_upload.upload_bill_to_s3(
                BytesIO(pdf),
                key,
                source="pacificpower.net",
                statement=datum.statement,
                utility=self.utility,
                utility_account_id=self.account_number,
            )

        bill_data.append(datum._replace(attachments=[entry]) if entry else datum)

    final_bills = adjust_bill_dates(bill_data)
    show_bill_summary(final_bills, "Final Bill Summary")
    return Results(bills=final_bills)