def download_and_attach_pdf(
    self, bill_data: BillingDatum, billing_row: sce_pages.BillingDataRow
) -> BillingDatum:
    """Download the PDF for a billing row and attach it to the bill.

    Clears earlier PDF downloads, downloads the PDF for ``billing_row`` and,
    when a file was produced, uploads it through
    ``bill_upload.upload_bill_to_s3`` under a key derived from
    ``self.service_id`` and ``bill_data``.

    Args:
        bill_data: The bill the PDF belongs to.
        billing_row: The page row used to trigger the PDF download.

    Returns:
        A copy of ``bill_data`` whose ``attachments`` holds the upload entry,
        or ``bill_data`` unchanged when no PDF was available or the upload
        produced no entry.
    """
    self.clear_pdf_downloads()
    bill_path = self.download_pdf_for_billing_row(billing_row)
    if not bill_path:
        log.info(
            "No pdf bill was available for this period: %s to %s",
            bill_data.start,
            bill_data.end,
        )
        return bill_data

    key = bill_upload.hash_bill_datum(self.service_id, bill_data) + ".pdf"
    with open(bill_path, "rb") as bill_file:
        attachment_entry = bill_upload.upload_bill_to_s3(
            bill_file,
            key,
            statement=bill_data.statement,
            source="sce.com",
            utility=self.utility,
            utility_account_id=self.utility_account_id,
        )

    # Never store an empty entry: a falsy upload result would otherwise end
    # up as attachments=[None].
    # NOTE(review): presumably upload_bill_to_s3 can return None (e.g. when
    # uploads are disabled) -- confirm against bill_upload.
    if not attachment_entry:
        return bill_data
    return bill_data._replace(attachments=[attachment_entry])
# Example #2
# 0
def adjust_billing_datum_type(bill: BillingDatum) -> BillingDatum:
    """Return ``bill`` with datetime start/end values reduced to dates.

    Handles the case where datetimes were passed into a BillingDatum: any
    ``start`` or ``end`` that is a ``datetime`` (including subclasses of
    ``datetime``) is replaced by its calendar date. Values that are not
    datetimes are left untouched.

    Args:
        bill: The billing record to normalise.

    Returns:
        A copy of ``bill`` produced with ``_replace``; the input is not
        modified.
    """

    def as_date(value):
        # datetime is itself a subclass of date, so the check must target
        # datetime specifically; isinstance also covers datetime subclasses,
        # which an exact type comparison would skip.
        return value.date() if isinstance(value, datetime) else value

    return bill._replace(start=as_date(bill.start), end=as_date(bill.end))
    def make_billing_datum(self, bill_detail: BillPeriodDetails) -> BillingDatum:
        """Build a Gridium BillingDatum from one billing period summary.

        The statement date is read from the ``Date=yyyy-mm-dd`` part of the
        download link when one is present and parseable; otherwise the end of
        the billing period is used. If a PDF can be downloaded for the period
        it is uploaded and attached to the returned record.

        Args:
            bill_detail: Billing period summary taken from the website.

        Returns:
            The BillingDatum for the period, carrying the upload entry in
            ``attachments`` when an upload produced one.
        """
        # The statement date is embedded in the link as Date=yyyy-mm-dd.
        link = bill_detail.download_link
        found = re.search(r"Date=(\d\d\d\d-\d\d-\d\d)", link) if link else None

        statement = None
        if found:
            raw_date = found.group(1)
            try:
                statement = parse_date(raw_date).date()
            except Exception as exc:
                log.warning("error parsing date %s: %s", raw_date, exc)
        # No usable date in the link: fall back to the end of the period.
        statement = statement or bill_detail.end

        bill_datum = BillingDatum(
            start=bill_detail.start,
            end=bill_detail.end,
            statement=statement,
            cost=bill_detail.total_charges,
            used=bill_detail.total_kwh,
            peak=bill_detail.max_kw,
            items=None,
            attachments=None,
            utility_code=bill_detail.utility_code,
        )

        pdf_bytes = self.download_pdf(bill_detail)
        if not pdf_bytes:
            return bill_datum

        uploaded = bill_upload.upload_bill_to_s3(
            BytesIO(pdf_bytes),
            bill_upload.hash_bill_datum(self.account_id, bill_datum),
            source="smud.org",
            statement=statement,
            utility=self.utility,
            utility_account_id=self.account_id,
        )
        if not uploaded:
            return bill_datum
        return bill_datum._replace(attachments=[uploaded])
# Example #4
# 0
    def get_bills(self, utility: str,
                  utility_account_id: str) -> List[BillingDatum]:
        """Scrape each in-range bill from the bills table and upload its PDF.

        For every bill date linked from the bills table that falls within
        ``self.start_date`` .. ``self.end_date`` this opens the bill view,
        reads the service period, cost, kWh usage and peak kW from the page,
        prints the bill image to a PDF in ``self.download_dir``, uploads that
        PDF and records the result.

        Args:
            utility: Utility identifier passed through to the PDF upload.
            utility_account_id: Account id used for the upload and for the
                hash that forms the upload key.

        Returns:
            One BillingDatum per in-range bill, in the order produced by
            walking the table's bill links in reverse.

        Raises:
            Exception: If the service period cannot be read from a bill, or
                the check for the printed PDF fails.
        """
        billing_data = []

        available_dates = self.driver.find_elements(
            By.CSS_SELECTOR, "table.table-alt a.bill-view-link")
        available_dates = [parse_date(i.text).date() for i in available_dates]
        log.info("available dates: %s",
                 [dt.strftime("%Y-%m-%d") for dt in available_dates])

        xpath_locators = {
            # third cell of the rows in the table that mentions NEW CHARGES
            "cost":
            "//table[contains(., 'NEW CHARGES')]/tbody/tr/td[3]",
            # cells mentioning KWH in the table that mentions USAGE
            "used":
            "(//table[contains(.,'USAGE')]//tr/td[contains(., 'KWH')])",
            # cells mentioning KW (but not KWH) in the table that mentions USAGE
            "usage_kw":
            "//table[contains(.,'USAGE')]//tr/td[contains(.,'KW') and not(contains(.,'KWH'))]",
        }

        # loop through dates in table in ascending order
        # NOTE(review): this presumes the table lists newest bills first.
        for pdf_date in reversed(available_dates):
            # skip if the date isn't in the specified range
            if not (self.start_date <= pdf_date <= self.end_date):
                log.debug("skipping date outside range: %s", pdf_date)
                continue

            view_bill_link = self.driver.find_element_by_xpath(
                '//a[.="%s"]' % pdf_date.strftime("%m/%d/%Y"))
            scroll_to(self.driver, view_bill_link)

            self.driver.sleep(0.5)
            view_bill_link.click()

            self.driver.wait(30).until(
                EC.visibility_of_element_located(
                    (By.CSS_SELECTOR, "div.billImage")))

            cost = None
            used = None
            peak = None

            dates_line_text: str = self.driver.find_element_by_xpath(
                "//td[contains(., 'Service From:')]").text
            dates_match = re.search(
                r"Service From: (?P<from>\w+ \d\d) to (?P<to>\w+ \d\d) \(\d\d Days\)",
                dates_line_text,
            )
            if not dates_match:
                # Without the service period no bill can be built; fail with
                # a message that names the bill instead of crashing later on
                # arithmetic with None.
                raise Exception("Unable to parse service dates for %s: %r" %
                                (pdf_date, dates_line_text))

            # The page only shows month and day, so the years are inferred
            # from the bill date: the period is taken to end on or before
            # the bill date and to start on or before its own end. This
            # covers periods that end in December but are billed in January
            # as well as periods that span the new year.
            # NOTE(review): assumes a bill is never dated before the end of
            # the service period it covers -- confirm.
            from_text = dates_match.group("from")
            to_text = dates_match.group("to")
            end_date = parse_date("%s %s" % (to_text, pdf_date.year))
            if end_date.date() > pdf_date:
                end_date = parse_date("%s %s" % (to_text, pdf_date.year - 1))
            start_date = parse_date("%s %s" % (from_text, end_date.year))
            if start_date > end_date:
                start_date = parse_date("%s %s" %
                                        (from_text, end_date.year - 1))

            cost_match = self.driver.find(xpath_locators["cost"], xpath=True)
            if cost_match:
                cost = cost_match.text
                cost = float(cost.replace("$", "").replace(",", ""))

            kwh_usages = []
            for match in self.driver.find_all(xpath_locators["used"],
                                              xpath=True):
                # include only if it has a reading values as siblings; exclude credit line items
                parent = match.find_element_by_xpath("..")
                # meter number, previous reading, current reading
                readings_text = ""
                for idx, child in enumerate(
                        parent.find_elements_by_xpath(".//td")):
                    log.debug("\t%s\t%s", idx, child.text.strip())
                    readings_text += child.text.strip()
                    if idx == 2:
                        break
                if not readings_text:
                    log.info("skipping non-reading line item: %s", parent.text)
                    continue
                kwh_value = float(
                    match.text.replace("KWH", "").replace(",", "").strip())
                kwh_usages.append(kwh_value)

            if kwh_usages:
                used = sum(kwh_usages)

            kw_usages = []
            for usage_kw_match in self.driver.find_all(
                    xpath_locators["usage_kw"], xpath=True):
                kw_usages.append(
                    float(
                        usage_kw_match.text.replace("KW",
                                                    "").replace(",",
                                                                "").strip()))

            if kw_usages:
                peak = max(kw_usages)

            # The bill's end (and statement) date is the day before the
            # printed "to" date.
            # NOTE(review): start/end are passed on exactly as parse_date
            # returns them, without .date() -- confirm that is intended.
            data = BillingDatum(
                start=start_date,
                end=end_date - timedelta(days=1),
                statement=end_date - timedelta(days=1),
                cost=cost,
                peak=peak,
                used=used,
                items=None,
                attachments=None,
                utility_code=None,
            )

            self.driver.find("a#billImageToPrint").click()
            self.driver.sleep(1)
            self.driver.switch_to.window(self.driver.window_handles[-1])

            # the filename of the printed pdf is f"{current page title}.pdf"
            self.driver.execute_script("window.print();")

            # NOTE(review): this relies on file_exists_in_dir raising when
            # the file is missing -- confirm, since its result is not used.
            try:
                file_exists_in_dir(directory=self.download_dir,
                                   pattern=r"^Bill View Bill Image.pdf$")
            except Exception as exc:
                raise Exception("Unable to download file for %s" %
                                pdf_date) from exc

            # Rename the printed file after the bill date so the next bill's
            # print does not collide with it.
            curr_path = os.path.join(self.download_dir,
                                     "Bill View Bill Image.pdf")
            new_path = os.path.join(
                self.download_dir, f"bill_{pdf_date.strftime('%Y-%m-%d')}.pdf")
            os.rename(curr_path, new_path)

            log.info("parsed bill for %s - %s", data.start, data.end)

            self.driver.find("a#close").click()
            self.driver.sleep(1)
            self.driver.switch_to.window(self.driver.window_handles[-1])
            self.driver.sleep(1)

            # upload PDF:
            key = hash_bill(
                utility_account_id,
                data.start,
                data.end,
                data.cost,
                data.peak,
                data.used,
            )

            with open(new_path, "rb") as pdf_data:
                attachment_entry = upload_bill_to_s3(
                    BytesIO(pdf_data.read()),
                    key,
                    source="www.duke-energy.com",
                    statement=data.end,
                    utility=utility,
                    utility_account_id=utility_account_id,
                )

            if attachment_entry:
                data = data._replace(attachments=[attachment_entry])

            billing_data.append(data)

            # Click Bill Information in breadcrumbs to go back to bills list page
            self.driver.find("a#billInformation").click()

        return billing_data
# Example #5
# 0
    def get_bills(self, account_id: str, start: date,
                  end: date) -> List[BillingDatum]:
        """Get bills from the usage table and download each bill's PDF.

        For each table row (using only the cells that are not hidden):
          - statement date comes from the Bill date column (index 1)
          - end date comes from the Read date column (index 2), which has no
            year; the statement year is used, or the year before it when the
            statement is in January and the read date starts with "12"
          - start date is end date - (Days column (index 7) - 1)
          - rows whose start - end does not overlap ``start`` / ``end`` are
            skipped
          - peak comes from On-peak Billed kW (index 3)
          - used is the sum of the three kWh columns (indexes 4, 5 and 6)
          - cost comes from New charges (index 8)
          - the link in the first cell is clicked to download the PDF, and
            the download is awaited in ``self.driver.download_dir``

        Args:
            account_id: Account id used for the upload and its hash key.
            start: First day of the requested range.
            end: Last day of the requested range.

        Returns:
            One BillingDatum per overlapping row, with the uploaded PDF
            attached when an upload entry was produced.

        Raises:
            Exception: If a bill's PDF cannot be downloaded.
        """

        def cell_text(cells, idx: int) -> str:
            """Return the text of the visible cell at ``idx``."""
            return cells[idx].text

        def cell_number(cells, idx: int) -> float:
            """Return the cell's digits and dots as a number, 0 if none."""
            digits = "".join(ch for ch in cell_text(cells, idx)
                             if ch.isdigit() or ch == ".")
            return float(digits) if digits else 0

        WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located(self.UsageTableBodyLocator))
        usage_table_rows = self.driver.find_elements(
            *self.UsageTableRowsLocator)

        bill_data: List[BillingDatum] = []
        self.driver.screenshot(BaseWebScraper.screenshot_path("bill table"))
        for row in usage_table_rows:
            # Hidden cells are dropped so the indexes below refer to the
            # visible columns only.
            cols = [
                c for c in row.find_elements_by_tag_name("td")
                if "display: none" not in c.get_attribute("style")
            ]

            statement_text = cell_text(cols, 1)
            read_text = cell_text(cols, 2)
            days_text = cell_text(cols, 7)
            log.debug(
                f"statement={statement_text} end={read_text} days={days_text}")

            # statement date
            statement_date = date_parser.parse(statement_text).date()

            # bill end: the read date has no year; a December read date on a
            # January statement belongs to the previous year
            period_year = statement_date.year
            if statement_date.month == 1 and read_text.startswith("12"):
                period_year = statement_date.year - 1
            end_str = f"{read_text}/{period_year}"
            bill_end = date_parser.parse(end_str).date()

            # bill start
            bill_start = bill_end - timedelta(
                days=int(cell_number(cols, 7)) - 1)
            log.debug(f"start={bill_start} end={bill_end}")

            if not self._overlap(start, end, bill_start, bill_end):
                log.info(
                    f"skipping bill {bill_start} - {bill_end}: does not overlap requested range {start} - {end}"
                )
                continue

            # cost
            new_charges = cell_number(cols, 8)
            # used
            used = (cell_number(cols, 4) + cell_number(cols, 5) +
                    cell_number(cols, 6))
            # peak
            peak = cell_number(cols, 3)

            bill_datum = BillingDatum(
                start=bill_start,
                end=bill_end,
                statement=statement_date,
                cost=new_charges,
                used=used,
                peak=peak,
                items=None,
                attachments=None,
                utility_code=None,
            )

            # The downloaded file is expected to be named after the
            # statement month and year.
            bill_pdf_name = "SRPbill{}{}.pdf".format(
                statement_date.strftime("%B"), statement_date.year)
            try:
                pdf_download_link = cols[0].find_element_by_tag_name("a")
                scroll_to(self.driver, pdf_download_link)
                pdf_download_link.click()
                log.info("looking for %s in %s", bill_pdf_name,
                         self.driver.download_dir)
                self.driver.wait(60).until(
                    file_exists_in_dir(self.driver.download_dir,
                                       bill_pdf_name))
            except Exception as e:
                # Chain the original error so the root cause stays visible.
                raise Exception(
                    f"Failed to download bill {bill_pdf_name} for statement date {statement_date}:\n {e}"
                ) from e
            log.info(
                f"Bill {bill_pdf_name} for statement date {statement_date} downloaded successfully"
            )

            attachment_entry = None
            # open downloaded PDF and upload
            if config.enabled("S3_BILL_UPLOAD"):
                key = hash_bill_datum(account_id, bill_datum)
                with open(f"{self.driver.download_dir}/{bill_pdf_name}",
                          "rb") as pdf_data:
                    attachment_entry = upload_bill_to_s3(
                        BytesIO(pdf_data.read()),
                        key,
                        source="myaccount.srpnet.com",
                        statement=bill_datum.statement,
                        utility="utility:salt-river-project",
                        utility_account_id=account_id,
                    )
            if attachment_entry:
                bill_datum = bill_datum._replace(
                    attachments=[attachment_entry])
            bill_data.append(bill_datum)
        return bill_data