Ejemplos de upload_bill_to_s3 en Python, ejemplos de datafeeds.common.upload.upload_bill_to_s3 en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: energymanager_billing.py Proyecto: gnoose/datafeeds-shared

 def download_and_attach_pdf(
     self, bill_data: BillingDatum, billing_row: sce_pages.BillingDataRow
 ) -> BillingDatum:
     """Download the PDF for a billing row and attach it to the bill.

     Previously downloaded PDFs are cleared first. When a PDF path comes
     back from the download, the file is uploaded through
     ``bill_upload.upload_bill_to_s3`` and a copy of ``bill_data`` is returned
     whose ``attachments`` list holds the single upload result. When no path
     comes back, the miss is logged and ``bill_data`` is returned unchanged.
     """
     self.clear_pdf_downloads()
     pdf_path = self.download_pdf_for_billing_row(billing_row)

     # Guard clause: nothing was downloaded for this billing period.
     if not pdf_path:
         log.info(
             "No pdf bill was available for this period: %s to %s",
             bill_data.start,
             bill_data.end,
         )
         return bill_data

     with open(pdf_path, "rb") as pdf_handle:
         # The storage key is the bill hash with a ".pdf" suffix.
         s3_key = bill_upload.hash_bill_datum(self.service_id, bill_data) + ".pdf"
         attachment = bill_upload.upload_bill_to_s3(
             pdf_handle,
             s3_key,
             statement=bill_data.statement,
             source="sce.com",
             utility=self.utility,
             utility_account_id=self.utility_account_id,
         )
         return bill_data._replace(attachments=[attachment])

Ejemplo n.º 2

0

Mostrar archivo

    def _execute(self):
        """Log in, gather bill history files, and return the parsed bills.

        Each history entry is a ``(pdf, xls)`` pair. Bill data is parsed from
        the spreadsheet when one is present, otherwise from the PDF. When a
        PDF is present it is uploaded once per parsed bill, and the upload
        result (if truthy) is attached to that bill.
        """
        home_page = LoginPage(self._driver).login(self.username, self.password)
        self.screenshot("home_page")
        bill_history_page = home_page.to_bill_history()
        bill_history_page.set_dates(self.start_date, self.end_date)
        self.screenshot("bill_history")

        history = bill_history_page.gather_data()

        # Tally the non-empty downloads for the summary log line.
        pdf_blobs = [entry[0] for entry in history if entry[0]]
        xls_blobs = [entry[1] for entry in history if entry[1]]
        log.info(
            "Acquired %s pdfs (%s bytes) and %s excel files (%s bytes)."
            % (
                len(pdf_blobs),
                sum(map(len, pdf_blobs)),
                len(xls_blobs),
                sum(map(len, xls_blobs)),
            )
        )

        bills = []
        for pdf, xls in history:
            # Prefer the spreadsheet; fall back to parsing the PDF text.
            if xls is not None:
                period_bills = bill_data_from_xls(xls, self.service_account)
            elif pdf is not None:
                period_bills = bill_data_from_pdf(
                    pdf, self.service_account, self.meter_serial
                )
            else:
                period_bills = []

            if not period_bills:
                continue

            if pdf is not None:
                with_attachments = []
                for datum in period_bills:
                    s3_key = bill_upload.hash_bill_datum(self.service_account, datum)
                    # statement date is not visible in the bill PDF text; use end date
                    entry = bill_upload.upload_bill_to_s3(
                        BytesIO(pdf),
                        s3_key,
                        source="atmosenergy.com",
                        statement=datum.end,
                        utility=self.utility,
                        utility_account_id=self.utility_account_id,
                    )
                    with_attachments.append(
                        datum._replace(attachments=[entry]) if entry else datum
                    )
                period_bills = with_attachments

            bills.extend(period_bills)

        return Results(bills=adjust_bill_dates(bills))

Ejemplo n.º 3

0

Mostrar archivo

    def _execute(self):
        """Log in, download bill PDFs, parse and upload them, and return bills.

        PDFs that ``parse_bill_pdf`` cannot turn into a bill (``None``) are
        skipped. Every remaining bill is uploaded and, when the upload returns
        a truthy entry, that entry becomes the bill's only attachment. Start
        dates are then moved forward one day before the final date adjustment.
        """
        home_page = LoginPage(self._driver).login(self.keller_id, self.password)
        self.screenshot("home_page")

        bill_history_page = home_page.to_bill_history()
        self.screenshot("bill_history_page")

        pdf_blobs = bill_history_page.gather_data(
            self.keller_id, self.start_date, self.end_date
        )

        log.info(
            "Acquired %d bills (%s bytes total)."
            % (len(pdf_blobs), sum(len(blob) for blob in pdf_blobs))
        )

        parsed_bills = []
        for blob in pdf_blobs:
            datum = parse_bill_pdf(BytesIO(blob))
            if datum is None:
                continue

            s3_key = bill_upload.hash_bill_datum(self.account_number, datum)
            # bill doesn't have a statement date; use end date
            entry = bill_upload.upload_bill_to_s3(
                BytesIO(blob),
                s3_key,
                statement=datum.end,
                source="cityofkeller.com",
                utility=self.utility,
                utility_account_id=self.account_number,
            )
            parsed_bills.append(
                datum._replace(attachments=[entry]) if entry else datum
            )

        # bill periods overlap; adjust start dates
        shifted_bills = [
            BillingDatum(
                start=bill.start + timedelta(days=1),
                end=bill.end,
                statement=bill.statement,
                cost=bill.cost,
                used=bill.used,
                peak=bill.peak,
                items=bill.items,
                attachments=bill.attachments,
                utility_code=None,
            )
            for bill in parsed_bills
        ]
        final_bills = adjust_bill_dates(shifted_bills)
        show_bill_summary(final_bills, "Final Bill Summary")
        return Results(bills=final_bills)

Ejemplo n.º 4

0

Mostrar archivo

def process_pdf(
    utility: str,
    utility_account_id: str,
    service_id: str,
    statement_dt: date,
    pdf_filename: str,
) -> BillingDatum:
    """Parse one bill PDF, upload it, and return the resulting BillingDatum.

    Cost, usage, demand, the billing period and the line items are extracted
    from the PDF text. The period dates are corrected for year roll-over, and
    the end date is pulled back one day. The PDF is then uploaded under a key
    derived from the bill values, and the upload result becomes the single
    attachment of the returned datum.
    """
    log.info("Parsing text from PDF %s", pdf_filename)
    pdf_text = pdfparser.pdf_to_str(pdf_filename)

    cost = extract_cost(pdf_text)
    used = extract_used(pdf_text)
    demand = extract_demand(pdf_text)
    period_start, period_end = extract_dates(pdf_text)

    # if the start date is in the wrong year, replace year
    # (e.g. start = 12/1, statement_dt = 12/15/2020)
    if period_start > statement_dt:
        statement_year = statement_dt.year
        period_start = period_start.replace(year=statement_year)
        period_end = period_end.replace(year=statement_year)
    # the end must come after the start (e.g. end = 1/5, start = 12/1)
    if period_end < period_start:
        period_end = period_end.replace(year=period_end.year + 1)

    # SVP bills overlap on start/end dates, so pull the end back one day
    period_end -= timedelta(days=1)
    line_items: List[BillingDatumItemsEntry] = extract_line_items(pdf_text)
    s3_key = hash_bill(service_id, period_start, period_end, cost, demand, used)

    with open(pdf_filename, "rb") as pdf_handle:
        pdf_stream = BytesIO(pdf_handle.read())
        # NOTE(review): the upload is tagged with the adjusted end date while
        # the datum below carries statement_dt - confirm this is intended.
        attachment_entry = upload_bill_to_s3(
            pdf_stream,
            s3_key,
            source="mua.santaclaraca.gov",
            statement=period_end,
            utility=utility,
            utility_account_id=utility_account_id,
        )

    return BillingDatum(
        start=period_start,
        end=period_end,
        statement=statement_dt,
        cost=cost,
        used=used,
        peak=demand,
        items=line_items,
        attachments=[attachment_entry],
        utility_code=None,
    )

Ejemplo n.º 5

0

Mostrar archivo

Archivo: smud_myaccount_billing.py Proyecto: gnoose/datafeeds-shared

    def make_billing_datum(self, bill_detail: BillPeriodDetails) -> BillingDatum:
        """Build a Gridium BillingDatum from a website billing detail summary.

        The statement date is read from the ``Date=yyyy-mm-dd`` part of the
        download link. If the link is missing, has no such part, or the date
        cannot be parsed, the period end is used instead. When a PDF can be
        downloaded it is uploaded, and a truthy upload result is attached to
        the returned datum.
        """
        # get statement date from link: Date=yyyy-mm-dd
        link = bill_detail.download_link
        date_match = re.search(r"Date=(\d\d\d\d-\d\d-\d\d)", link) if link else None

        statement = None
        if date_match:
            raw_date = date_match.group(1)
            try:
                statement = parse_date(raw_date).date()
            except Exception as exc:
                log.warning("error parsing date %s: %s", raw_date, exc)
        # Fall back to the period end when the link gave no usable date.
        statement = statement or bill_detail.end

        datum = BillingDatum(
            start=bill_detail.start,
            end=bill_detail.end,
            statement=statement,
            cost=bill_detail.total_charges,
            used=bill_detail.total_kwh,
            peak=bill_detail.max_kw,
            items=None,
            attachments=None,
            utility_code=bill_detail.utility_code,
        )

        pdf_bytes = self.download_pdf(bill_detail)
        if not pdf_bytes:
            return datum

        s3_key = bill_upload.hash_bill_datum(self.account_id, datum)
        entry = bill_upload.upload_bill_to_s3(
            BytesIO(pdf_bytes),
            s3_key,
            source="smud.org",
            statement=statement,
            utility=self.utility,
            utility_account_id=self.account_id,
        )
        if not entry:
            return datum
        return datum._replace(attachments=[entry])

Ejemplo n.º 6

0

Mostrar archivo

    def _execute(self):
        """Log in, load the account's bill history, and return its bills.

        A requested window shorter than 90 days is widened by moving
        ``self.start_date`` back to 90 days before ``self.end_date``. Each
        gathered record is a ``(bill, pdf_bytes)`` pair. Records that have a
        PDF are uploaded, and a truthy upload result is attached to the bill.
        """
        minimum_window = timedelta(days=90)
        if self.end_date - self.start_date < minimum_window:
            self.start_date = self.end_date - minimum_window
            log.info(
                "Initial time window was too narrow for this utility. Expanding time window to: %s - %s"
                % (self.start_date, self.end_date)
            )

        home_page = LoginPage(self._driver).login(self.username, self.password)

        log.info("Login successful. Loading bill history.")
        self.screenshot("post_login")
        bill_history_page = home_page.select_account(self.account_number)

        log.info("Loaded bill history page.")
        self.screenshot("bill_history")
        results = bill_history_page.gather_data(self.start_date, self.end_date)

        pdf_count = sum(pdf is not None for _, pdf in results)
        log.info("Obtained %s bill records and %s PDFs." % (len(results), pdf_count))

        bills = []
        for datum, pdf_bytes in results:
            entry = None
            if pdf_bytes is not None:
                s3_key = bill_upload.hash_bill_datum(self.account_number, datum)
                entry = bill_upload.upload_bill_to_s3(
                    BytesIO(pdf_bytes),
                    s3_key,
                    statement=datum.statement,
                    source="hudsonenergy.net",
                    utility=self.utility,
                    utility_account_id=self.account_number,
                )
            # Keep the bill unchanged when there is no PDF or no upload entry.
            bills.append(datum._replace(attachments=[entry]) if entry else datum)

        return Results(bills=adjust_bill_dates(bills))

Ejemplo n.º 7

0

Mostrar archivo

Archivo: pdf_parser.py Proyecto: gnoose/datafeeds-shared

def parse_pdf(pdf_filename: str, utility: str,
              utility_account_id: str) -> BillingDatum:
    """Parse a bill PDF, upload it, and return the bill with its attachment.

    The PDF text selects the parser: text containing "Your Energy Bill" goes
    to ``parse_new_pdf``, anything else to ``parse_old_pdf``. The file is then
    uploaded under a key derived from the parsed values, with the bill end
    date used as the statement date. The upload result becomes the single
    attachment of the returned datum.
    """
    pdf_text = pdfparser.pdf_to_str(pdf_filename)
    if "Your Energy Bill" not in pdf_text:
        log.info("parsing old-style PDF %s", pdf_filename)
        bill = parse_old_pdf(pdf_text)
    else:
        log.info("parsing new-style PDF %s", pdf_filename)
        bill = parse_new_pdf(pdf_text)

    s3_key = hash_bill(
        utility_account_id, bill.start, bill.end, bill.cost, bill.peak, bill.used
    )
    with open(pdf_filename, "rb") as pdf_handle:
        pdf_stream = BytesIO(pdf_handle.read())
        entry = upload_bill_to_s3(
            pdf_stream,
            s3_key,
            source="www.duke-energy.com",
            statement=bill.end,
            utility=utility,
            utility_account_id=utility_account_id,
        )
    return bill._replace(attachments=[entry])

Ejemplo n.º 8

0

Mostrar archivo

    def get_bills(self, utility: str,
                  utility_account_id: str) -> List[BillingDatum]:
        """Scrape, print, and upload every listed bill inside the date range.

        Reads the bill dates from the links in the bill table, then visits
        each date between ``self.start_date`` and ``self.end_date``
        (inclusive). For each one it reads the service period, cost, kWh usage
        and kW demand from the bill page, prints the bill image to a PDF,
        renames that file to ``bill_<yyyy-mm-dd>.pdf``, and uploads it.

        Args:
            utility: passed through to ``upload_bill_to_s3``.
            utility_account_id: used for the upload key and passed through to
                ``upload_bill_to_s3``.

        Returns:
            One BillingDatum per visited bill, in the order visited. A bill
            carries an attachment only when the upload returned a truthy entry.

        Raises:
            Exception: "Unable to download file for <date>" when the
                ``file_exists_in_dir`` call raises.
        """
        billing_data = []

        available_dates = self.driver.find_elements(
            By.CSS_SELECTOR, "table.table-alt a.bill-view-link")
        # the link text is parsed as the bill date
        available_dates = [parse_date(i.text).date() for i in available_dates]
        log.info("available dates: %s",
                 [dt.strftime("%Y-%m-%d") for dt in available_dates])

        xpath_locators = {
            # third cell of the rows in the table that contains 'NEW CHARGES'
            "cost":
            "//table[contains(., 'NEW CHARGES')]/tbody/tr/td[3]",
            # cells containing 'KWH' in the table that contains 'USAGE'
            "used":
            "(//table[contains(.,'USAGE')]//tr/td[contains(., 'KWH')])",
            # cells containing 'KW' but not 'KWH' in the table that contains 'USAGE'
            "usage_kw":
            "//table[contains(.,'USAGE')]//tr/td[contains(.,'KW') and not(contains(.,'KWH'))]",
        }

        # loop through dates in table in ascending order
        for pdf_date in reversed(available_dates):
            # skip if the date isn't in the specified range
            if not (self.start_date <= pdf_date <= self.end_date):
                log.debug("skipping date outside range: %s", pdf_date)
                continue

            # the link is located again by its mm/dd/yyyy text
            view_bill_link = self.driver.find_element_by_xpath(
                '//a[.="%s"]' % pdf_date.strftime("%m/%d/%Y"))
            scroll_to(self.driver, view_bill_link)

            self.driver.sleep(0.5)
            view_bill_link.click()

            # wait up to 30 (presumably seconds) for the bill image to be visible
            self.driver.wait(30).until(
                EC.visibility_of_element_located(
                    (By.CSS_SELECTOR, "div.billImage")))

            start_date = None
            end_date = None
            cost = None
            used = None
            peak = None

            dates_line_text: str = self.driver.find_element_by_xpath(
                "//td[contains(., 'Service From:')]").text
            dates_match = re.search(
                r"Service From: (?P<from>\w+ \d\d) to (?P<to>\w+ \d\d) \(\d\d Days\)",
                dates_line_text,
            )

            if dates_match:
                # if from month is December, use previous year
                year = (pdf_date.year -
                        1 if "dec" in dates_match.group("from").lower() else
                        pdf_date.year)
                # NOTE(review): unlike available_dates above, .date() is not
                # called on these parse_date results - confirm the intended type.
                start_date = parse_date("%s %s" %
                                        (dates_match.group("from"), year))
                # the end date always takes the bill date's year
                end_date = parse_date(
                    dates_match.group("to") + pdf_date.strftime(" %Y"))

            cost_match = self.driver.find(xpath_locators["cost"], xpath=True)
            if cost_match:
                cost = cost_match.text
                # drop the currency symbol and thousands separators
                cost = float(cost.replace("$", "").replace(",", ""))

            kwh_usages = []
            for match in self.driver.find_all(xpath_locators["used"],
                                              xpath=True):
                # include only if it has a reading values as siblings; exclude credit line items
                parent = match.find_element_by_xpath("..")
                # meter number, previous reading, current reading
                readings_text = ""
                # only the first three cells of the row are inspected
                for idx, child in enumerate(
                        parent.find_elements_by_xpath(".//td")):
                    log.debug("\t%s\t%s", idx, child.text.strip())
                    readings_text += child.text.strip()
                    if idx == 2:
                        break
                if not readings_text:
                    log.info("skipping non-reading line item: %s", parent.text)
                    continue
                kwh_value = float(
                    match.text.replace("KWH", "").replace(",", "").strip())
                kwh_usages.append(kwh_value)

            # usage is the sum of all kWh readings found; None when there are none
            if kwh_usages:
                used = sum(kwh_usages)

            kw_usages = []
            for usage_kw_match in self.driver.find_all(
                    xpath_locators["usage_kw"], xpath=True):
                kw_usages.append(
                    float(
                        usage_kw_match.text.replace("KW",
                                                    "").replace(",",
                                                                "").strip()))

            # peak is the largest kW value found; None when there are none
            if kw_usages:
                peak = max(kw_usages)

            # NOTE(review): when the "Service From" pattern did not match,
            # end_date is still None here and the subtraction below raises
            # TypeError - confirm whether that case can occur.
            data = BillingDatum(
                start=start_date,
                end=end_date - timedelta(days=1),
                statement=end_date - timedelta(days=1),
                cost=cost,
                peak=peak,
                used=used,
                items=None,
                attachments=None,
                utility_code=None,
            )

            self.driver.find("a#billImageToPrint").click()
            self.driver.sleep(1)
            # switch to the last window handle (presumably the one just opened)
            self.driver.switch_to.window(self.driver.window_handles[-1])

            # the filename of the printed pdf is f"{current page title}.pdf"
            self.driver.execute_script("window.print();")

            # NOTE(review): file_exists_in_dir is called directly here rather
            # than being passed to a wait; whether this call blocks or raises
            # when the file is missing is not visible from this block - verify.
            try:
                file_exists_in_dir(directory=self.download_dir,
                                   pattern=r"^Bill View Bill Image.pdf$")
            except Exception:
                raise Exception("Unable to download file for %s" % pdf_date)

            # rename to a per-date name so every bill gets its own file name
            curr_path = os.path.join(self.download_dir,
                                     "Bill View Bill Image.pdf")
            new_path = os.path.join(
                self.download_dir, f"bill_{pdf_date.strftime('%Y-%m-%d')}.pdf")
            os.rename(curr_path, new_path)

            log.info("parsed bill for %s - %s", data.start, data.end)

            self.driver.find("a#close").click()
            self.driver.sleep(1)
            self.driver.switch_to.window(self.driver.window_handles[-1])
            self.driver.sleep(1)

            # upload PDF:
            key = hash_bill(
                utility_account_id,
                data.start,
                data.end,
                data.cost,
                data.peak,
                data.used,
            )

            with open(new_path, "rb") as pdf_data:
                attachment_entry = upload_bill_to_s3(
                    BytesIO(pdf_data.read()),
                    key,
                    source="www.duke-energy.com",
                    statement=data.end,
                    utility=utility,
                    utility_account_id=utility_account_id,
                )

            # attach only when the upload returned an entry
            if attachment_entry:
                data = data._replace(attachments=[attachment_entry])

            billing_data.append(data)

            # Click Bill Information in breadcrumbs to go back to bills list page
            self.driver.find("a#billInformation").click()

        return billing_data

Ejemplo n.º 9

0

Mostrar archivo

    def get_bills(self, account_id: str, start: date,
                  end: date) -> List[BillingDatum]:
        """Get bills from the usage table, downloading each bill's PDF.

        For each table row (hidden cells are ignored):
          - statement date comes from column 1
          - end date comes from column 2 plus a year derived from the
            statement date
          - start date is end date - (days in column 7 - 1)
          - rows whose start - end period does not overlap the requested
            start / end are skipped
          - peak comes from column 3, used is the sum of columns 4, 5 and 6,
            and cost comes from column 8
          - the link in column 0 is clicked to download the PDF, and the
            method waits for the file to appear in self.driver.download_dir

        Args:
            account_id: used for the upload key and as the utility account id.
            start: first day of the requested range.
            end: last day of the requested range.

        Returns:
            One BillingDatum per overlapping row, in table order. A bill
            carries an attachment only when S3_BILL_UPLOAD is enabled and the
            upload returned a truthy entry.

        Raises:
            Exception: when clicking the download link or waiting for the
                downloaded PDF fails.
        """
        WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located(self.UsageTableBodyLocator))
        usage_table_rows = self.driver.find_elements(
            *self.UsageTableRowsLocator)

        bill_data: List[BillingDatum] = []
        self.driver.screenshot(BaseWebScraper.screenshot_path("bill table"))
        for row in usage_table_rows:
            cols = row.find_elements_by_tag_name("td")
            # drop hidden cells so the column indexes below line up
            cols = [
                c for c in cols
                if "display: none" not in c.get_attribute("style")
            ]

            # helpers that read a cell of the current row by column index
            col = lambda x: cols[x].text
            # keeps only digits and "." - any sign or currency symbol is dropped
            to_num = lambda x: "".join(d for d in col(x)
                                       if d.isdigit() or d == ".")
            # a cell with no digits counts as 0
            to_float = lambda x: float(to_num(x)) if len(to_num(x)) > 0 else 0

            log.debug(f"statement={col(1)} end={col(2)} days={col(7)}")
            # statement date
            statement_date = date_parser.parse(col(1)).date()

            # bill end
            period_year = statement_date.year
            # a January statement whose end text starts with "12" belongs to
            # the previous year
            if statement_date.month == 1 and col(2).startswith("12"):
                period_year = statement_date.year - 1
            end_str = f"{col(2)}/{period_year}"
            bill_end = date_parser.parse(end_str).date()

            # bill start
            bill_start = bill_end - timedelta(days=int(to_float(7)) - 1)
            log.debug(f"start={bill_start} end={bill_end}")

            if not self._overlap(start, end, bill_start, bill_end):
                log.info(
                    f"skipping bill {bill_start} - {bill_end}: does not overlap requested range {start} - {end}"
                )
                continue

            # cost
            new_charges = to_float(8)
            # used
            used = to_float(4) + to_float(5) + to_float(6)
            # peak
            peak = to_float(3)

            bill_datum = BillingDatum(
                start=bill_start,
                end=bill_end,
                statement=statement_date,
                cost=new_charges,
                used=used,
                peak=peak,
                items=None,
                attachments=None,
                utility_code=None,
            )

            try:
                # expected download name: SRPbill<full month name><year>.pdf
                bill_pdf_name = "SRPbill{}{}.pdf".format(
                    statement_date.strftime("%B"), statement_date.year)
                pdf_download_link = cols[0].find_element_by_tag_name("a")
                scroll_to(self.driver, pdf_download_link)
                pdf_download_link.click()
                log.info("looking for %s in %s", bill_pdf_name,
                         self.driver.download_dir)
                # wait up to 60 (presumably seconds) for the file to appear
                self.driver.wait(60).until(
                    file_exists_in_dir(self.driver.download_dir,
                                       bill_pdf_name))
            except Exception as e:
                raise Exception(
                    f"Failed to download bill {bill_pdf_name} for statement date {statement_date}:\n {e}"
                )
            log.info(
                f"Bill {bill_pdf_name} for statement date {statement_date} downloaded successfully"
            )

            attachment_entry = None
            # open downloaded PDF and upload
            if config.enabled("S3_BILL_UPLOAD"):
                key = hash_bill_datum(account_id, bill_datum)
                with open(f"{self.driver.download_dir}/{bill_pdf_name}",
                          "rb") as pdf_data:
                    attachment_entry = upload_bill_to_s3(
                        BytesIO(pdf_data.read()),
                        key,
                        source="myaccount.srpnet.com",
                        statement=bill_datum.statement,
                        utility="utility:salt-river-project",
                        utility_account_id=account_id,
                    )
            # attach only when an upload happened and returned an entry
            if attachment_entry:
                bill_data.append(
                    bill_datum._replace(attachments=[attachment_entry]))
            else:
                bill_data.append(bill_datum)
        return bill_data

Ejemplo n.º 10

0

Mostrar archivo

Archivo: portland_bizportal.py Proyecto: gnoose/datafeeds-shared

def _first_group(pattern, text, label, flags=0):
    """Return group 1 of the first match of ``pattern`` in ``text``.

    Raises:
        ValueError: naming ``label`` when the pattern does not match, so a
            bill with an unexpected layout fails with a readable message
            instead of an AttributeError on ``None``.
    """
    match = re.search(pattern, text, flags)
    if match is None:
        raise ValueError("Unable to find %s in bill PDF text" % label)
    return match.group(1)


def extract_bill_data(pdf_filename, service_id, utility,
                      utility_account_id) -> Optional[BillingDatum]:
    """Parse a bill PDF into a BillingDatum, uploading the PDF when enabled.

    Args:
        pdf_filename: path of the downloaded bill PDF.
        service_id: used to build the upload key.
        utility: passed through to ``upload_bill_to_s3``.
        utility_account_id: passed through to ``upload_bill_to_s3``.

    Returns:
        The parsed bill, or None when the file cannot be read as a PDF. The
        bill's ``attachments`` list holds the upload entry when
        S3_BILL_UPLOAD is enabled and the upload returned one; otherwise it
        is empty.

    Raises:
        ValueError: when the current charges, usage, or demand values cannot
            be found in the PDF text.
    """
    try:
        text = pdf_to_str(pdf_filename)
    except PDFSyntaxError:
        log.exception("Downloaded bill file failed to parse as a PDF.")
        return None

    charges_section = _first_group(
        "Current Charges(.*?)Cycle", text, "current charges section", re.DOTALL
    )
    current_charges = None
    for line in charges_section.split("\n"):
        candidate = line.strip()
        # get the last number: each later numeric line replaces the earlier one
        if re.match(r"[\d,\.]", candidate):
            current_charges = candidate.replace(",", "")
    if current_charges is None:
        raise ValueError("Unable to find a current charges amount in bill PDF text")

    period_start, period_end = extract_bill_period(pdf_filename)

    usage = _first_group(r"Energy Charges \((\d*) kWh\)", text, "energy usage")
    on_peak_demand = _first_group(
        r"On-Peak Demand \((\d+\.\d+)\ KW", text, "on-peak demand"
    )
    offpeak_demand = _first_group(
        r"Off-Peak Demand \((\d+\.\d+)\ KW", text, "off-peak demand"
    )

    cost = _format_number(current_charges)
    used = _format_number(usage)

    bill_attachment = []
    if config.enabled("S3_BILL_UPLOAD"):
        log.info("S3_BILL_UPLOAD is enabled")
        with open(pdf_filename, "rb") as f:
            key = hash_bill(service_id, period_start, period_end, cost, 0, used)
            # no statement date; use end date
            attachment_entry = upload_bill_to_s3(
                f,
                key,
                source="portlandgeneral.com",
                statement=period_end,
                utility=utility,
                utility_account_id=utility_account_id,
            )
        # Other callers of upload_bill_to_s3 treat a falsy result as "no
        # attachment"; do the same so attachments never contains None.
        if attachment_entry:
            bill_attachment.append(attachment_entry)
            log.info("Uploaded bill %s to s3", bill_attachment)
        else:
            log.warning("Bill upload returned no attachment entry for %s", pdf_filename)

    return BillingDatum(
        start=period_start,
        end=period_end,
        statement=period_end,
        cost=cost,
        used=used,
        peak=max(
            float(on_peak_demand),
            float(offpeak_demand),
        ),
        items=[],
        attachments=bill_attachment,
        utility_code=None,
    )

Ejemplo n.º 11

0

Mostrar archivo

def parse_poway_pdf(pdf_filename: str, account_id: str) -> BillingDatum:
    """Parse a City of Poway water bill PDF into a BillingDatum.

    The PDF is converted to text and the read period, statement date, usage
    and water charges are extracted with regular expressions. When the
    S3_BILL_UPLOAD feature is enabled, the PDF is also uploaded and attached
    to the returned bill.

    Args:
        pdf_filename: path of the downloaded bill PDF.
        account_id: utility account id; used for the upload key and as the
            utility_account_id of the uploaded attachment.

    Returns:
        A BillingDatum whose end is one day before the parsed read end date.

    Raises:
        InvalidMeterDataException: if dates, usage or cost cannot be found
            in the PDF text.
    """
    text = pdfparser.pdf_to_str(pdf_filename)

    used_pattern = r"Consumption (?P<units_used>[\d\.,]+) @"
    cost_pattern = r"(?P<water_charges>[\d\.,]+)\s+WATERBasic Service @"

    # date format: m/d/yyyy
    date_pattern = r"\d{1,2}\/\d{1,2}\/\d{4}"
    # read period, due date and statement date follow "Total Current Charges"
    dates_pattern = (
        r"Total Current Charges.+?"
        fr"(?P<read_date_start>{date_pattern}) - (?P<read_date_end>{date_pattern})"
        fr"(?P<due_date>{date_pattern})"
        fr"(?P<statement_date>{date_pattern})")

    dates_match = re.search(dates_pattern, text)
    if not dates_match:
        raise InvalidMeterDataException(
            f"Couldn't parse dates from pdf: {text}")

    _dates = dates_match.group("read_date_start", "read_date_end",
                               "statement_date")
    start_date, end_date, statement_date = [
        parse_date(_date).date() for _date in _dates
    ]

    used_match = re.search(used_pattern, text)
    if not used_match:
        # Fixed: the f prefix was inside the quotes, so the message was
        # emitted literally instead of including the PDF text.
        raise InvalidMeterDataException(
            f"Couldn't parse usage from pdf: {text}")

    used_text = used_match.group("units_used")
    used = float(used_text.replace(",", "").replace("$", ""))

    cost_match = re.search(cost_pattern, text)
    if not cost_match:
        raise InvalidMeterDataException(
            f"Couldn't parse cost from pdf: {text}")

    cost_text = cost_match.group("water_charges")
    cost = float(cost_text.replace(",", "").replace("$", ""))

    if config.enabled("S3_BILL_UPLOAD"):
        # the key is computed from the unadjusted read end date
        key = hash_bill(account_id, start_date, end_date, cost, 0, used)
        with open(pdf_filename, "rb") as pdf_data:
            attachments = [
                upload_bill_to_s3(
                    BytesIO(pdf_data.read()),
                    key,
                    source="customerconnect.poway.org",
                    statement=statement_date,
                    utility="utility:city-of-poway",
                    utility_account_id=account_id,
                )
            ]
    else:
        attachments = []
    return BillingDatum(
        start=start_date,
        end=end_date - timedelta(days=1),
        statement=statement_date,
        cost=cost,
        peak=None,
        used=used,
        items=None,
        attachments=attachments,
        utility_code=None,
    )

Ejemplo n.º 12

0

Mostrar archivo

Archivo: bill_pdf.py Proyecto: gnoose/datafeeds-shared

    def download_bills(
        self,
        latest: date,
        utility_account: str,
        utility: str,
        gen_utility: Optional[str] = None,
        gen_utility_account_id: Optional[str] = None,
    ) -> List[BillPdf]:
        """Download bill PDFs newer than ``latest`` and upload them to S3.

        Opens the billing history widget, expands it, and walks every panel.
        Payment panels are skipped, as are bills whose approximate end date
        is on or before ``latest``. Each remaining bill PDF is downloaded via
        the browser, uploaded with ``upload_bill_to_s3`` and recorded.

        Args:
            latest: bills with an approximate end date on or before this date
                are ignored.
            utility_account: utility account id recorded on the upload and on
                the returned BillPdf records.
            utility: utility identifier passed to the upload.
            gen_utility: optional generation utility passed to the upload.
            gen_utility_account_id: optional generation account id passed to
                the upload and recorded on the returned BillPdf records.

        Returns:
            One BillPdf per bill that was downloaded and uploaded.
        """
        pdfs: List[BillPdf] = []
        log.info("Opening billing history")

        click(self._driver, css_selector="#arrowBillPaymentHistory")

        self.wait_until_ready(self.BillingHistoryTableSel)
        self._driver.screenshot(
            BaseWebScraper.screenshot_path("bill history arrow"))
        wait_for_block_overlay(self._driver)

        log.info("Clicking 'view up to..' link")

        click(self._driver, css_selector=self.ViewMoreHistorySel)
        self.wait_until_ready(self.BillingHistoryTableSel)

        self._driver.screenshot(BaseWebScraper.screenshot_path("panels"))

        panels_count = len(
            self._driver.find_elements_by_css_selector(self.PanelxSel))
        log.info(f"found {panels_count} panels in billing widget")

        # Rather than get all matching elements and iterate through, use index
        # and manually get element each time to help avoid stale element errors
        for i in range(0, panels_count):
            panel = self._driver.find_elements_by_css_selector(
                self.PanelxSel)[i]

            # check if is a payment panel
            panel_header = panel.find_element_by_css_selector(".panel-title")
            header_text = panel_header.text
            if "Payment" in header_text:
                log.debug(f"Skipping panel {i} (payment)")
                # skip if is a payment panel
                continue

            log.debug(f"Processing panel {i} (bill): {header_text}")

            link_elem = panel.find_element_by_css_selector(
                "div.pge_coc-dashboard-viewPay_billed_history_panel_viewBill_para_block"
                " a.viewBill")
            # Get date from the "data-date" attribute on link to download bill...
            # data-date is in milliseconds
            timestamp = int(link_elem.get_attribute("data-date")) / 1000.0

            # when bill was issued
            # NOTE(review): fromtimestamp() converts using the process's local
            # timezone -- confirm that is the intended interpretation.
            bill_date = datetime.fromtimestamp(timestamp).date()
            # bill issued about a week after end date; use this window to match dates
            approx_bill_end = bill_date - timedelta(days=7)
            approx_bill_start = approx_bill_end - timedelta(days=30)
            log.debug(f"bill date={bill_date}")

            # cost is in second column
            cost_text = panel.find_element_by_css_selector(
                "td.text-right").text
            log.debug(f"cost text={cost_text}")
            # cost with $ and commas: $1,234.56 or -$1,234.56
            cost = float(cost_text.replace("$", "").replace(",", ""))

            log.info(f"Found bill issued {bill_date} with cost ${cost}")

            if approx_bill_end <= latest:
                log.info(
                    f"ignoring bill, date: {approx_bill_end} already download")
                continue

            try:
                click(self._driver, elem=link_elem)
            except ElementNotInteractableException:
                log.info("Download link not visible; looking for other")

                # fall back to the alternate link inside the bill summary
                link_elem = panel.find_element_by_css_selector(
                    "div#billSummaryContainer a.viewBill")

                click(self._driver, elem=link_elem)
            except ElementClickInterceptedException as exc:
                log.info("download link failed: %s %s", exc, exc.msg)
                close_modal(self._driver)
                continue

            # expected download name is built from part of the account id
            # and the bill issue date
            last4 = self.account_id.split("-")[0][6:10]
            filename = f"{last4}custbill{bill_date.strftime('%m%d%Y')}.pdf"
            download_dir = "%s/current" % config.WORKING_DIRECTORY

            try:
                self._driver.wait(60).until(
                    file_exists_in_dir(
                        # end pattern with $ to prevent matching filename.crdownload
                        directory=download_dir,
                        pattern=f"^{filename}$",
                    ))
            except TimeoutException:
                log.error(
                    f"ERROR waiting for file {filename} to download...skipping"
                )
                # close the download failed modal if there is one
                close_modal(self._driver)
                continue

            with open("%s/%s" % (download_dir, filename), "rb") as f:
                # usage and peak are unknown at this point; hash on dates and cost
                key = hash_bill(self.account_id, approx_bill_start,
                                approx_bill_end, cost, "", "")

                upload_bill_to_s3(
                    file_handle=f,
                    key=key,
                    source="pge.com",
                    statement=bill_date,
                    utility=utility,
                    utility_account_id=utility_account,
                    gen_utility=gen_utility,
                    gen_utility_account_id=gen_utility_account_id,
                )

            log.info(f"Uploaded {filename} to {key}")
            pdfs.append(
                BillPdf(
                    utility_account_id=utility_account,
                    # Fixed: this previously received gen_utility (the utility
                    # identifier) instead of the generation account id.
                    gen_utility_account_id=gen_utility_account_id,
                    start=approx_bill_start,
                    end=approx_bill_end,
                    statement=bill_date,
                    s3_key=key,
                ))

        return pdfs

Ejemplo n.º 13

0

Mostrar archivo

Archivo: ladwp_bill_pdf.py Proyecto: gnoose/datafeeds-shared

    def _execute(self):
        """Log in, download bill PDFs, parse them, and upload each PDF.

        Navigates from the login page to bill history (retrying the initial
        page load once), solves the captcha, selects the configured account
        and downloads bills for the scraper's date range. Every PDF in the
        download directory is then parsed and uploaded; all bills parsed from
        one file share that file's attachment. When two bills have the same
        start date, the later one replaces the earlier one and keeps the
        earlier bill's attachments as well.

        Returns:
            Results containing the collected bills.

        Raises:
            Exception: if the site reports too many sessions or the captcha
                cannot be solved.
        """
        # Direct the driver to the login page
        self._driver.get(self.login_url)
        # Page helpers are all created up front
        login = LoginPage(self._driver)
        account_page = MyAccountPage(self._driver)
        history_page = BillHistoryPage(self._driver)

        try:
            login.wait_until_ready()
        except Exception:
            self.screenshot("initial page load failed")
            # one retry of the initial load
            self._driver.get(self.login_url)
            login.wait_until_ready()
        login.login(self.username, self.password)
        self.screenshot("after login")

        account_page.wait_until_ready()
        account_page.navigate_to_bill_history()
        self.screenshot("bill history")

        if history_page.too_many_sessions():
            # waiting 5 minutes doesn't seem to help
            history_page.logout()
            raise Exception("too many sessions")
        history_page.wait_until_ready()
        self.screenshot("after captcha")
        if not history_page.solve_captcha():
            history_page.logout()
            raise Exception("captcha failed")

        history_page.wait_until_bills_ready()
        history_page.select_account(
            self._configuration.utility_account_id,
            self._configuration.account_name,
        )
        history_page.wait_until_bills_ready()
        history_page.download_bills(self.start_date, self.end_date)
        history_page.logout()

        # Collect bills from the download directory and parse them
        bills: List[BillingDatum] = []
        download_dir = f"{config.WORKING_DIRECTORY}/current"

        log.info("Waiting for downloads to finish")
        # Poll until no partial-download files remain
        while any(".pdf.crdownload" in name
                  for name in os.listdir(download_dir)):
            time.sleep(1)

        account_id = self._configuration.utility_account_id
        seen_starts: Set[date] = set()
        for filename in sorted(os.listdir(download_dir)):
            if ".pdf" not in filename:
                continue

            pdf_path = f"{download_dir}/{filename}"
            log.info("parsing file %s", filename)
            parsed = parse_pdf(pdf_path, self.meter_number, self.commodity)
            log.info(f"filename {filename} bills={parsed}")
            if not parsed:
                log.warning(f"no billing datum: filename={filename}")
                continue

            # The upload key is derived from the first bill in the file
            first = parsed[0]
            with open(pdf_path, "rb") as pdf_file:
                key = hash_bill(
                    account_id,
                    first.start,
                    first.end,
                    first.cost,
                    first.peak,
                    first.used,
                )
                attachment_entry = upload_bill_to_s3(
                    BytesIO(pdf_file.read()),
                    key,
                    source="www.ladwp.com",
                    statement=first.end,
                    utility="utility:ladwp",
                    utility_account_id=account_id,
                )

            for bill in parsed:
                attachments = [attachment_entry]
                if bill.start in seen_starts:
                    # A bill with this start date already exists: replace it
                    prev_bill = [b for b in bills if b.start == bill.start][0]
                    log.info(
                        "duplicate bill start: prev_bill = %s, bill = %s",
                        prev_bill,
                        bill,
                    )
                    bills.remove(prev_bill)
                    # carry the replaced bill's attachments forward
                    attachments.extend(prev_bill.attachments)
                bills.append(bill._replace(attachments=attachments))
                seen_starts.add(bill.start)

        return Results(bills=bills)

Ejemplo n.º 14

0

Mostrar archivo

Archivo: pacific_power_billing.py Proyecto: gnoose/datafeeds-shared

    def _execute(self):
        """Scrape bill PDFs for the configured account and parse them.

        The requested date range is widened to at least MINIMUM_BILL_DAYS,
        then clamped to the last ten years and to today. After logging in and
        selecting the account and dates, each captured PDF is parsed; when the
        S3_BILL_UPLOAD feature is enabled the PDF is uploaded and attached to
        its bill. PDFs that fail to parse are logged and skipped.

        Returns:
            Results containing the bills after adjust_bill_dates.
        """
        minimum_span = timedelta(days=MINIMUM_BILL_DAYS)
        if self.end_date - self.start_date < minimum_span:
            log.info(
                f"Expanding date range to a minimum of {MINIMUM_BILL_DAYS} days."
            )
            self.start_date = self.end_date - minimum_span

        # Clamp the search window: no earlier than ten years ago, no later
        # than today.
        earliest_allowed = (datetime.now() - relativedelta(years=10)).date()
        start_date = max(self.start_date, earliest_allowed)
        today = datetime.now().date()
        end_date = min(self.end_date, today)

        log.info("Final date range to search: %s - %s", start_date, end_date)

        home_page = LoginPage(self._driver).login(self.username, self.password)
        self.screenshot("home_screen")
        log.info("Login successful.")

        history_page = home_page.to_bill_history()
        self.screenshot("bill_history_page")
        log.info("Loaded bill history.")

        history_page.select_account(self.account_number)
        self.screenshot("account_selected")
        log.info("Selected account.")

        history_page.set_dates(start_date, end_date)
        self.screenshot("dates_selected")
        log.info("Selected dates.")

        raw_pdfs = history_page.gather_data()

        log.info("PDF bills captured: %s", len(raw_pdfs))
        total_bytes = sum(len(pdf) for pdf in raw_pdfs)
        log.info("Net bill pdf bytes captured: %s", total_bytes)

        bill_data = []
        for position, pdf_bytes in enumerate(raw_pdfs, start=1):
            datum = parse_bill_pdf(BytesIO(pdf_bytes), self.meter_number)
            if datum is None:
                log.info(
                    "There was a problem parsing a bill PDF #%d." % position)
                continue

            if config.enabled("S3_BILL_UPLOAD"):
                key = bill_upload.hash_bill_datum(self.meter_number, datum)
                uploaded = bill_upload.upload_bill_to_s3(
                    BytesIO(pdf_bytes),
                    key,
                    source="pacificpower.net",
                    statement=datum.statement,
                    utility=self.utility,
                    utility_account_id=self.account_number,
                )
                # only attach when the upload produced an entry
                if uploaded:
                    datum = datum._replace(attachments=[uploaded])

            bill_data.append(datum)

        final_bills = adjust_bill_dates(bill_data)
        show_bill_summary(final_bills, "Final Bill Summary")
        return Results(bills=final_bills)