Python file_exists_in_dirの例、datafeeds.common.util.selenium.file_exists_in_dir Pythonの例

コード例 #1

0

ファイルを表示

    def download_as_csv(self) -> str:
        """Export the chart as CSV and return the path of the renamed file.

        Opens the chart's export menu, clicks "Download as CSV Spreadsheet",
        waits (timeout argument 30) for a ``demand_heatmap_*.csv`` file to
        appear in the driver's download directory, then renames that file.

        Returns:
            Path of the renamed CSV inside the download directory.

        Raises:
            Exception: if waiting for the download fails for any reason; the
                underlying error is chained as ``__cause__``.
        """
        download_as_csv_sel = '//*[text()="Download as CSV Spreadsheet"]'

        # click the download options button on top right of chart
        self.find_element("g.highcharts-exporting-group").click()

        # click Download as CSV
        self._driver.find_or_raise(download_as_csv_sel, xpath=True).click()

        log.info("Waiting for file to Download")

        try:
            filename = self._driver.wait(30).until(
                file_exists_in_dir(
                    directory=self._driver.download_dir,
                    # trailing $ keeps partial downloads from matching
                    pattern=r"^demand_heatmap_.+?\.csv$",
                ))
        except Exception as exc:
            # Chain the original error so the real cause (timeout, driver
            # failure, ...) is still visible in the traceback.
            raise Exception("Unable to download file...") from exc

        log.info(f"Download complete: {filename}")

        filepath = os.path.join(self._driver.download_dir, filename)

        # rename the file to avoid matching the wrong file in future
        # NOTE(review): `date` is not defined inside this method; it resolves
        # to whatever module-level name is in scope. Confirm that this is the
        # intended value for the filename prefix.
        new_filename = f"{date}_demand_heatmap.csv"
        new_filepath = os.path.join(self._driver.download_dir, new_filename)
        os.rename(filepath, new_filepath)

        return new_filepath

コード例 #2

0

ファイルを表示

 def download_csv(self) -> str:
     """Click the "Download CSV Data" element and return the downloaded CSV's path.

     Waits (timeout argument 60) for a file ending in ``.csv`` to show up in
     the driver's download directory.
     """
     trigger = self.driver.find_element_by_xpath(
         "//div[contains(text(), 'Download CSV Data')]")
     trigger.click()

     target_dir = self.driver.download_dir
     csv_condition = file_exists_in_dir(target_dir, r".*\.csv$")
     csv_name = self.driver.wait(60).until(csv_condition)
     return os.path.join(target_dir, csv_name)

コード例 #3

0

ファイルを表示

    def download_file(self, extension: str, timeout: Optional[int] = 60):
        """Wait for a file with the given extension to appear and return its path.

        Args:
            extension: file extension without the leading dot (e.g. "csv").
                It is interpolated into a regular expression as-is.
            timeout: timeout handed to ``WebDriverWait``.

        Returns:
            Path of the matching file inside the driver's download directory.
        """
        wait = WebDriverWait(self._driver, timeout)
        download_dir = self._driver.download_dir
        # Anchor the pattern at the end so that an in-progress download such
        # as "report.csv.crdownload" is not mistaken for the finished file.
        pattern = r".*\.{}$".format(extension)
        filename = wait.until(file_exists_in_dir(download_dir, pattern))
        return os.path.join(download_dir, filename)

コード例 #4

0

ファイルを表示

def wait_for_download(driver, timeout=60):
    """Block until the raw report is in the download directory; return its path.

    The file name must match ``RAW_REPORT_NAME`` exactly (the pattern is
    anchored at both ends).
    """
    waiter = WebDriverWait(driver, timeout)
    target_dir = driver.download_dir
    report_pattern = r"^%s$" % RAW_REPORT_NAME
    report_name = waiter.until(file_exists_in_dir(target_dir, report_pattern))
    return os.path.join(target_dir, report_name)

コード例 #5

0

ファイルを表示

ファイル: socalgas.py プロジェクト: gnoose/datafeeds-shared

    def _download_zip_file(self, start_date, end_date):
        """Request an export for the date range and wait for the zip download.

        Args:
            start_date: first date to request; must provide ``strftime`` and
                ``month`` / ``day`` / ``year`` attributes.
            end_date: last date to request (same requirements).

        Returns:
            The downloaded file's name with its last four characters (the
            ".zip" extension) removed.
        """
        import re

        # These input fields come pre-filled and 'clear()' does not work
        for _ in range(10):
            self._driver.find_element_by_id("FromDate").send_keys(
                Keys.BACKSPACE)
        for _ in range(10):
            self._driver.find_element_by_id("ToDate").send_keys(Keys.BACKSPACE)
        time.sleep(1)

        date_format = "%m/%d/%Y"
        self._driver.find_element_by_id("FromDate").send_keys(
            start_date.strftime(date_format))
        self._driver.find_element_by_id("ToDate").send_keys(
            end_date.strftime(date_format))

        # If the scraper tries to go farther back than allowed, an error message is displayed
        # Clicking the calendar button will then set it to the earliest possible good date.
        minimal_date_error_xpath = (
            "//p[contains(text(), 'Date should not be before minimal date')]")
        try:
            minimal_date_error = self._driver.find_element_by_xpath(
                minimal_date_error_xpath)

            if minimal_date_error.is_displayed():
                log.warning(
                    "Attempted start date %s before data exists",
                    start_date.strftime(date_format),
                )
                # The start and end calendar buttons have this same identifier,
                # so we're assuming the 'before' is found first
                # NOTE(review): the standard ARIA attribute is spelled
                # "aria-label"; confirm "arialabel" matches the page markup.
                calendar_button_xpath = "//button[@arialabel='change date']"
                calendar_button = self._driver.find_element_by_xpath(
                    calendar_button_xpath)
                calendar_button.click()
        except NoSuchElementException:
            # No error message on the page: the requested start date was accepted.
            pass

        export_xpath = "//span[contains(text(), 'Export')]"
        self._driver.find(export_xpath, xpath=True).click()
        download_dir = self._driver.download_dir

        def short_date(value):
            # Filename dates are not zero padded and use a two digit year,
            # which strftime cannot produce portably.
            return f"{value.month}-{value.day}-{str(value.year)[2:]}"

        # Filename example: SoCalGas_Gas_60_Minute_7-7-19_8-7-20.zip
        expected_filename = ("SoCalGas_Gas_60_Minute_" +
                             short_date(start_date) + "_" +
                             short_date(end_date) + ".zip")
        log.info("\t\tDownloading file {}".format(expected_filename))

        # The name is a literal, so escape it (the "." would otherwise match
        # any character) and anchor it so a partial download such as
        # "<name>.zip.crdownload" is not accepted as the finished file.
        expected_pattern = f"^{re.escape(expected_filename)}$"
        download_name = self._driver.wait(300).until(
            file_exists_in_dir(download_dir, expected_pattern))
        log.info("\t\tFile downloaded")
        # Truncate file extension
        return str(download_name)[:-4]

コード例 #6

0

ファイルを表示

def wait_for_download(driver, timeout=60):
    """Wait for a csv file to appear in the download directory.

    Returns the path of the first matching file reported by the wait.
    """
    waiter = WebDriverWait(driver, timeout)
    target_dir = driver.download_dir
    csv_condition = file_exists_in_dir(target_dir, r".*\.csv$")
    csv_name = waiter.until(csv_condition)
    return os.path.join(target_dir, csv_name)

コード例 #7

0

ファイルを表示

 def download_csv(self) -> str:
     """Click the chart's CSV download control and return the downloaded file's path."""
     # The control is in an svg and the div id seems to change, so it is
     # located by position under the container whose id starts with
     # "highcharts-".
     control_locator = (
         By.XPATH,
         "//*[name() = 'div' and starts-with(@id, 'highcharts-')]"
         "/*[name() = 'div'][3]/*[name() = 'div']/*[name() = 'div'][2]",
     )
     control = self.driver.wait().until(
         EC.element_to_be_clickable(control_locator))
     control.click()

     target_dir = self.driver.download_dir
     csv_name = self.driver.wait(60).until(
         file_exists_in_dir(target_dir, r".*\.csv$"))
     return os.path.join(target_dir, csv_name)

コード例 #8

0

ファイルを表示

ファイル: energymanager_billing.py プロジェクト: gnoose/datafeeds-shared

    def download_pdf_for_billing_row(self, billing_row: sce_pages.BillingDataRow):
        """Download the bill PDF for one billing row.

        Returns the path of the downloaded PDF inside the driver's download
        directory, or None when the error dialog appears instead of a file.
        """
        not_available = "BILL_NOT_AVAILABLE"
        error_dialog_locator = (
            By.XPATH,
            "//react-energy-manager//div[contains(@class, 'ServiceAcctBillList__dialogboxError')]",
        )
        close_dialog_locator = (
            By.XPATH,
            "//react-energy-manager//button[contains(@class, 'sceDialogBox__crossButtonDialogBox')]",
        )

        def error_dialog_present(driver):
            # Wait condition: returns the sentinel once the error dialog is in
            # the page, otherwise None. The sentinel is compared against the
            # wait's result below to tell the two outcomes apart.
            if driver.find_elements(*error_dialog_locator):
                return not_available
            return None

        billing_row.selector.click()
        time.sleep(2)
        self._driver.find_element_by_id("viewBill").click()
        target_dir = self._driver.download_dir

        # Whichever happens first wins: the error dialog shows up (result is
        # the sentinel) or a PDF lands in the download directory (result is
        # the file name).
        pdf_downloaded = file_exists_in_dir(target_dir, r".*\.pdf$")
        outcome = WebDriverWait(self._driver, 120).until(
            ec_or(error_dialog_present, pdf_downloaded))

        if outcome != not_available:
            return os.path.join(target_dir, outcome)

        # The error modal has to be closed before continuing.
        self._driver.find_element(*close_dialog_locator).click()
        time.sleep(5)
        return None

コード例 #9

0

ファイルを表示

    def _export_data(self) -> str:
        """Click the export button inside the nested iframes and return the CSV path.

        The button is clicked through JavaScript; the method then waits
        (timeout argument 60) for a ``.csv`` file in the driver's download
        directory.
        """
        log.info("Exporting data.")
        driver = self._driver
        with IFrameSwitch(driver, "childFrame"), IFrameSwitch(driver, "frame3"):
            button_locator = (By.XPATH, self.export_data_xpath)
            driver.wait().until(EC.element_to_be_clickable(button_locator))
            driver.sleep(2)

            button = driver.find_element_by_xpath(self.export_data_xpath)
            driver.execute_script("arguments[0].click();", button)

            # Wait for csv to download
            target_dir = driver.download_dir
            csv_name = driver.wait(60).until(
                file_exists_in_dir(target_dir, r".*\.csv$"))
            return os.path.join(target_dir, csv_name)

コード例 #10

0

ファイルを表示

    def download_data(self, meter_number: str) -> str:
        """Download data to the working directory.

        Click Download Data button.
        Saves to config.WORKING_DIRECTORY/current/15_minute_download.csv
        Rename the downloaded file to
        config.WORKING_DIRECTORY/current/{meter_number}.csv

        Args:
            meter_number: used as the base name of the renamed csv file.

        Returns:
            The path of the renamed csv file.

        Raises:
            Exception: if waiting for the download fails; the underlying
                error is chained as ``__cause__``.
        """
        # wait for the download button to be ready
        self.wait_until_ready(self.DownloadBtnSel)

        log.info("Beginning download...")
        self.find_element(self.DownloadBtnSel).click()

        # download filename is always 15_minute_download.csv for 15 minute intervals
        filename = "15_minute_download.csv"
        download_dir = "%s/current" % config.WORKING_DIRECTORY

        try:
            self._driver.wait(30).until(
                file_exists_in_dir(
                    # end pattern with $ to prevent matching
                    # filename.crdownload
                    directory=download_dir,
                    pattern=f"^{filename}$",
                ))
        except Exception as exc:
            # Keep the original error attached so the failure reason is not lost.
            raise Exception("Unable to download file...") from exc

        log.info("Download complete")

        csv_file_path = os.path.join(download_dir, meter_number + ".csv")

        # rename downloaded filename to {meter_number}.csv for
        # avoiding filename conflict in case of multiple accounts
        os.rename(os.path.join(download_dir, filename), csv_file_path)

        return csv_file_path

コード例 #11

0

ファイルを表示

ファイル: billing.py プロジェクト: gnoose/datafeeds-shared

    def wait_for_bill_download(self, bill_date):
        """Wait for document.pdf to download, then rename it after the bill date.

        Args:
            bill_date: value interpolated into the new file name
                (``{bill_date}_bill.pdf``).

        Returns:
            Path of the renamed file under config.WORKING_DIRECTORY/current.

        Raises:
            Exception: if waiting for the download fails; the underlying
                error is chained as ``__cause__``.
        """

        download_dir = config.WORKING_DIRECTORY + "/current"
        try:
            filename = self.driver.wait(30).until(
                file_exists_in_dir(
                    directory=download_dir,
                    # escape the dot so only the literal "document.pdf" matches
                    pattern=r"^document\.pdf$",
                ))
        except Exception as exc:
            # Keep the original error attached so the failure reason is not lost.
            raise Exception("Unable to download file...") from exc

        curr_filepath = os.path.join(download_dir, filename)

        # rename the file to avoid matching the wrong file in future
        filepath = os.path.join(download_dir, f"{bill_date}_bill.pdf")
        os.rename(curr_filepath, filepath)

        return filepath

コード例 #12

0

ファイルを表示

    def download_pdfs(self, start_date: date, end_date: date):
        """Download the eBill PDF of every table row dated strictly inside the range.

        Each downloaded PDF is moved into a "downloaded" sub-directory of the
        driver's download directory, so the next wait for ``*.pdf`` does not
        match a file that was already handled.
        """
        source_dir = self._driver.download_dir
        archive_dir = os.path.join(source_dir, "downloaded")
        os.makedirs(archive_dir, exist_ok=True)

        table = self.find_element(self.DataTableSelector)
        for row in table.find_elements_by_css_selector("tbody > tr"):
            date_cell = row.find_element_by_css_selector(
                'td[data-title="Bill Date"]')
            bill_date = parse_date(date_cell.text).date()

            # Both bounds are exclusive.
            if not (start_date < bill_date < end_date):
                continue

            row.find_element_by_css_selector(
                'td[data-title="View eBill"] > a').click()
            pdf_name = self._driver.wait().until(
                file_exists_in_dir(source_dir, r".*\.pdf$"))

            archived_path = os.path.join(archive_dir, pdf_name)
            os.rename(os.path.join(source_dir, pdf_name), archived_path)
            log.info(f"file downloaded: {archived_path}")

コード例 #13

0

ファイルを表示

ファイル: bill_pdf.py プロジェクト: gnoose/datafeeds-shared

    def download_bills(
        self,
        latest: date,
        utility_account: str,
        utility: str,
        gen_utility: Optional[str] = None,
        gen_utility_account_id: Optional[str] = None,
    ) -> List[BillPdf]:
        """Download bill PDFs issued after ``latest`` and upload them to S3.

        Args:
            latest: bills whose approximate end date is on or before this
                date are skipped.
            utility_account: account id recorded on the upload and on each
                returned BillPdf.
            utility: utility identifier passed to the upload.
            gen_utility: optional generation utility passed to the upload.
            gen_utility_account_id: optional generation account id passed to
                the upload and recorded on each returned BillPdf.

        Returns:
            One BillPdf per bill that was downloaded and uploaded.
        """
        pdfs: List[BillPdf] = []
        log.info("Opening billing history")

        click(self._driver, css_selector="#arrowBillPaymentHistory")

        self.wait_until_ready(self.BillingHistoryTableSel)
        self._driver.screenshot(
            BaseWebScraper.screenshot_path("bill history arrow"))
        wait_for_block_overlay(self._driver)

        log.info("Clicking 'view up to..' link")

        click(self._driver, css_selector=self.ViewMoreHistorySel)
        self.wait_until_ready(self.BillingHistoryTableSel)

        self._driver.screenshot(BaseWebScraper.screenshot_path("panels"))

        panels_count = len(
            self._driver.find_elements_by_css_selector(self.PanelxSel))
        log.info(f"found {panels_count} panels in billing widget")

        # Rather than get all matching elements and iterate through, use index
        # and manually get element each time to help avoid stale element errors
        for i in range(0, panels_count):
            panel = self._driver.find_elements_by_css_selector(
                self.PanelxSel)[i]

            # check if is a payment panel
            panel_header = panel.find_element_by_css_selector(".panel-title")
            header_text = panel_header.text
            if "Payment" in header_text:
                log.debug(f"Skipping panel {i} (payment)")
                # skip if is a payment panel
                continue

            log.debug(f"Processing panel {i} (bill): {header_text}")

            link_elem = panel.find_element_by_css_selector(
                "div.pge_coc-dashboard-viewPay_billed_history_panel_viewBill_para_block"
                " a.viewBill")
            # Get date from the "data-date" attribute on link to download bill...
            # data-date is in milliseconds
            timestamp = int(link_elem.get_attribute("data-date")) / 1000.0

            # when bill was issued
            bill_date = datetime.fromtimestamp(timestamp).date()
            # bill issued about a week after end date; use this window to match dates
            approx_bill_end = bill_date - timedelta(days=7)
            approx_bill_start = approx_bill_end - timedelta(days=30)
            log.debug(f"bill date={bill_date}")

            # cost is in second column
            cost_text = panel.find_element_by_css_selector(
                "td.text-right").text
            log.debug(f"cost text={cost_text}")
            # cost with $ and commas: $1,234.56 or -$1,234.56
            cost = float(cost_text.replace("$", "").replace(",", ""))

            log.info(f"Found bill issued {bill_date} with cost ${cost}")

            if approx_bill_end <= latest:
                log.info(
                    f"ignoring bill, date: {approx_bill_end} already download")
                continue

            try:
                click(self._driver, elem=link_elem)
            except ElementNotInteractableException:
                log.info("Download link not visible; looking for other")

                link_elem = panel.find_element_by_css_selector(
                    "div#billSummaryContainer a.viewBill")

                click(self._driver, elem=link_elem)
            except ElementClickInterceptedException as exc:
                log.info("download link failed: %s %s", exc, exc.msg)
                close_modal(self._driver)
                continue

            # Expected file name: four characters of the account id, then
            # "custbill" and the bill date as MMDDYYYY.
            last4 = self.account_id.split("-")[0][6:10]
            filename = f"{last4}custbill{bill_date.strftime('%m%d%Y')}.pdf"
            download_dir = "%s/current" % config.WORKING_DIRECTORY

            try:
                self._driver.wait(60).until(
                    file_exists_in_dir(
                        # end pattern with $ to prevent matching filename.crdownload
                        directory=download_dir,
                        pattern=f"^{filename}$",
                    ))
            except TimeoutException:
                log.error(
                    f"ERROR waiting for file {filename} to download...skipping"
                )
                # close the download failed modal if there is one
                close_modal(self._driver)
                continue

            with open("%s/%s" % (download_dir, filename), "rb") as f:
                key = hash_bill(self.account_id, approx_bill_start,
                                approx_bill_end, cost, "", "")

                upload_bill_to_s3(
                    file_handle=f,
                    key=key,
                    source="pge.com",
                    statement=bill_date,
                    utility=utility,
                    utility_account_id=utility_account,
                    gen_utility=gen_utility,
                    gen_utility_account_id=gen_utility_account_id,
                )

            log.info(f"Uploaded {filename} to {key}")
            pdfs.append(
                BillPdf(
                    utility_account_id=utility_account,
                    # Record the generation *account id* here; the original
                    # passed the gen_utility value into this field.
                    gen_utility_account_id=gen_utility_account_id,
                    start=approx_bill_start,
                    end=approx_bill_end,
                    statement=bill_date,
                    s3_key=key,
                ))

        return pdfs

コード例 #14

0

ファイルを表示

    def get_bills(self, utility: str,
                  utility_account_id: str) -> List[BillingDatum]:
        """Scrape bill data and PDFs for each bill date within the configured range.

        For every bill date between ``self.start_date`` and ``self.end_date``
        (inclusive) the bill view is opened, service dates / cost / usage /
        peak are read from the page, the bill is printed to PDF, renamed to
        ``bill_<YYYY-MM-DD>.pdf`` in ``self.download_dir`` and uploaded.

        Args:
            utility: utility identifier passed to the upload.
            utility_account_id: account id used for the upload key and upload.

        Returns:
            One BillingDatum per processed bill, oldest page entry last
            visited first (the table is walked in reverse).

        Raises:
            Exception: if the printed PDF does not appear in the download
                directory; the underlying error is chained as ``__cause__``.
        """
        billing_data = []

        available_dates = self.driver.find_elements(
            By.CSS_SELECTOR, "table.table-alt a.bill-view-link")
        available_dates = [parse_date(i.text).date() for i in available_dates]
        log.info("available dates: %s",
                 [dt.strftime("%Y-%m-%d") for dt in available_dates])

        xpath_locators = {
            # Finds the last KWH reading under Total Usage column
            "cost":
            "//table[contains(., 'NEW CHARGES')]/tbody/tr/td[3]",
            "used":
            "(//table[contains(.,'USAGE')]//tr/td[contains(., 'KWH')])",
            "usage_kw":
            "//table[contains(.,'USAGE')]//tr/td[contains(.,'KW') and not(contains(.,'KWH'))]",
        }

        # loop through dates in table in ascending order
        for pdf_date in reversed(available_dates):
            # skip if the date isn't in the specified range
            if not (self.start_date <= pdf_date <= self.end_date):
                log.debug("skipping date outside range: %s", pdf_date)
                continue

            view_bill_link = self.driver.find_element_by_xpath(
                '//a[.="%s"]' % pdf_date.strftime("%m/%d/%Y"))
            scroll_to(self.driver, view_bill_link)

            self.driver.sleep(0.5)
            view_bill_link.click()

            self.driver.wait(30).until(
                EC.visibility_of_element_located(
                    (By.CSS_SELECTOR, "div.billImage")))

            start_date = None
            end_date = None
            cost = None
            used = None
            peak = None

            dates_line_text: str = self.driver.find_element_by_xpath(
                "//td[contains(., 'Service From:')]").text
            dates_match = re.search(
                r"Service From: (?P<from>\w+ \d\d) to (?P<to>\w+ \d\d) \(\d\d Days\)",
                dates_line_text,
            )

            if dates_match:
                # if from month is December, use previous year
                year = (pdf_date.year -
                        1 if "dec" in dates_match.group("from").lower() else
                        pdf_date.year)
                start_date = parse_date("%s %s" %
                                        (dates_match.group("from"), year))
                end_date = parse_date(
                    dates_match.group("to") + pdf_date.strftime(" %Y"))

            cost_match = self.driver.find(xpath_locators["cost"], xpath=True)
            if cost_match:
                cost = cost_match.text
                cost = float(cost.replace("$", "").replace(",", ""))

            kwh_usages = []
            for match in self.driver.find_all(xpath_locators["used"],
                                              xpath=True):
                # include only if it has a reading values as siblings; exclude credit line items
                parent = match.find_element_by_xpath("..")
                # meter number, previous reading, current reading
                readings_text = ""
                for idx, child in enumerate(
                        parent.find_elements_by_xpath(".//td")):
                    log.debug("\t%s\t%s", idx, child.text.strip())
                    readings_text += child.text.strip()
                    if idx == 2:
                        break
                if not readings_text:
                    log.info("skipping non-reading line item: %s", parent.text)
                    continue
                kwh_value = float(
                    match.text.replace("KWH", "").replace(",", "").strip())
                kwh_usages.append(kwh_value)

            if kwh_usages:
                used = sum(kwh_usages)

            kw_usages = []
            for usage_kw_match in self.driver.find_all(
                    xpath_locators["usage_kw"], xpath=True):
                kw_usages.append(
                    float(
                        usage_kw_match.text.replace("KW",
                                                    "").replace(",",
                                                                "").strip()))

            if kw_usages:
                peak = max(kw_usages)

            # NOTE(review): when the "Service From" line does not match the
            # regex above, end_date is still None and the subtraction below
            # raises TypeError.
            data = BillingDatum(
                start=start_date,
                end=end_date - timedelta(days=1),
                statement=end_date - timedelta(days=1),
                cost=cost,
                peak=peak,
                used=used,
                items=None,
                attachments=None,
                utility_code=None,
            )

            self.driver.find("a#billImageToPrint").click()
            self.driver.sleep(1)
            self.driver.switch_to.window(self.driver.window_handles[-1])

            # the filename of the printed pdf is f"{current page title}.pdf"
            self.driver.execute_script("window.print();")

            try:
                # file_exists_in_dir only builds the wait condition; it has to
                # be handed to a wait, otherwise nothing is ever checked and
                # the rename below can run before the PDF has been written.
                self.driver.wait(30).until(
                    file_exists_in_dir(directory=self.download_dir,
                                       pattern=r"^Bill View Bill Image.pdf$"))
            except Exception as exc:
                raise Exception(
                    "Unable to download file for %s" % pdf_date) from exc

            curr_path = os.path.join(self.download_dir,
                                     "Bill View Bill Image.pdf")
            new_path = os.path.join(
                self.download_dir, f"bill_{pdf_date.strftime('%Y-%m-%d')}.pdf")
            os.rename(curr_path, new_path)

            log.info("parsed bill for %s - %s", data.start, data.end)

            self.driver.find("a#close").click()
            self.driver.sleep(1)
            self.driver.switch_to.window(self.driver.window_handles[-1])
            self.driver.sleep(1)

            # upload PDF:
            key = hash_bill(
                utility_account_id,
                data.start,
                data.end,
                data.cost,
                data.peak,
                data.used,
            )

            with open(new_path, "rb") as pdf_data:
                attachment_entry = upload_bill_to_s3(
                    BytesIO(pdf_data.read()),
                    key,
                    source="www.duke-energy.com",
                    statement=data.end,
                    utility=utility,
                    utility_account_id=utility_account_id,
                )

            if attachment_entry:
                data = data._replace(attachments=[attachment_entry])

            billing_data.append(data)

            # Click Bill Information in breadcrumbs to go back to bills list page
            self.driver.find("a#billInformation").click()

        return billing_data

コード例 #15

0

ファイルを表示

    def energy_manager_basic_usage_action(
            self, page: sce_pages.SceEnergyManagerBasicUsagePage):
        """Download interval data in 7-day chunks and collect it into a timeline.

        For each sub-range of ``self.start_date`` .. ``self.end_date`` a report
        is generated and downloaded as csv, and its readings are inserted into
        a Timeline that is stored on ``self.interval_data_timeline``.

        Args:
            page: the Energy Manager basic usage page object to drive.

        Raises:
            sce_errors.EnergyManagerReportException: if generating or
                downloading a report fails.
            TimeoutException: if the csv does not appear within the wait.
        """
        sce_pages.detect_and_close_survey(self._driver)
        rval = page.select_service_id(self.service_id)
        log.info("Result of select service id %s: %s", self.service_id, rval)
        self.screenshot("select_service_id")
        page.configure_report()

        date_range = DateRange(self.start_date, self.end_date)
        # the website seems to time out when trying to get more than this amount of data
        interval_size = relativedelta(days=7)
        timeline = Timeline(self.start_date, self.end_date)

        for idx, subrange in enumerate(
                date_range.split_iter(delta=interval_size)):
            log.info("Requesting interval data for dates: %s", subrange)
            start = subrange.start_date
            end = subrange.end_date

            page.set_time_range(start, end)
            self.screenshot("set_time_range")

            try:
                page.generate_report()
                time.sleep(5)
                WebDriverWait(self._driver, 180).until(
                    EC.invisibility_of_element_located(
                        sce_pages.GenericBusyIndicatorLocator))
                self.screenshot(f"interval{idx}")
            except Exception as e:
                raise sce_errors.EnergyManagerReportException(
                    "Failed to load data from Energy Manager") from e

            try:
                page.raise_on_report_error()
            except sce_errors.EnergyManagerDataNotFoundException:
                log.info("No data found for this time range, continuing...")
                # If a given date range has no interval data, just move on to the next one
                continue

            log.info("Downloading the interval data report.")
            self.clear_csv_downloads()

            try:
                page.download_report()
            except Exception as e:
                raise sce_errors.EnergyManagerReportException(
                    "Failed to load data from Energy Manager") from e

            try:
                # Wait two minutes for the download to finish. The pattern is
                # anchored with $ so that a partial "*.csv.crdownload" file is
                # not picked up as the finished report.
                wait = WebDriverWait(self._driver, 120)
                csv_file_name = wait.until(
                    file_exists_in_dir(self._driver.download_dir, r".*\.csv$"))
            except TimeoutException as exc:
                raise TimeoutException(
                    "Downloading interval data from Energy Manager failed."
                ) from exc

            csv_file_path = os.path.join(self._driver.download_dir,
                                         csv_file_name)
            for reading in parse_sce_csv_file(csv_file_path,
                                              self.service_id):
                timeline.insert(reading.dt, reading.value)

        self.interval_data_timeline = timeline

コード例 #16

0

ファイルを表示

    def get_bills(self, account_id: str, start: date,
                  end: date) -> List[BillingDatum]:
        """Get bills from the table.

        for each row:
          get end from Read date column (date)
          get start date from end date - (Days column (date) - 1)
          get statement date from Bill date column (date)
          if not start - end overlaps passed in start / end, continue
          get peak from On-peak Billed kW (float)
          get used from (Off-peak kWh + Shoulder kWh + On-peak kWh) (float)
          get cost from New charges (float)
          click eye icon to download PDF; wait for download to complete to self.driver.download_dir

        Args:
            account_id: account id used for the upload key and upload.
            start: start of the requested date range.
            end: end of the requested date range.

        Returns:
            One BillingDatum per overlapping bill, with the uploaded PDF
            attached when an upload entry was produced.

        Raises:
            Exception: if a bill PDF cannot be downloaded; the underlying
                error is chained as ``__cause__``.
        """
        import re

        WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located(self.UsageTableBodyLocator))
        usage_table_rows = self.driver.find_elements(
            *self.UsageTableRowsLocator)

        bill_data: List[BillingDatum] = []
        self.driver.screenshot(BaseWebScraper.screenshot_path("bill table"))
        for row in usage_table_rows:
            # Only visible cells count towards the column indexes used below.
            cols = [
                c for c in row.find_elements_by_tag_name("td")
                if "display: none" not in c.get_attribute("style")
            ]

            def col(idx):
                # Text of the idx-th visible cell in this row.
                return cols[idx].text

            def to_num(idx):
                # Keep only digits and "."; signs, "$" and "," are dropped.
                return "".join(d for d in col(idx)
                               if d.isdigit() or d == ".")

            def to_float(idx):
                # Cells without any digits count as 0.
                digits = to_num(idx)
                return float(digits) if len(digits) > 0 else 0

            log.debug(f"statement={col(1)} end={col(2)} days={col(7)}")
            # statement date
            statement_date = date_parser.parse(col(1)).date()

            # bill end: the column has no year; a December read date on a
            # January statement belongs to the previous year
            period_year = statement_date.year
            if statement_date.month == 1 and col(2).startswith("12"):
                period_year = statement_date.year - 1
            end_str = f"{col(2)}/{period_year}"
            bill_end = date_parser.parse(end_str).date()

            # bill start
            bill_start = bill_end - timedelta(days=int(to_float(7)) - 1)
            log.debug(f"start={bill_start} end={bill_end}")

            if not self._overlap(start, end, bill_start, bill_end):
                log.info(
                    f"skipping bill {bill_start} - {bill_end}: does not overlap requested range {start} - {end}"
                )
                continue

            # cost
            new_charges = to_float(8)
            # used
            used = to_float(4) + to_float(5) + to_float(6)
            # peak
            peak = to_float(3)

            bill_datum = BillingDatum(
                start=bill_start,
                end=bill_end,
                statement=statement_date,
                cost=new_charges,
                used=used,
                peak=peak,
                items=None,
                attachments=None,
                utility_code=None,
            )

            # Built outside the try block so the name is always bound when the
            # error message below is formatted.
            bill_pdf_name = "SRPbill{}{}.pdf".format(
                statement_date.strftime("%B"), statement_date.year)
            try:
                pdf_download_link = cols[0].find_element_by_tag_name("a")
                scroll_to(self.driver, pdf_download_link)
                pdf_download_link.click()
                log.info("looking for %s in %s", bill_pdf_name,
                         self.driver.download_dir)
                # The name is a literal: escape it and anchor both ends so a
                # partial "<name>.pdf.crdownload" file is not accepted.
                self.driver.wait(60).until(
                    file_exists_in_dir(self.driver.download_dir,
                                       f"^{re.escape(bill_pdf_name)}$"))
            except Exception as e:
                raise Exception(
                    f"Failed to download bill {bill_pdf_name} for statement date {statement_date}:\n {e}"
                ) from e
            log.info(
                f"Bill {bill_pdf_name} for statement date {statement_date} downloaded successfully"
            )

            attachment_entry = None
            # open downloaded PDF and upload
            if config.enabled("S3_BILL_UPLOAD"):
                key = hash_bill_datum(account_id, bill_datum)
                with open(f"{self.driver.download_dir}/{bill_pdf_name}",
                          "rb") as pdf_data:
                    attachment_entry = upload_bill_to_s3(
                        BytesIO(pdf_data.read()),
                        key,
                        source="myaccount.srpnet.com",
                        statement=bill_datum.statement,
                        utility="utility:salt-river-project",
                        utility_account_id=account_id,
                    )
            if attachment_entry:
                bill_data.append(
                    bill_datum._replace(attachments=[attachment_entry]))
            else:
                bill_data.append(bill_datum)
        return bill_data

コード例 #17

0

ファイルを表示

    def export_csv(self, service_id, start: date, end: date) -> str:
        """Export interval data for one meter as CSV and return the file path.

        Steps performed against the page:

        1. Wait for the loading spinner to disappear, open the meter drop
           down and click the entry whose text contains ``service_id``.
        2. Open the export drop down and click the CSV export link.
        3. Fill in the from/to dates (mm/dd/yyyy) and click Download.
        4. Wait for a ``.csv`` file to show up in the driver's download
           directory.

        Args:
            service_id: Value matched against the text of the meter
                drop-down entries.
            start: First date to request.
            end: Last date to request. It is replaced by the date parsed from
                the "to" field's placeholder whenever that date is later.

        Returns:
            Path of the downloaded CSV inside ``self.driver.download_dir``.

        Raises:
            DataSourceConfigurationError: If no drop-down entry containing
                ``service_id`` is found.
        """
        spinner_locator = (By.CSS_SELECTOR, "div.spinner-container")

        self.driver.wait().until(
            EC.invisibility_of_element_located(spinner_locator))

        self.driver.sleep(2)
        self.driver.find(self.MeterDropdownSelector).click()
        # wait for loading
        self.driver.sleep(5)

        meter_dropdown_selector = f'//table[@id="sdp_selector_table"]//a[contains(@class,"sdp-dropdown") and contains(.,"{service_id}")]'
        meter_id_dropdown_option = self.driver.find(meter_dropdown_selector,
                                                    xpath=True)
        # Check the lookup result before using the element, so that a missing
        # meter is reported as a configuration error rather than as whatever
        # scroll_to does with an empty value.
        if not meter_id_dropdown_option:
            raise DataSourceConfigurationError(
                f"No meter found with service_id: {service_id}")
        scroll_to(self.driver, meter_id_dropdown_option)

        meter_id_dropdown_option.click()
        self.driver.wait().until(
            EC.invisibility_of_element_located(spinner_locator))
        self.driver.sleep(2)
        self.driver.find_element(*self.ExportCSVDropDownButtonLocator).click()
        self.driver.sleep(2)

        self.driver.find(self.ExportCSVLinkSelector).click()
        self.driver.wait().until(
            EC.visibility_of_element_located(
                (By.CSS_SELECTOR, self.FromDateInputSelector)))
        self.driver.sleep(2)
        from_date_input_field = self.driver.find(self.FromDateInputSelector)
        from_date_input_field.clear()
        from_date_input_field.send_keys(start.strftime("%m/%d/%Y"))

        to_date_input_field = self.driver.find(self.ToDateInputSelector)
        # The "to" field's placeholder text is parsed as a date.
        max_available_to_date = parse_date(
            to_date_input_field.get_attribute("placeholder")).date()

        # NOTE(review): max() can only move `end` later (to the placeholder
        # date); confirm that extending rather than capping is intended.
        end = max(max_available_to_date, end)

        to_date_input_field.clear()
        to_date_input_field.send_keys(end.strftime("%m/%d/%Y"))

        self.driver.find(self.DownloadButtonSelector).click()

        # Wait for csv to download
        download_dir = self.driver.download_dir
        filename = self.driver.wait().until(
            file_exists_in_dir(download_dir, r".*\.csv$"))
        return os.path.join(download_dir, filename)

コード例 #18

0

ファイルを表示

ファイル: portland_bizportal.py プロジェクト: gnoose/datafeeds-shared

    def handle_pdfs(
        self,
        service_id,
        start: date,
        end: date,
        utility,
        utility_account_id,
        first_page=False,
    ) -> List[BillingDatum]:
        """Download the 'View Bill' PDFs on the current page and parse them.

        When ``first_page`` is true, the first 'View Bill' link is treated as
        the most recent bill: clicking it may open a separate view-bill page
        with its own download button instead of downloading directly. That
        bill is downloaded and parsed first, and the first link is then
        skipped in the main loop.

        For each remaining link, the date range shown next to the link
        determines the expected file name. The link is clicked, the method
        waits for that file to appear in the download directory, and the PDF
        is parsed.

        Args:
            service_id: Passed through to ``extract_bill_data``.
            start: Bills ending before this date stop the loop.
            end: Bills starting after this date are skipped.
            utility: Passed through to ``extract_bill_data``.
            utility_account_id: Passed through to ``extract_bill_data``.
            first_page: Whether to apply the most-recent-bill special case.

        Returns:
            The collected bills after ``_adjust_bill_dates`` has been applied.

        Raises:
            AttributeError: If the text next to a link does not match the
                ``mm/dd/yyyy - mm/dd/yyyy`` pattern (``re.match`` yields
                ``None``).
        """
        pdf_links_xpath = "//a[contains(text(), 'View Bill')]"

        download_dir = self.driver.download_dir
        bill_data: List[BillingDatum] = []
        # The most recent bill link is a special case.
        # It does not download directly but opens a new page with a download link.
        log.info("first_page is %s", first_page)
        # On later pages there is no special first link, so mark it as
        # already handled.
        first_link_found = not first_page

        if not first_link_found:
            log.debug("looking for pdf_link_1 %s", pdf_links_xpath)
            pdf_link_1 = WebDriverWait(self.driver, 15).until(
                ec.presence_of_element_located((By.XPATH, pdf_links_xpath)))
            log.info("Downloading most recent bill; scroll to %s",
                     pdf_link_1.location["y"])
            self.driver.execute_script("window.scrollTo(0," +
                                       str(pdf_link_1.location["y"]) + ")")
            WebDriverWait(self.driver, 15).until(
                ec.element_to_be_clickable((By.XPATH, pdf_links_xpath)))
            self.driver.screenshot(
                BaseWebScraper.screenshot_path("most recent bill"))
            pdf_link_1.click()

            if (self.driver.current_url ==
                    "https://portlandgeneral.com/secure/view-bill"):
                download_bill_button_xpath = (
                    "//span[contains(text(), 'Download bill (PDF)')]")
                log.debug("scroll to scrollHeight/2")
                self.driver.execute_script(
                    "window.scrollTo(0, window.scrollY+(document.body.scrollHeight/2))"
                )
                time.sleep(2)
                log.debug("looking for download button %s",
                          download_bill_button_xpath)
                download_bill_button = WebDriverWait(self.driver, 25).until(
                    ec.presence_of_element_located(
                        (By.XPATH, download_bill_button_xpath)))

                try:
                    log.debug("clicking download")
                    download_bill_button.click()
                    # div[role="alert"] with text  No bill found.
                except ElementClickInterceptedException as exc:
                    # Something is covering the button; close the modal and
                    # retry the click once.
                    log.debug("click intercepted: %s", exc)
                    close_modal(self.driver)
                    download_bill_button.click()
                time.sleep(1)

                # The file name of this download is not predicted; take any
                # PDF that shows up in the download directory.
                filename = self.driver.wait(60).until(
                    file_exists_in_dir(download_dir, r".*\.pdf$"))

                file_path = os.path.join(download_dir, filename)
                log.info("Processing most recent bill: %s", filename)
                single_bill = extract_bill_data(file_path, service_id, utility,
                                                utility_account_id)

                bill_data.append(single_bill)
                log.info(
                    "first bill: %s - %s cost=%s",
                    single_bill.start,
                    single_bill.end,
                    single_bill.cost,
                )

                bill_history_button_xpath = (
                    "//span[contains(text(), 'Billing and payment history')]")
                bill_history_button = WebDriverWait(self.driver, 25).until(
                    ec.element_to_be_clickable(
                        (By.XPATH, bill_history_button_xpath)))
                log.info("Returning to bill history page")
                bill_history_button.click()

        pdf_links = WebDriverWait(self.driver, 25).until(
            ec.presence_of_all_elements_located((By.XPATH, pdf_links_xpath)))
        log.info("Found %s pdfs on page", len(pdf_links))
        self.driver.screenshot(BaseWebScraper.screenshot_path("found pdfs"))
        for link in pdf_links:
            if not first_link_found:
                # The first link was the most recent bill handled above.
                first_link_found = True
                continue
            self.driver.execute_script("window.scrollTo(0," +
                                       str(link.location["y"]) + ")")
            time.sleep(2)
            if not self.seen_survey and close_survey(self.driver):
                self.seen_survey = True

            close_survey(self.driver)
            # get sibling node for date range text: 12/10/2020 - 01/12/2021
            match = re.match(
                r"(\d+/\d+/\d+) - (\d+/\d+/\d+)",
                link.find_element_by_xpath("../p").text,
            )
            from_dt = parse_time(match.group(1))
            to_dt = parse_time((match.group(2)))
            if to_dt < start:
                log.info("stoppinng: %s bill is before start", to_dt)
                break
            # filename is View_Bill-Dec. 10, 2020_Jan. 12, 2021.pdf
            filename = "View_Bill-%s_%s.pdf" % (
                from_dt.strftime("%b. %d, %Y"),
                to_dt.strftime("%b. %d, %Y"),
            )
            link.click()

            self.driver.wait(90).until(
                file_exists_in_dir(download_dir, filename))
            file_path = os.path.join(download_dir, filename)

            period_start, period_end = extract_bill_period(file_path)

            # Check for a missing period first: comparing a missing value
            # against a date below would raise before this guard was reached.
            if not period_start or not period_end:
                log.info(
                    "Could not determine bill period for pdf %s. Skipping",
                    file_path)
                continue

            # If the bill starts after our end date, skip it
            if period_start > end:
                continue

            # If the bill ends before our start date, break and return (finding where to end)
            if period_end < start:
                break

            single_bill = extract_bill_data(file_path, service_id, utility,
                                            utility_account_id)

            bill_data.append(single_bill)
            log.info(
                "added bill: %s - %s cost=%s",
                single_bill.start,
                single_bill.end,
                single_bill.cost,
            )

        non_overlapping_bills = _adjust_bill_dates(bill_data)
        return non_overlapping_bills

コード例 #19

0

ファイルを表示

    def reports_page_action(
            self, reports_page: saltriver_pages.SaltRiverReportsPage):
        """Download interval data in 30-day chunks and store it as a timeline.

        Navigates from the reports page to the meter profiles page to find the
        meter and channel matching ``self.meter_id`` / ``self.channel_id``,
        then goes back to the reports page and opens the interval download
        page. The requested range (``self.start_date`` to ``self.end_date``,
        with the start moved up to the channel's data start when needed) is
        split into 30-day sub-ranges. Each sub-range is downloaded as a CSV
        and its readings are inserted into a ``Timeline``.

        Args:
            reports_page: Page object for the reports page that is currently
                loaded.

        Raises:
            InvalidDateRangeError: If the (adjusted) start date is after the
                end date.
            TimeoutException: If a CSV does not appear in the download
                directory within 180 seconds.

        Side effects:
            Sets ``self.interval_data_timeline`` to the populated timeline.
        """
        log.info("goto_meter_profiles")
        reports_page.goto_meter_profiles()
        meter_page = saltriver_pages.MeterProfilesPage(self._driver)
        WebDriverWait(self._driver, 30).until(page_is_ready(meter_page))
        self.screenshot("meter profiles")

        log.info("get meters")
        meters = meter_page.get_meters()
        meter, channel = self.find_matching_meter_and_channel(
            meters, self.meter_id, self.channel_id)
        self.screenshot("meter and channel")

        log.info("goto reports")
        meter_page.goto_reports()
        WebDriverWait(self._driver, 30).until(page_is_ready(reports_page))
        time.sleep(10)
        log.info("looking for interval download")
        reports_page.goto_interval_download()
        interval_download_page = saltriver_pages.IntervalDownloadPage(
            self._driver)
        WebDriverWait(self._driver,
                      30).until(page_is_ready(interval_download_page))
        self.screenshot("interval download")
        log.info("interval download page is ready")
        interval_download_page.basic_configuration()
        interval_download_page.select_meter_by_id(meter.meter_id)

        start = self.start_date
        end = self.end_date

        # Snap the scraper start date to the data start date for the selected meter/channel.
        if start < channel.data_start:
            start = channel.data_start

        if start > end:
            raise InvalidDateRangeError(
                "The start date must be before the end date (start='{}', end='{}')"
                .format(start, end))

        # Pull out data 30 days at a time
        date_range = DateRange(start, end)
        interval_size = relativedelta(days=30)
        timeline = Timeline(start, end)
        for sub_range in date_range.split_iter(delta=interval_size):
            log.info("downloading %s", sub_range)
            # Remove earlier CSVs so the wait below picks up this download.
            self.clear_csv_downloads()
            interval_download_page.set_date_range(sub_range.start_date,
                                                  sub_range.end_date)
            interval_download_page.download_interval_data()
            self.screenshot("download %s" %
                            sub_range.end_date.strftime("%Y%m%d"))
            try:
                # Anchor the pattern at the end of the name so that only
                # names ending in ".csv" qualify, as the other download waits
                # in this file do.
                csv_file_name = WebDriverWait(self._driver, 180).until(
                    file_exists_in_dir(self._driver.download_dir,
                                       r".*\.csv$"))
            except TimeoutException as exc:
                # Keep the original timeout as the cause for debugging.
                raise TimeoutException(
                    "Downloading interval data from SPATIA failed.") from exc

            csv_file_path = os.path.join(self._driver.download_dir,
                                         csv_file_name)
            for (when, reading) in parse_spatia_interval_csv(
                    csv_file_path, channel.id):
                # The CSV file reports readings at the end of each fifteen minute interval. So the first reading
                # of the day occurs at 00:15. and the last at midnight. We want to report the readings at the
                # _start_ of each interval, thus we subtract 15 minutes here.
                when = when - timedelta(minutes=15)
                timeline.insert(when, reading)

        self.interval_data_timeline = timeline