def test_parse_account_row(self):
     """Test the basic functionality of the parse_account_row function."""
     account_row = {
         "PK": 1,
         "UtilityProvider": "test_provider",
         "AccountNumber": "12345",
         "RawAccountNumber": "1234-5",
         "SourceLink": "test_link",
         "StatementType": "test_statement_type",
         "StatementDate": date(2000, 1, 1),
         "IntervalStart": date(2000, 2, 1),
         "IntervalEnd": date(2000, 3, 1),
         "TotalBillAmount": Decimal("100.00"),
         "AmountDue": Decimal("200.00"),
         "NewCharges": Decimal("80.00"),
         "OutstandingBalance": Decimal("90.00"),
         "PreviousBalance": Decimal("120.00"),
         "__EXTRA1": "EXTRA1",  # It's okay to have extra fields
         "__EXTRA2": "EXTRA2",
     }
     result = UrjanetPyMySqlDataSource.parse_account_row(account_row)
     for field in account_row:
         if field.startswith("__EXTRA"):
             with self.assertRaises(AttributeError):
                 getattr(result, field)
         else:
             self.assertEqual(getattr(result, field), account_row[field])
Beispiel #2
0
    def load_accounts(self) -> List[Account]:
        """Load accounts based on the account id.

        For this utility, a statement is only admissible to our
        system if there is a meter associated with it. (There's at
        most one meter per account.)
        """
        query = """
            select *
            from Account
            where AccountNumber=%s and Account.UtilityProvider='NVEnergy'
        """
        result_set = self.fetch_all(query, self.account_number)
        # Group the statements by interval, and take the latest statement for a given interval
        result_set = [
            max(group, key=lambda stmt: stmt["StatementDate"])
            for _, group in itertools.groupby(
                result_set, lambda stmt:
                (stmt["IntervalStart"], stmt["IntervalEnd"]))
        ]

        return [
            UrjanetPyMySqlDataSource.parse_account_row(row)
            for row in result_set
        ]
    def load_accounts(self) -> List[Account]:
        """Load accounts based on the account id.

        Note: Currently this scraper does NOT filter by utility name.
        The reason for this is that some of the older datapoints for
        these meters have a different utility name, but the Urjanet
        data adheres to the same constraints. This may need to be
        revisited in the future.
        """
        query = """
            select *
            from Account, Meter
            where Account.PK=Meter.AccountFK and Meter.PODid=%s
        """
        result_set = self.fetch_all(query, self.account_number)

        # For each billing period, we extract the most recent statement,
        # to account for billing corrections.
        groups: DefaultDict[Tuple, SqlQueryResult] = defaultdict(list)
        for stmt in result_set:
            date_range = (stmt["IntervalStart"], stmt["IntervalEnd"])
            groups[date_range].append(stmt)
        get_date = itemgetter("StatementDate")

        return [
            UrjanetPyMySqlDataSource.parse_account_row(row)
            for row in [max(group, key=get_date) for group in groups.values()]
        ]
Beispiel #4
0
 def load_accounts(self) -> List[Account]:
     query = """
         select *
         from Account
         where RawAccountNumber=%s and UtilityProvider = 'PugetSoundEnergy'
     """
     result_set = self.fetch_all(query, self.account_number)
     return [UrjanetPyMySqlDataSource.parse_account_row(row) for row in result_set]
Beispiel #5
0
 def load_accounts(self) -> List[Account]:
     query = """
         SELECT *
         FROM Account
         WHERE AccountNumber=%s AND UtilityProvider = 'AmericanWater'
     """
     result_set = self.fetch_all(query, self.account_number)
     return [UrjanetPyMySqlDataSource.parse_account_row(row) for row in result_set]
 def load_accounts(self) -> List[Account]:
     query = """
         select *
         from Account
         where AccountNumber=%s and UtilityProvider = 'ContraCostaWaterDistrictCA'
     """
     acct_no = self.account_number
     result_set = self.fetch_all(query, acct_no)
     return [UrjanetPyMySqlDataSource.parse_account_row(row) for row in result_set]
Beispiel #7
0
 def load_accounts(self) -> List[Account]:
     """Load accounts based on the account id"""
     query = """
         SELECT *
         FROM Account
         WHERE AccountNumber=%s AND UtilityProvider = 'CityOfSouthlakeTX'
     """
     result_set = self.fetch_all(query, self.account_number)
     return [UrjanetPyMySqlDataSource.parse_account_row(row) for row in result_set]
Beispiel #8
0
 def load_accounts(self) -> List[Account]:
     """Load accounts based on the account id"""
     query = """
         SELECT *
         FROM Account
         WHERE AccountNumber=%s AND UtilityProvider = 'LADeptOfWAndP'
     """
     acct_no = self.account_number
     result_set = self.fetch_all(query, acct_no)
     return [UrjanetPyMySqlDataSource.parse_account_row(row) for row in result_set]
 def test_parse_account_row_valueerror(self):
     """Test that parse_account_row raises a ValueError when a field has an invalid value"""
     account_row = {
         "PK": 1,
         "UtilityProvider": "test_provider",
         "AccountNumber": "12345",
         "RawAccountNumber": "1234-5",
         "SourceLink": "test_link",
         "StatementType": "test_statement_type",
         "StatementDate": "not_a_date",  # ValueError should occur here
         "IntervalStart": date(2000, 2, 1),
         "IntervalEnd": date(2000, 3, 1),
         "TotalBillAmount": Decimal("100.00"),
         "AmountDue": Decimal("200.00"),
         "NewCharges": Decimal("80.00"),
         "OutstandingBalance": Decimal("90.00"),
         "PreviousBalance": Decimal("120.00"),
     }
     with self.assertRaises(ValueError):
         UrjanetPyMySqlDataSource.parse_account_row(account_row)
Beispiel #10
0
 def load_accounts(self) -> List[Account]:
     """Get by account number."""
     query = """
         SELECT *
         FROM Account
         WHERE AccountNumber=%s AND UtilityProvider = 'LADeptOfWAndP'
     """
     result_set = self.fetch_all(query, self.account_number)
     return [
         UrjanetPyMySqlDataSource.parse_account_row(row)
         for row in result_set
     ]
Beispiel #11
0
 def load_accounts(self) -> List[Account]:
     """Load accounts based on the account id."""
     query = """
         select *
         from Account
         where AccountNumber like %s and UtilityProvider = 'SDGAndE'
     """
     result_set = self.fetch_all(
         query, "%{}%".format(self.account_number.replace(" ", "")))
     return [
         UrjanetPyMySqlDataSource.parse_account_row(row)
         for row in result_set
     ]
Beispiel #12
0
    def load_accounts(self) -> List[Account]:
        """Load accounts based on the account id."""
        query = """
            select *
            from Account
            where AccountNumber=%s and Account.UtilityProvider='FPL'
        """
        result_set = self.fetch_all(query, self.account_number)

        return [
            UrjanetPyMySqlDataSource.parse_account_row(row)
            for row in result_set
        ]
    def load_accounts(self) -> List[Account]:
        """We are pulling Clean Power Alliance charges off of an SCE bill.

        The AccountNumber in urjanet is the UtilityService.gen_service_id in our db.
        """
        query = """
            SELECT *
            FROM Account
            WHERE AccountNumber=%s AND UtilityProvider = 'SCE'
        """
        result_set = self.fetch_all(
            query, self.normalize_account_number(self.gen_service_id))
        return [
            UrjanetPyMySqlDataSource.parse_account_row(row)
            for row in result_set
        ]
Beispiel #14
0
    def load_accounts(self) -> List[Account]:
        """Load accounts based on the account id.

        For this utility, a statement is only admissible to our
        system if there is a meter associated with it. (There's at
        most one meter per account.)
        """
        query = """
            select *
            from Account, Meter
            where Account.PK = Meter.AccountFK and Account.RawAccountNumber=%s
                and Account.UtilityProvider='NationalGrid'
        """
        result_set = self._sanitize_statements(
            self.fetch_all(query, self.account_number)
        )
        return [UrjanetPyMySqlDataSource.parse_account_row(row) for row in result_set]
Beispiel #15
0
    def load_accounts(self) -> List[Account]:
        """Load accounts based on the account id"""

        # This query finds all Urjanet accounts which either:
        # (1) Have a RawAccountNumber prefixed by this.account_number
        # (2) Have a RawAccountNumber == this.account_number, after removing dashes from the former
        self.validate()
        query = """
            SELECT *
            FROM Account
            WHERE
                (RawAccountNumber LIKE %s OR REPLACE(RawAccountNumber, '-', '')=%s)
                AND UtilityProvider = 'PacGAndE'
        """
        account_number_prefix_regex = "{}%".format(self.account_number)
        result_set = self.fetch_all(query, account_number_prefix_regex,
                                    self.account_number)
        return [
            UrjanetPyMySqlDataSource.parse_account_row(row)
            for row in result_set
        ]
    def load_accounts(self) -> List[Account]:
        """Load accounts based on the account id.

        Normally, we would allow a billing scraper to be parameterized by
        account id. However, we have only one account, and in fact only one
        meter, for this utility. Rather than increase complexity (which
        would mean touching more systems than tasks), we just use constants.
        We should revisit this if we add more accounts with Constellation
        billing.
        """
        query = """
            select *
            from Account
            where AccountNumber=%s and UtilityProvider='Constellation' and
                StatementType='statement_type_bill'
        """
        result_set = self.fetch_all(query, self.account_number)

        return [
            UrjanetPyMySqlDataSource.parse_account_row(row)
            for row in result_set
        ]
Beispiel #17
0
    def load_accounts(self) -> List[Account]:
        """
        Load third party urjanet "accounts" based on any utility account id that we have recorded for the service.

        The "xmlaccount" table has a mix of PG&E and third party Utility Providers.  This generation scraper is
        only loading urja accounts that are *not* PG&E providers.
        """
        self.validate()

        # For testing, where you may not have a utility service.
        utility_account_ids = [self.utility_account_number.strip()]
        stripped_historical_ids = []

        if self.utility_service:
            # Fetching historical utility_account_ids and gen_account_ids from snapshot table
            historical_account_ids = [
                account_id[0].strip() for account_id in (db.session.query(
                    UtilityServiceSnapshot.utility_account_id).filter(
                        UtilityServiceSnapshot.service ==
                        self.utility_service.oid,
                        UtilityServiceSnapshot.utility_account_id.isnot(None),
                        UtilityServiceSnapshot.utility_account_id != "",
                    ).all())
            ]
            for account_id in historical_account_ids:
                # PG&E utility account ids sometimes have a check digit (-0) at the end, and sometimes don't.
                # We're including a version w/out the check digit, just in case the Urjanet data
                # is missing it.
                stripped_historical_ids.append(_remove_check_digit(account_id))

            historical_gen_account_ids = [
                account_id[0].strip() for account_id in (db.session.query(
                    UtilityServiceSnapshot.gen_utility_account_id
                ).filter(
                    UtilityServiceSnapshot.service == self.utility_service.oid,
                    UtilityServiceSnapshot.gen_utility_account_id.isnot(None),
                    UtilityServiceSnapshot.gen_utility_account_id != "",
                ).all())
            ]

            for account_id in historical_gen_account_ids:
                stripped_historical_ids.append(_remove_check_digit(account_id))

            # Combining historical ids with values passed into scraper, and the current service config.
            # The history *should* contain all of these items, but we want to cover our bases.
            utility_account_ids = list(
                set(
                    itertools.chain(
                        historical_account_ids,
                        historical_gen_account_ids,
                        stripped_historical_ids,
                        list(
                            filter(
                                None,
                                [
                                    (self.utility_account_number
                                     or "").strip(),
                                    (self.account_number or "").strip(),
                                    (self.utility_service.utility_account_id
                                     or "").strip(),
                                    (self.utility_service.
                                     gen_utility_account_id or "").strip(),
                                ],
                            )),
                    )))

        log.info(
            "Searching across these PG&E account number variations: {}".format(
                [num for num in utility_account_ids]))

        # Locate accounts that are associated with the utility-account-numbers that
        # are *not* PG&E, leaving third party.
        query = """
           SELECT *
           FROM xmlaccount
           WHERE
               RawAccountNumber REGEXP %s
               AND UtilityProvider != 'PacGAndE'
        """

        accounts = [
            UrjanetPyMySqlDataSource.parse_account_row(row)
            for row in self.fetch_all(query, "|".join(utility_account_ids))
        ]
        account_pks = [account.PK for account in accounts]
        self.service_ids = self.get_all_service_ids(account_pks)
        log.info(
            "Searching for third party charges across these PG&E service_ids: {}"
            .format([num for num in self.service_ids]))
        return accounts