def from_json(cls, meter_id: int, readings: Dict[str, List[float]]) -> List["MeterReading"]:
    """Convert a dict of readings returned by a scraper to a list of MeterReadings.

    Raise InvalidMeterDataException if the meter interval doesn't match the
    size of a day's readings array, or if a value is not a float (or None).
    Don't create a MeterReading if a day's values are empty or all None.
    This does not compare against existing MeterReading objects for the
    same meter/day.
    """
    meter = db.session.query(Meter).get(meter_id)
    # number of interval values expected per day for this meter
    expected = int(24 * 60 / meter.interval)
    reading_objects: List[MeterReading] = []
    for dt_str, day_readings in readings.items():
        # skip days with no values, or values that are all None
        if not day_readings or set(day_readings) == {None}:
            continue
        # Cast integers (which are an acceptable input) to floating point
        # values for consistency. Leave all other values alone.
        day_readings = [
            float(val) if isinstance(val, int) else val for val in day_readings
        ]
        # reject any value that is not a float (None is allowed)
        for val in day_readings:
            if val is not None and not isinstance(val, float):
                raise InvalidMeterDataException(
                    "expected readings to be floats; found %s %s in %s" %
                    (val, type(val), day_readings))
        # reject a day whose length does not match the meter interval
        if len(day_readings) != expected:
            raise InvalidMeterDataException(
                "expected %s readings for meter with %s minute interval; found %s for %s"
                % (expected, meter.interval, len(day_readings), dt_str))
        reading_objects.append(
            MeterReading(
                meter=meter_id,
                occurred=date_parser.parse(dt_str).date(),
                readings=day_readings,
                frozen=False,
                modified=datetime.now(),
            ))
    return reading_objects
def parse_ival_data(s):
    """Parse a single interval-reading cell into a float.

    Return None for an empty / whitespace-only string. Raise
    InvalidMeterDataException (chained to the underlying ValueError so the
    original context is preserved) when the cell is not a valid number.
    """
    if s.strip() == "":
        return None
    try:
        return float(s)
    except ValueError as exc:
        # chain the cause so tracebacks show what float() rejected
        raise InvalidMeterDataException(
            "Invalid meter reading: {0}".format(s)) from exc
def to_positive(readings: IntervalReadings, meter: Meter) -> Tuple[IntervalReadings, List[IntervalIssue]]:
    """All readings should be positive values. The meter.direction field
    determines how the values should be treated when summing.

    Return updated readings and list of issues.

    The sign of the first nonzero value seen fixes the expected flow
    direction; any later value with the opposite sign is recorded as an
    IntervalIssue. Every nonzero value is replaced with its absolute value
    in the returned copy. If any mixed-sign issues were found, a Slack
    notification is posted and InvalidMeterDataException is raised —
    mixed signs usually mean a submeter is needed.
    """
    transformed = copy.deepcopy(readings)
    # sign ("positive"/"negative") of the first nonzero value encountered
    sign = None
    issues = []
    meter_tz = tz.gettz(meter.timezone)
    for day in readings:
        day_dt = date_parser.parse(day)
        for idx, val in enumerate(readings[day]):
            # None and 0.0 carry no sign information; skip them
            if not val:
                continue
            this_sign = "positive" if val > 0.0 else "negative"
            if not sign:
                sign = this_sign
            # all values in a set of readings data must have the same sign (flow direction)
            if this_sign != sign:
                # interval timestamp = start of day + index * meter interval minutes
                interval_dt = day_dt + timedelta(minutes=(idx * meter.interval))
                issues.append(
                    IntervalIssue(
                        interval_dt=interval_dt.replace(tzinfo=meter_tz),
                        error=
                        "sign of value is different than previously seen values",
                        value=val,
                    ))
            transformed[day][idx] = abs(val)
    # can't recover from mixed positive and negative values
    if issues:
        description: List[str] = []
        for row in issues:
            description.append(
                "%s = %s" %
                (row.interval_dt.strftime("%Y-%m-%d %H:%M"), row.value))
        account = meter.account()
        # link to the admin page when an account exists; otherwise just name the meter
        if account:
            url = "https://snapmeter.com/admin/accounts/%s/meters/%s" % (
                account.hex_id,
                meter.oid,
            )
        else:
            url = "Meter %s (%s)" % (meter.name, meter.oid)
        post_slack_message(
            "Scraper found mixed positive and negative readings for meter %s (%s); create a submeter to capture these."
            % (url, meter.direction),
            "#scrapers",
            ":exclamation:",
            username="******",
        )
        raise (InvalidMeterDataException(
            "mixed positive and negative values: %s" % description))
    return transformed, issues
def _parse_csv_row(self, row):
    """Validate one raw CSV row and build a CsvRow from it.

    Raises InvalidMeterDataException when the row has an unexpected number
    of columns or its date field cannot be parsed.
    """
    column_count = len(row)
    if column_count not in self.EXPECTED_CSV_COLUMNS:
        raise InvalidMeterDataException(
            "Unexpected CSV row length ({0} not in {1})".format(
                column_count, self.EXPECTED_CSV_COLUMNS))
    try:
        parsed_date = date_parser.parse(row[1])
    except Exception:
        raise InvalidMeterDataException(
            "Failed to parse date ({0})".format(row[1]))
    # columns: account, date, channel id, units, then interval values
    interval_values = [self._parse_ival_data(cell) for cell in row[4:]]
    return CsvRow(
        account=row[0],
        date=parsed_date,
        channel_id=str(row[2]).strip().lower(),
        units=row[3].lower().strip(),
        interval_data=interval_values,
    )
def parse_csv_row(row):
    """Convert one raw CSV row into a CsvRow.

    Raises InvalidMeterDataException for a row of unexpected length or an
    unparseable date field.
    """
    row_length = len(row)
    if row_length != EXPECTED_CSV_LEN:
        raise InvalidMeterDataException(
            "Unexpected CSV row length ({0} != {1})".format(
                row_length, EXPECTED_CSV_LEN))
    try:
        row_date = dateparser.parse(row[1])
    except Exception:
        raise InvalidMeterDataException(
            "Failed to parse date ({0})".format(row[1]))
    # columns: account, date, (unused), units, then interval values
    parsed_intervals = [
        NationalGridIntervalScraper.parse_ival_data(cell) for cell in row[4:]
    ]
    return CsvRow(
        account=row[0],
        date=row_date,
        units=row[3].lower().strip(),
        interval_data=parsed_intervals,
    )
def download_page_action( self, page: sce_pages.SceEnergyManagerGreenButtonDownload): page.download(self.start_date, self.end_date) # get filename prefix = f"{config.WORKING_DIRECTORY}/current" # filename looks like SCE_Usage_3-049-8416-02_10-01-20_to_10-15-20.csv log.info("downloaded files=%s", [fn for fn in os.listdir(prefix)]) filenames = [fn for fn in os.listdir(prefix) if self.service_id in fn] if not filenames: raise InvalidMeterDataException( "missing downloaded file containing %s" % self.service_id) with open("%s/%s" % (prefix, filenames[0])) as f: data = f.read() # parse out fields starting with date; save start (1st) date and value # add to timeline: self.interval_data_timeline.insert(dt, val) # regex to match line starting with: "date time starts_with_date_re = re.compile( r'^"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}') lines = data.splitlines() log.info(f"downloaded {len(lines)} lines") if not lines: raise DataSourceConfigurationError( "no data downloaded; may need to set serviceAddress metadata") to_kw = 60 / self._configuration.meter.interval timeline = Timeline(self.start_date, self.end_date) for data_line in lines: if not starts_with_date_re.match(data_line): continue # data_line has a \xa0 space before; remove that data_line = data_line.replace("\xa0", " ") # "2020-08-15 00:00:00 to 2020-08-15 00:15:00","2.800","" from_dt_string = data_line.split(" to ")[0].replace('"', "") from_dt = parse_date(from_dt_string) _value = data_line.split('"')[3].replace( ",", "" ) # not sure if there can be commas in the value but remove them if there are... # values are kWh: Usage(Real energy in kilowatt-hours); convert to kW using the meter interval value = float(_value) * to_kw timeline.insert(from_dt, value) self.interval_data_timeline = timeline
def parse_poway_pdf(pdf_filename: str, account_id: str) -> BillingDatum:
    """Extract billing dates, water usage, and cost from a City of Poway PDF bill.

    Parses the PDF text with regexes for the read-date range, consumption,
    and water charges. Uploads the PDF to S3 when the S3_BILL_UPLOAD
    feature flag is enabled.

    Raises InvalidMeterDataException when dates, usage, or cost cannot be
    found in the PDF text.
    """
    text = pdfparser.pdf_to_str(pdf_filename)
    used_pattern = r"Consumption (?P<units_used>[\d\.,]+) @"
    cost_pattern = r"(?P<water_charges>[\d\.,]+)\s+WATERBasic Service @"
    # date format: m/d/yyyy
    date_pattern = r"\d{1,2}\/\d{1,2}\/\d{4}"
    dates_pattern = (
        r"Total Current Charges.+?"
        fr"(?P<read_date_start>{date_pattern}) - (?P<read_date_end>{date_pattern})"
        fr"(?P<due_date>{date_pattern})"
        fr"(?P<statement_date>{date_pattern})")
    dates_match = re.search(dates_pattern, text)
    if not dates_match:
        raise InvalidMeterDataException(
            f"Couldn't parse dates from pdf: {text}")
    _dates = dates_match.group("read_date_start", "read_date_end",
                               "statement_date")
    start_date, end_date, statement_date = [
        parse_date(_date).date() for _date in _dates
    ]
    used_match = re.search(used_pattern, text)
    if not used_match:
        # bug fix: the "f" prefix was inside the string literal
        # ("fCouldn't ... {text}"), so {text} was never interpolated
        raise InvalidMeterDataException(
            f"Couldn't parse usage from pdf: {text}")
    used_text = used_match.group("units_used")
    used = float(used_text.replace(",", "").replace("$", ""))
    cost_match = re.search(cost_pattern, text)
    if not cost_match:
        raise InvalidMeterDataException(
            f"Couldn't parse cost from pdf: {text}")
    cost_text = cost_match.group("water_charges")
    cost = float(cost_text.replace(",", "").replace("$", ""))
    if config.enabled("S3_BILL_UPLOAD"):
        key = hash_bill(account_id, start_date, end_date, cost, 0, used)
        with open(pdf_filename, "rb") as pdf_data:
            attachments = [
                upload_bill_to_s3(
                    BytesIO(pdf_data.read()),
                    key,
                    source="customerconnect.poway.org",
                    statement=statement_date,
                    utility="utility:city-of-poway",
                    utility_account_id=account_id,
                )
            ]
    else:
        attachments = []
    # end is stored minus one day — presumably the read range is
    # end-exclusive upstream; NOTE(review): confirm this convention
    return BillingDatum(
        start=start_date,
        end=end_date - timedelta(days=1),
        statement=statement_date,
        cost=cost,
        peak=None,
        used=used,
        items=None,
        attachments=attachments,
        utility_code=None,
    )
def search_success_action(self, page: sce_pages.SceAccountSearchSuccess):
    """After a successful account search, open usage for this service id.

    Raises InvalidMeterDataException when the service id is not present in
    the search results.
    """
    found = page.view_usage_for_search_result(self.service_id)
    if found:
        return
    raise InvalidMeterDataException("service_id |%s| not found" %
                                    self.service_id)