def store_stats_database(statset: StatsSet) -> int:
    s = SessionLocal()

    records_to_store = [i.dict() for i in statset.stats]

    stmt = insert(Stats).values(records_to_store)
    stmt.bind = get_database_engine()
    stmt = stmt.on_conflict_do_update(
        index_elements=[
            "stat_date",
            "country",
            "stat_type",
        ],
        set_={
            "value": stmt.excluded.value,
        },
    )

    try:
        s.execute(stmt)
        s.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return 0
    finally:
        s.close()

    num_records = len(records_to_store)

    logger.info("Wrote {} records to database".format(num_records))

    return num_records
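# Every function below repeats the same insert / on_conflict_do_update /
# commit / close dance. A minimal sketch of how that could be factored out,
# assuming the same SessionLocal, get_database_engine, postgresql insert and
# logger used above; the helper name `upsert_records` and its parameters are
# hypothetical, not part of this codebase.
def upsert_records(model, records, index_elements, update_columns) -> int:
    """Insert records, updating update_columns on conflict."""
    if not records:
        return 0

    session = SessionLocal()

    stmt = insert(model).values(records)
    stmt = stmt.on_conflict_do_update(
        index_elements=index_elements,
        # map each updatable column to its EXCLUDED counterpart
        set_={col: getattr(stmt.excluded, col) for col in update_columns},
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return 0
    finally:
        session.close()

    return len(records)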
def process_unit_solution(table):
    session = SessionLocal()
    engine = get_database_engine()

    if "records" not in table:
        raise Exception("Invalid table: no records")

    records = table["records"]

    records_to_store = []
    # Since this can insert 1M+ records at a time we need to do a separate
    # in-memory check of primary key constraints. Use a set so membership
    # checks are O(1) rather than a list scan per record.
    records_primary_keys = set()

    for record in records:
        trading_interval = parse_date(
            record["SETTLEMENTDATE"], network=NetworkNEM, dayfirst=False
        )
        facility_code = normalize_duid(record["DUID"])

        if not trading_interval or not facility_code:
            continue

        _unique_set = (trading_interval, facility_code, "NEM")

        if _unique_set not in records_primary_keys:
            records_to_store.append(
                {
                    "trading_interval": trading_interval,
                    "facility_code": facility_code,
                    "eoi_quantity": float(record["INITIALMW"]),
                    "network_id": "NEM",
                }
            )
            records_primary_keys.add(_unique_set)

    # free
    records_primary_keys = set()

    logger.debug("Saving %d records", len(records_to_store))

    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        constraint="facility_scada_pkey",
        set_={"eoi_quantity": stmt.excluded.eoi_quantity},
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error: {}".format(e))
        return 0
    finally:
        session.close()

    return len(records_to_store)
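# At the 1M+ record volumes noted above, a single multi-row VALUES statement
# can exhaust memory or hit driver parameter limits. A minimal sketch of
# chunking the same upsert, assuming the FacilityScada model and session setup
# used above; the helper name and chunk_size are illustrative, not measured
# or part of this codebase.
def upsert_scada_chunked(records_to_store, chunk_size=5000) -> int:
    session = SessionLocal()
    stored = 0

    try:
        # issue one bounded statement per chunk, commit once at the end
        for start in range(0, len(records_to_store), chunk_size):
            chunk = records_to_store[start : start + chunk_size]

            stmt = insert(FacilityScada).values(chunk)
            stmt = stmt.on_conflict_do_update(
                constraint="facility_scada_pkey",
                set_={"eoi_quantity": stmt.excluded.eoi_quantity},
            )
            session.execute(stmt)

        session.commit()
        stored = len(records_to_store)
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
    finally:
        session.close()

    return stored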
def store_bom_records_temp_max(recs: List[Dict]) -> Dict[str, int]:
    # records_to_store = [i for i in recs if i["temp_max"] is not None]
    records_to_store = recs

    first = recs[0]

    update_field = "temp_max"

    if "temp_min" in first:
        update_field = "temp_min"

    engine = get_database_engine()
    session = SessionLocal()

    # insert
    stmt = insert(BomObservation).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["observation_time", "station_id"],
        set_={update_field: getattr(stmt.excluded, update_field)},
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return {"num_records": 0}
    finally:
        session.close()

    return {"num_records": len(records_to_store)}
def process_item(self, item, spider=None):
    s = SessionLocal()

    csvreader = csv.DictReader(item["content"].split("\n"))

    records_to_store = []
    primary_keys = []

    for row in csvreader:
        trading_interval = parse_date(
            row["TRADING_DAY_INTERVAL"], network=NetworkWEM, dayfirst=False
        )

        if trading_interval not in primary_keys:
            forecast_load = clean_float(row["FORECAST_EOI_MW"])

            records_to_store.append(
                {
                    "created_by": spider.name,
                    "trading_interval": trading_interval,
                    "network_id": "WEM",
                    "network_region": "WEM",
                    "forecast_load": forecast_load,
                    # generation_scheduled=row["Scheduled Generation (MW)"],
                    # generation_total=row["Total Generation (MW)"],
                    "price": clean_float(row["PRICE"]),
                }
            )

            primary_keys.append(trading_interval)

    stmt = insert(BalancingSummary).values(records_to_store)
    stmt.bind = get_database_engine()
    stmt = stmt.on_conflict_do_update(
        index_elements=[
            "trading_interval",
            "network_id",
            "network_region",
        ],
        set_={
            "price": stmt.excluded.price,
            "forecast_load": stmt.excluded.forecast_load,
        },
    )

    try:
        s.execute(stmt)
        s.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return 0
    finally:
        s.close()

    return len(records_to_store)
def update_weather() -> None:
    wc = WillyClient()

    for bom_code, willyid in WILLY_MAP.items():
        r = wc.get_location_temp(willyid, days=3, start_date="2021-10-14")

        data_points = r["observationalGraphs"]["temperature"]["dataConfig"][
            "series"
        ]["groups"]

        records = []

        logger.debug(r)

        for pointset in data_points:
            for p in pointset["points"]:
                r_dict = {
                    "station_id": bom_code,
                    "observation_time": unix_timestamp_to_aware_datetime(
                        p["x"], "Australia/Sydney"
                    ),
                    "temp_air": p["y"],
                }
                logger.debug(
                    "{} -> {}".format(
                        r_dict["observation_time"], r_dict["temp_air"]
                    )
                )
                records.append(r_dict)

        session = SessionLocal()
        engine = get_database_engine()

        stmt = insert(BomObservation).values(records)
        stmt.bind = engine
        stmt = stmt.on_conflict_do_update(
            index_elements=["observation_time", "station_id"],
            set_={
                # "temp_apparent": stmt.excluded.temp_apparent,
                "temp_air": stmt.excluded.temp_air,
            },
        )

        try:
            session.execute(stmt)
            session.commit()
        except Exception as e:
            logger.error("Error: {}".format(e))
        finally:
            session.close()
def process_pre_ap_price(table):
    session = SessionLocal()
    engine = get_database_engine()

    if "records" not in table:
        raise Exception("Invalid table: no records")

    records = table["records"]

    records_to_store = []

    for record in records:
        trading_interval = parse_date(
            record["SETTLEMENTDATE"], network=NetworkNEM, dayfirst=False
        )

        if not trading_interval:
            continue

        records_to_store.append(
            {
                "network_id": "NEM",
                "network_region": record["REGIONID"],
                "trading_interval": trading_interval,
                "price": record["PRE_AP_ENERGY_PRICE"],
            }
        )

    stmt = insert(BalancingSummary).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        constraint="balancing_summary_pkey",
        set_={"price": stmt.excluded.price},
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return 0
    finally:
        session.close()

    return len(records_to_store)
def process_meter_data_gen_duid(table):
    session = SessionLocal()
    engine = get_database_engine()

    if "records" not in table:
        raise Exception("Invalid table: no records")

    records = table["records"]

    records_to_store = []

    for record in records:
        trading_interval = parse_date(
            record["INTERVAL_DATETIME"], network=NetworkNEM, dayfirst=False
        )

        if not trading_interval:
            continue

        records_to_store.append(
            {
                "network_id": "NEM",
                "trading_interval": trading_interval,
                "facility_code": normalize_duid(record["DUID"]),
                "eoi_quantity": record["MWH_READING"],
            }
        )

    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        constraint="facility_scada_pkey",
        set_={"eoi_quantity": stmt.excluded.eoi_quantity},
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return 0
    finally:
        session.close()

    return len(records_to_store)
def process_item(self, item, spider=None):
    session = SessionLocal()
    engine = get_database_engine()

    csvreader = csv.DictReader(item["content"].split("\n"))

    records_to_store = []

    for row in csvreader:
        trading_interval = parse_date(
            row["Trading Interval"], dayfirst=True, network=NetworkWEM
        )
        facility_code = normalize_duid(row["Facility Code"])

        records_to_store.append(
            {
                "network_id": "WEM",
                "trading_interval": trading_interval,
                "facility_code": facility_code,
                "eoi_quantity": row["EOI Quantity (MW)"] or None,
                "generated": row["Energy Generated (MWh)"] or None,
            }
        )

    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        constraint="facility_scada_pkey",
        set_={
            "eoi_quantity": stmt.excluded.eoi_quantity,
            "generated": stmt.excluded.generated,
        },
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error: {}".format(e))
        # don't report a record count for a failed insert
        return 0
    finally:
        session.close()

    return len(records_to_store)
def process_item(self, item, spider=None):
    if "records" not in item:
        logger.error("Invalid return response")
        return 0

    records = item["records"]

    engine = get_database_engine()
    session = SessionLocal()

    records_to_store = []

    for record in records:
        records_to_store.append(
            {
                # "network_id": "WEM" if state == "WA" else "NEM",
                # "trading_interval": interval_time,
                # "facility_code": facility_code,
                # "generated": generated,
            }
        )

    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=[
            "trading_interval",
            "network_id",
            "facility_code",
            "is_forecast",
        ],
        set_={
            "generated": stmt.excluded.generated,
        },
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error: {}".format(e))
        return 0
    finally:
        session.close()

    return len(records_to_store)
def process_trading_regionsum(table: Dict[str, Any], spider: Spider) -> Dict:
    session = SessionLocal()
    engine = get_database_engine()

    if "records" not in table:
        raise Exception("Invalid table: no records")

    records = table["records"]

    limit = None
    records_to_store = []
    records_processed = 0
    # use a tuple as the composite key (a set is unordered and unhashable,
    # and would collapse equal members) and a set for O(1) membership checks
    primary_keys = set()

    for record in records:
        trading_interval = parse_date(
            record["SETTLEMENTDATE"],
            network=NetworkNEM,
            dayfirst=False,
            date_format="%Y/%m/%d %H:%M:%S",
        )

        if not trading_interval:
            continue

        _pk = (trading_interval, record["REGIONID"])

        if _pk in primary_keys:
            continue

        primary_keys.add(_pk)

        demand_total = None

        if "TOTALDEMAND" in record:
            demand_total = clean_float(record["TOTALDEMAND"])

        records_to_store.append(
            {
                "network_id": "NEM",
                "created_by": spider.name,
                "network_region": record["REGIONID"],
                "trading_interval": trading_interval,
                "demand_total": demand_total,
            }
        )

        records_processed += 1

        if limit and records_processed >= limit:
            logger.info(
                "Reached limit of: {} {}".format(limit, records_processed)
            )
            break

    stmt = insert(BalancingSummary).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["trading_interval", "network_id", "network_region"],
        set_={
            "demand_total": stmt.excluded.demand_total,
        },
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return {"num_records": 0}
    finally:
        session.close()

    return {"num_records": len(records_to_store)}
def process_dispatch_interconnectorres(table: Dict, spider: Spider) -> Dict:
    session = SessionLocal()
    engine = get_database_engine()

    if "records" not in table:
        raise Exception("Invalid table: no records")

    records = table["records"]

    records_to_store = []

    for record in records:
        ti_value = None

        if "SETTLEMENTDATE" in record:
            ti_value = record["SETTLEMENTDATE"]

        if "RUN_DATETIME" in record:
            ti_value = record["RUN_DATETIME"]

        if not ti_value:
            raise Exception("Require a trading interval")

        trading_interval = parse_date(ti_value, network=NetworkNEM, dayfirst=False)

        if not trading_interval:
            continue

        facility_code = normalize_duid(record["INTERCONNECTORID"])
        power_value = clean_float(record["METEREDMWFLOW"])

        records_to_store.append(
            {
                "network_id": "NEM",
                "created_by": spider.name,
                "facility_code": facility_code,
                "trading_interval": trading_interval,
                "generated": power_value,
            }
        )

    # remove duplicates. itertools.groupby only groups consecutive items, so
    # on unsorted input the same key can appear more than once; the `not in`
    # guard below keeps the first record seen per primary key either way
    return_records_grouped = {}

    for pk_values, rec_value in groupby(
        records_to_store,
        key=lambda r: (
            r.get("trading_interval"),
            r.get("network_id"),
            r.get("facility_code"),
        ),
    ):
        if pk_values not in return_records_grouped:
            return_records_grouped[pk_values] = list(rec_value).pop()

    records_to_store = list(return_records_grouped.values())

    # insert
    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=[
            "trading_interval",
            "network_id",
            "facility_code",
            "is_forecast",
        ],
        set_={"generated": stmt.excluded.generated},
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return {"num_records": 0}
    finally:
        session.close()

    return {"num_records": len(records_to_store)}
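# The groupby-plus-dict dedup above can be collapsed into a single dict pass,
# which needs no sorting and makes "one record per composite key" explicit.
# A minimal sketch; `dedupe_by_primary_key` is a hypothetical helper, not
# part of this codebase.
def dedupe_by_primary_key(records):
    deduped = {}

    for rec in records:
        pk = (
            rec.get("trading_interval"),
            rec.get("network_id"),
            rec.get("facility_code"),
        )
        # keep the first record seen for each composite primary key
        if pk not in deduped:
            deduped[pk] = rec

    return list(deduped.values())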
def process_item(self, item, spider=None):
    if "records" not in item:
        logger.error("Invalid return response")
        return 0

    records = item["records"]

    is_latest = False
    record_date = None

    if "meta" in item:
        if "is_latest" in item["meta"]:
            is_latest = item["meta"]["is_latest"]

        if "record_date" in item["meta"]:
            record_date = item["meta"]["record_date"]

    if "postcode" not in records:
        logger.error("No postcode data")
        return 0

    if "installations" not in records:
        logger.error("No installations data")
        return 0

    if "postcodeCapacity" not in records:
        logger.error("No postcode capacity data")
        return 0

    postcode_gen = records["postcode"]
    postcode_capacity = records["postcodeCapacity"]
    installations = records["installations"]

    engine = get_database_engine()
    session = SessionLocal()

    records_to_store = []

    created_at = datetime.now()
    created_by = ""

    if spider and hasattr(spider, "name"):
        created_by = spider.name

    for record in postcode_gen:
        for state, prefix in STATE_POSTCODE_PREFIXES.items():
            facility_code = "{}_{}_{}".format(
                ROOFTOP_CODE, "apvi".upper(), state.upper()
            )

            interval_time = parse_date(
                record["ts"],
                dayfirst=False,
                yearfirst=True,
            )

            interval_time = interval_time.astimezone(NetworkNEM.get_timezone())

            generated = sum(
                [
                    float(v) / 100 * postcode_capacity[k]
                    for k, v in record.items()
                    if k.startswith(prefix)
                    and v
                    and k in postcode_capacity
                    and k[:2] not in WA_NON_SWIS
                ]
            )

            if not generated:
                continue

            __record = {
                "created_by": created_by,
                "created_at": created_at,
                "network_id": "APVI",
                "trading_interval": interval_time,
                "facility_code": facility_code,
                "generated": generated,
            }

            records_to_store.append(__record)

    STATE_CAPACITIES = {}

    if is_latest:
        # temporarily only run getting capacities on latest
        logger.info("Updating capacities on %s", record_date)

        for postcode_prefix, capacity_val in postcode_capacity.items():
            for state, prefix in STATE_POSTCODE_PREFIXES.items():
                if state not in STATE_CAPACITIES:
                    STATE_CAPACITIES[state] = 0

                if postcode_prefix.startswith(prefix):
                    STATE_CAPACITIES[state] += capacity_val

        for state, state_capacity in STATE_CAPACITIES.items():
            facility_code = "{}_{}_{}".format(
                ROOFTOP_CODE, "apvi".upper(), state.upper()
            )

            state_facility: Facility = (
                session.query(Facility).filter_by(code=facility_code).one_or_none()
            )

            if not state_facility:
                raise Exception(
                    "Could not find rooftop facility for {}".format(facility_code)
                )

            state_facility.capacity_registered = state_capacity

            if state.lower() in installations:
                state_number_units = installations[state.lower()]
                state_facility.unit_number = state_number_units

            session.add(state_facility)
            session.commit()

    if len(records_to_store) < 1:
        return 0

    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=[
            "trading_interval",
            "network_id",
            "facility_code",
            "is_forecast",
        ],
        set_={
            "generated": stmt.excluded.generated,
            "created_by": stmt.excluded.created_by,
        },
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error: {}".format(e))
        return 0
    finally:
        session.close()

    return len(records_to_store)
def process_item(self, item, spider=None):
    session = SessionLocal()
    engine = get_database_engine()

    csvreader = csv.DictReader(item["content"].split("\n"))

    records_to_store = []
    last_asat = None

    for row in csvreader:
        if row["AS_AT"] != "":
            last_asat = parse_date(row["AS_AT"], dayfirst=True, network=NetworkWEM)

        if not last_asat or not isinstance(last_asat, datetime):
            logger.error("Invalid row or no datetime")
            continue

        # We need to pivot the table since columns are time intervals
        for i in range(1, 48):
            column = f"I{i:02}"

            if column not in row:
                logger.error("Do not have data for interval {}".format(column))
                continue

            # i is always >= 1 here, so each column offsets back from the
            # AS_AT timestamp in half-hour steps
            interval = last_asat - timedelta(minutes=(i - 1) * 30)

            facility_code = normalize_duid(row["FACILITY_CODE"])

            val = None

            try:
                val = float(row[column]) / 2 or None
            except ValueError:
                pass

            records_to_store.append(
                {
                    "network_id": "WEM",
                    "trading_interval": interval,
                    "facility_code": facility_code,
                    "generated": val,
                }
            )

    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        constraint="facility_scada_pkey",
        set_={"generated": stmt.excluded.generated},
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return 0
    finally:
        session.close()

    return len(records_to_store)
def process_item(self, item, spider=None):
    if not item:
        logger.error("No item in pipeline")
        return None

    if "content" not in item:
        logger.error("No content in pipeline")
        return None

    content = item["content"]

    if "ELEC_NEM_SUMMARY" not in content:
        logger.error("Invalid summary return")
        return None

    summary = content["ELEC_NEM_SUMMARY"]

    s = SessionLocal()

    records_to_store = []

    for row in summary:
        trading_interval = parse_date(row["SETTLEMENTDATE"], network=NetworkNEM)

        demand_total = clean_float(row["TOTALDEMAND"])
        generation_scheduled = clean_float(row["SCHEDULEDGENERATION"])
        generation_non_scheduled = clean_float(row["SEMISCHEDULEDGENERATION"])

        records_to_store.append(
            {
                "created_by": spider.name,
                "trading_interval": trading_interval,
                "network_id": "NEM",
                "network_region": row["REGIONID"].strip(),
                # "demand_total": demand_total,
                "generation_scheduled": generation_scheduled,
                "generation_non_scheduled": generation_non_scheduled,
            }
        )

    stmt = insert(BalancingSummary).values(records_to_store)
    stmt.bind = get_database_engine()
    stmt = stmt.on_conflict_do_update(
        index_elements=[
            "trading_interval",
            "network_id",
            "network_region",
        ],
        set_={
            # "demand_total": stmt.excluded.demand_total,
            "generation_scheduled": stmt.excluded.generation_scheduled,
            "generation_non_scheduled": stmt.excluded.generation_non_scheduled,
        },
    )

    try:
        s.execute(stmt)
        s.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return {"num_records": 0}
    finally:
        s.close()

    return {"num_records": len(records_to_store)}
def process_item(self, item, spider):
    session = SessionLocal()
    engine = get_database_engine()

    records_to_store = []

    if "records" not in item:
        return 0

    records = item["records"]

    for obs in records:
        records_to_store.append(
            {
                "station_id": obs["code"],
                "observation_time": parse_date(
                    obs["aifstime_utc"], dayfirst=False, is_utc=True
                ),
                "temp_apparent": obs["apparent_t"],
                "temp_air": obs["air_temp"],
                "press_qnh": obs["press_qnh"],
                "wind_dir": obs["wind_dir"],
                "wind_spd": obs["wind_spd_kmh"],
                "wind_gust": obs["gust_kmh"],
                "cloud": obs["cloud"].replace("-", ""),
                "cloud_type": obs["cloud_type"].replace("-", ""),
                "humidity": obs["rel_hum"],
            }
        )

    stmt = insert(BomObservation).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        constraint="bom_observation_pkey",
        set_={
            "temp_apparent": stmt.excluded.temp_apparent,
            "temp_air": stmt.excluded.temp_air,
            "press_qnh": stmt.excluded.press_qnh,
            "wind_dir": stmt.excluded.wind_dir,
            "wind_spd": stmt.excluded.wind_spd,
            "wind_gust": stmt.excluded.wind_gust,
            "cloud": stmt.excluded.cloud,
            "cloud_type": stmt.excluded.cloud_type,
            "humidity": stmt.excluded.humidity,
        },
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error: {}".format(e))
        return 0
    finally:
        session.close()

    return len(records_to_store)
def process_item(self, item, spider):
    session = SessionLocal()
    engine = get_database_engine()

    records_to_store = []

    if "records" not in item:
        return 0

    records = item["records"]

    if "header" not in item:
        logger.error("No header in bom observation")
        return 0

    header = item["header"]

    if "state_time_zone" not in header:
        logger.error("No state timezone in header: %s", header)
        return 0

    timezone_state: str = header["state_time_zone"].strip().upper()

    if timezone_state not in STATE_TO_TIMEZONE:
        logger.error("No timezone for state: %s", timezone_state)
        return 0

    timezone = pytz.timezone(STATE_TO_TIMEZONE[timezone_state])

    for obs in records:
        # parse first and check for None before converting the timezone,
        # otherwise astimezone would raise on an unparseable date
        observation_time = parse_date(
            obs["aifstime_utc"], dayfirst=False, is_utc=True
        )
        code = obs["code"]

        if not observation_time or not code:
            continue

        observation_time = observation_time.astimezone(timezone)

        records_to_store.append(
            {
                "station_id": code,
                "observation_time": observation_time,
                "temp_apparent": obs["apparent_t"],
                "temp_air": obs["air_temp"],
                "press_qnh": obs["press_qnh"],
                "wind_dir": obs["wind_dir"],
                "wind_spd": obs["wind_spd_kmh"],
                "wind_gust": obs["gust_kmh"],
                "cloud": obs["cloud"].replace("-", ""),
                "cloud_type": obs["cloud_type"].replace("-", ""),
                "humidity": obs["rel_hum"],
            }
        )

    if not len(records_to_store):
        return 0

    stmt = insert(BomObservation).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["observation_time", "station_id"],
        set_={
            "temp_apparent": stmt.excluded.temp_apparent,
            "temp_air": stmt.excluded.temp_air,
            "press_qnh": stmt.excluded.press_qnh,
            "wind_dir": stmt.excluded.wind_dir,
            "wind_spd": stmt.excluded.wind_spd,
            "wind_gust": stmt.excluded.wind_gust,
            "cloud": stmt.excluded.cloud,
            "cloud_type": stmt.excluded.cloud_type,
            "humidity": stmt.excluded.humidity,
        },
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error: {}".format(e))
        return 0
    finally:
        session.close()

    return len(records_to_store)