def process_item(self, item, spider=None):
    """Upsert MMS STATDUALLOC records: map each DUID to its station and
    create missing Station/Facility rows.

    item: iterable of dicts with at least "DUID" and "STATIONID" keys.
    Commits once per record; errors are logged and processing continues.
    """
    s = self.session()
    records_updated = 0
    records_created = 0
    for record in item:
        created = False
        duid = normalize_duid(record["DUID"])
        # A DUID may be remapped to a different station code via the
        # facility_map_station override table.
        station_code = facility_map_station(
            duid, normalize_duid(record["STATIONID"]))
        station = (s.query(Station).filter(
            Station.network_code == station_code).one_or_none())
        facility = (s.query(Facility).filter(
            Facility.network_code == duid).one_or_none())
        if not station:
            # New station is not s.add()'ed directly; it is persisted via
            # the facility.station relationship cascade below.
            station = Station(
                code=station_code,
                network_code=station_code,
                network_id="NEM",
                created_by="au.nem.mms.statdualloc",
            )
        if not facility:
            # Facilities sourced from MMS dumps default to retired; other
            # pipelines promote the status later.
            facility = Facility(
                code=duid,
                network_code=duid,
                network_id="NEM",
                status_id="retired",
                created_by="au.nem.mms.statdualloc",
            )
            records_created += 1
            created = True
        else:
            facility.updated_by = "au.nem.mms.statdualloc"
            records_updated += 1
        facility.station = station
        try:
            s.add(facility)
            s.commit()
        except Exception as e:
            # NOTE(review): no rollback here - a failed commit may poison
            # subsequent commits in this session; confirm intended.
            logger.error(e)
        logger.debug("{} facility record with id {}".format(
            "Created" if created else "Updated", duid))
    logger.info("Created {} facility records and updated {}".format(
        records_created, records_updated))
def process_unit_solution(table):
    """Bulk-upsert NEM unit solution (INITIALMW) records into facility_scada.

    table: dict with a "records" list of MMS rows containing
    SETTLEMENTDATE, DUID and INITIALMW.

    Returns the number of records stored, or 0 on insert failure.
    Raises Exception when the table has no "records" key.
    """
    session = SessionLocal()
    engine = get_database_engine()

    if "records" not in table:
        raise Exception("Invalid table no records")

    records = table["records"]

    records_to_store = []
    # Since this can insert 1M+ records at a time we need to do a
    # separate in-memory check of primary key constraints.
    # FIX: was a list, making every membership check O(n) and the whole
    # dedup pass quadratic; a set gives O(1) lookups.
    records_primary_keys = set()

    for record in records:
        trading_interval = parse_date(
            record["SETTLEMENTDATE"], network=NetworkNEM, dayfirst=False
        )
        facility_code = normalize_duid(record["DUID"])

        if not trading_interval or not facility_code:
            continue

        _unique_set = (trading_interval, facility_code, "NEM")

        if _unique_set not in records_primary_keys:
            records_to_store.append(
                {
                    "trading_interval": trading_interval,
                    "facility_code": facility_code,
                    "eoi_quantity": float(record["INITIALMW"]),
                    "network_id": "NEM",
                }
            )
            records_primary_keys.add(_unique_set)

    # free
    records_primary_keys = set()

    logger.debug("Saving %d records", len(records_to_store))

    # FIX: insert().values([]) raises - bail out early when every row was
    # filtered out above.
    if not records_to_store:
        session.close()
        return 0

    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        constraint="facility_scada_pkey",
        set_={"eoi_quantity": stmt.excluded.eoi_quantity},
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error: {}".format(e))
        return 0
    finally:
        session.close()

    return len(records_to_store)
def process_item(self, item, spider=None):
    """Upsert WEM live participants from a CSV payload.

    item: dict with a "content" key containing CSV text with at least a
    PARTICIPANT_CODE column.

    Returns the number of rows successfully committed.
    """
    s = self.session()
    record_count = 0
    csvreader = csv.DictReader(item["content"].split("\n"))
    for row in csvreader:
        if "PARTICIPANT_CODE" not in row:
            logger.error("Invalid row")
            continue
        participant = None
        # NOTE(review): created_record is set but never read afterwards -
        # looks like leftover bookkeeping; confirm before removing.
        created_record = False
        participant_code = normalize_duid(
            normalize_string(row["PARTICIPANT_CODE"])
        )
        # Prefer the display name; fall back to the (optional) full name.
        participant_name = participant_name_filter(
            row["PARTICIPANT_DISPLAY_NAME"]
        ) or participant_name_filter(
            row.get("PARTICIPANT_FULL_NAME", None)
        )
        participant = (
            s.query(Participant)
            .filter(Participant.code == participant_code)
            .one_or_none()
        )
        if not participant:
            participant = Participant(
                code=participant_code,
                name=participant_name,
                created_by="pipeline.wem.live.participant",
            )
            created_record = True
            logger.info(
                "Created new WEM participant: {}".format(participant_code)
            )
        elif participant.name != participant_name:
            # Only touch existing rows when the name actually changed.
            participant.name = participant_name
            participant.updated_by = "pipeline.wem.live.participant"
            logger.info(
                "Updated WEM participant: {}".format(participant_code)
            )
        try:
            s.add(participant)
            s.commit()
            record_count += 1
        except Exception as e:
            logger.error(e)
    s.close()
    return record_count
def process_item(self, item, spider=None):
    """Process MMS station-status records.

    item: iterable of dicts with STATIONID and STATUS keys.

    NOTE(review): `status` is read but never applied to the station, so
    the add/commit below is effectively a no-op re-save - the status
    mapping is still a @TODO.
    """
    s = self.session()
    for record in item:
        duid = normalize_duid(record["STATIONID"])
        # authorized_date = name_normalizer(record["AUTHORISEDDATE"])
        # @TODO this needs to be mapped to v3 state
        status = record["STATUS"]
        station = (s.query(Station).filter(
            Station.network_code == duid).one_or_none())
        if not station:
            logger.error("Could not find station {}".format(duid))
            continue
        # @TODO station statuses -> facilities should be
        # set to retired if active
        try:
            s.add(station)
            s.commit()
        except Exception as e:
            logger.error(e)
def power_unit(
    unit_code: str = Query(..., description="Unit code"),
    network_code: str = Query(..., description="Network code"),
    interval_human: str = Query(None, description="Interval"),
    period_human: str = Query("7d", description="Period"),
    engine=Depends(get_database_engine),
) -> OpennemDataSet:
    """API endpoint: power output time series for a single unit.

    Raises HTTP 404 when the network is unknown, no stats rows are
    returned, or the stats factory produces no output.
    """
    network = network_from_network_code(network_code)

    if not network:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No such network",
        )

    # Default the interval to the network's native interval size
    # (e.g. "5m" for NEM) when the caller didn't specify one.
    if not interval_human:
        interval_human = "{}m".format(network.interval_size)

    interval = human_to_interval(interval_human)
    period = human_to_period(period_human)
    units = get_unit("power")

    stats = []

    facility_codes = [normalize_duid(unit_code)]

    query = power_facility_query(facility_codes, network.code, interval=interval, period=period)

    with engine.connect() as c:
        results = list(c.execute(query))

    # Row layout: (interval, value[, group]) - group is optional.
    stats = [
        DataQueryResult(interval=i[0], result=i[1], group_by=i[2] if len(i) > 1 else None)
        for i in results
    ]

    if len(stats) < 1:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Unit stats not found",
        )

    output = stats_factory(
        stats,
        code=unit_code,
        interval=interval,
        period=period,
        units=units,
        network=network,
    )

    if not output:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No stats found",
        )

    return output
def get_unique_duid(units: list) -> Optional[str]:
    """Return the normalized DUID of the first unit record, or None.

    units: list of unit record dicts that may carry a "duid" key.
    Returns None for non-list input, empty lists, or records without
    a "duid" key.
    """
    # FIX: use isinstance rather than `type(...) is list` - idiomatic and
    # also accepts list subclasses.
    if not isinstance(units, list) or len(units) < 1:
        return None

    first_record = units[0]

    if "duid" not in first_record:
        return None

    return normalize_duid(first_record["duid"])
def process_item(self, item, spider=None):
    """Upsert WEM participants from a registration CSV.

    item: dict with a "content" key of CSV text carrying
    "Participant Code" / "Participant Name" columns.

    Returns the number of rows successfully committed.
    """
    s = self.session()
    record_count = 0
    csvreader = csv.DictReader(item["content"].split("\n"))
    for row in csvreader:
        if "Participant Code" not in row:
            logger.error("Invalid row")
            continue
        participant = None
        participant_code = normalize_duid(row["Participant Code"])
        participant_name = participant_name_filter(row["Participant Name"])
        participant = (
            s.query(Participant)
            .filter(Participant.code == participant_code)
            .one_or_none()
        )
        if not participant:
            participant = Participant(
                code=participant_code,
                name=participant_name,
                # @TODO WEM provides these but nem doesn't so ignore for noe
                # address=row["Address"],
                # city=row["City"],
                # state=row["State"],
                # postcode=row["Postcode"],
                created_by="pipeline.wem.participant",
            )
            logger.info(
                "Created new WEM participant: {}".format(participant_code)
            )
        elif participant.name != participant_name:
            # Existing participant: only update when the name changed.
            participant.name = participant_name
            participant.updated_by = "pipeline.wem.participant"
            logger.info(
                "Updated WEM participant: {}".format(participant_code)
            )
        try:
            s.add(participant)
            s.commit()
            record_count += 1
        except Exception as e:
            logger.error(e)
    s.close()
    return record_count
def get_unique_duid(units: List) -> Optional[str]:
    """Return the normalized duid from the first unit record, if set.

    Gives back None for an empty list or when the first record has no
    truthy "duid" value.
    """
    if not units:
        return None

    head = units[0]
    has_duid = "duid" in head and head["duid"]

    return normalize_duid(head["duid"]) if has_duid else None
def process_item(self, item, spider=None):
    """Upsert MMS station records (names and postal address details).

    item: iterable of dicts with STATIONID, STATIONNAME and address keys.
    Commits per record; errors are logged and the loop continues.
    """
    s = self.session()
    records_updated = 0
    records_created = 0
    for record in item:
        created = False
        duid = normalize_duid(record["STATIONID"])
        # Cleaned display name vs. the raw network-supplied name.
        name = station_name_cleaner(record["STATIONNAME"])
        network_name = normalize_string(record["STATIONNAME"])
        address1 = normalize_string(record["ADDRESS1"])
        address2 = normalize_string(record["ADDRESS2"])
        city = normalize_string(record["CITY"])
        state = normalize_string(record["STATE"]).capitalize()
        postcode = normalize_string(record["POSTCODE"])
        station = (s.query(Station).filter(
            Station.network_code == duid).one_or_none())
        if not station:
            station = Station(
                code=duid,
                network_code=duid,
                created_by="au.nem.mms.stations",
            )
            records_created += 1
            created = True
        else:
            station.updated_by = "au.nem.mms.stations"
            records_updated += 1
        # Fields below are (re)assigned for both new and existing rows.
        station.name = name
        station.network_id = "NEM"
        station.network_name = network_name
        station.address1 = address1
        station.address2 = address2
        station.locality = city
        station.state = state
        station.postcode = postcode
        try:
            s.add(station)
            s.commit()
        except Exception as e:
            logger.error(e)
        logger.debug("{} station record with id {}".format(
            "Created" if created else "Updated", duid))
    logger.info("Created {} records and updated {}".format(
        records_created, records_updated))
def get_station_record_from_facilities(units: list):
    """Pick the unit record best suited to represent the station.

    Returns the first record with a usable NameCapacity, otherwise the
    first record. Raises Exception when units is not a non-empty list.
    """
    # FIX: isinstance over `type(...) is list`.
    if not isinstance(units, list) or len(units) < 1:
        raise Exception("Passed units list with no units ..")

    if len(units) == 1:
        return units[0]

    for u in units:
        # FIX: removed an unused `duid = normalize_duid(u["duid"])` local
        # that was computed and thrown away on every iteration.
        if clean_capacity(u["NameCapacity"]):
            return u

    return units[0]
def process_item(self, item, spider=None):
    """Upsert MMS DUDETAIL records: per-DUID capacity and dispatch type.

    item: iterable of dicts with DUID, REGISTEREDCAPACITY, MAXCAPACITY
    and DISPATCHTYPE keys.
    """
    s = self.session()
    records_updated = 0
    records_created = 0
    for record in item:
        created = False
        duid = normalize_duid(record["DUID"])
        capacity_registered = clean_capacity(record["REGISTEREDCAPACITY"])
        capacity_max = clean_capacity(record["MAXCAPACITY"])
        dispatch_type = parse_dispatch_type(record["DISPATCHTYPE"])
        facility = (s.query(Facility).filter(
            Facility.network_code == duid).one_or_none())
        if not facility:
            # New facilities from MMS dumps start retired; other pipelines
            # promote the status later.
            facility = Facility(
                code=duid,
                network_code=duid,
                status_id="retired",
                dispatch_type=dispatch_type,
                created_by="au.nem.mms.dudetail",
            )
            records_created += 1
            created = True
        else:
            facility.updated_by = "au.nem.mms.dudetail"
            records_updated += 1
        # Capacities are refreshed for both new and existing rows.
        facility.capacity_registered = capacity_registered
        facility.capacity_max = capacity_max
        try:
            s.add(facility)
            s.commit()
        except Exception as e:
            logger.error(e)
        logger.debug("MMS Dudetail: {} facility record with id {}".format(
            "Created" if created else "Updated", duid))
    logger.info(
        "MMS Dudetail:Created {} facility records and updated {}".format(
            records_created, records_updated))
def process_meter_data_gen_duid(table):
    """Bulk-upsert NEM meter data (MWH_READING) into facility_scada.

    table: dict with a "records" list of rows carrying
    INTERVAL_DATETIME, DUID and MWH_READING.

    Returns the number of records stored, or 0 on failure.
    Raises Exception when the table has no "records" key.
    """
    session = SessionLocal()
    engine = get_database_engine()

    if "records" not in table:
        raise Exception("Invalid table no records")

    records = table["records"]
    records_to_store = []

    for record in records:
        trading_interval = parse_date(
            record["INTERVAL_DATETIME"], network=NetworkNEM, dayfirst=False
        )

        if not trading_interval:
            continue

        records_to_store.append(
            {
                "network_id": "NEM",
                "trading_interval": trading_interval,
                "facility_code": normalize_duid(record["DUID"]),
                "eoi_quantity": record["MWH_READING"],
            }
        )

    # FIX: insert().values([]) raises - nothing to do when all rows were
    # filtered out (e.g. every date failed to parse).
    if not records_to_store:
        session.close()
        return 0

    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        constraint="facility_scada_pkey",
        set_={"eoi_quantity": stmt.excluded.eoi_quantity},
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return 0
    finally:
        session.close()

    return len(records_to_store)
def process_item(self, item, spider=None):
    """Bulk-upsert WEM facility scada rows from a CSV payload.

    item: dict with a "content" key of CSV text carrying
    "Trading Interval", "Facility Code", "EOI Quantity (MW)" and
    "Energy Generated (MWh)" columns.

    Returns the number of records prepared (even if the insert failed -
    errors are only logged).
    """
    session = SessionLocal()
    engine = get_database_engine()
    csvreader = csv.DictReader(item["content"].split("\n"))
    records_to_store = []
    for row in csvreader:
        # NOTE(review): unlike the sibling NEM processors there is no
        # `if not trading_interval: continue` guard here - confirm
        # parse_date cannot return None for this feed.
        trading_interval = parse_date(
            row["Trading Interval"], dayfirst=True, network=NetworkWEM
        )
        facility_code = normalize_duid(row["Facility Code"])
        records_to_store.append(
            {
                "network_id": "WEM",
                "trading_interval": trading_interval,
                "facility_code": facility_code,
                # Empty CSV strings are coerced to NULL.
                "eoi_quantity": row["EOI Quantity (MW)"] or None,
                "generated": row["Energy Generated (MWh)"] or None,
            }
        )
    # NOTE(review): insert().values([]) raises if the CSV had no rows.
    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        constraint="facility_scada_pkey",
        set_={
            "eoi_quantity": stmt.excluded.eoi_quantity,
            "generated": stmt.excluded.generated,
        },
    )
    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error: {}".format(e))
    finally:
        session.close()
    return len(records_to_store)
def process_item(self, item, spider=None):
    """Bulk-upsert WEM facility scada rows (pipeline variant using the
    spider's own session/engine).

    Returns the execute() result proxy on success, None on failure.
    """
    s = self.session()
    csvreader = csv.DictReader(item["content"].split("\n"))
    objects = []
    for row in csvreader:
        trading_interval = self.parse_interval(row["Trading Interval"])
        facility_code = normalize_duid(row["Facility Code"])
        objects.append(
            {
                "network_id": "WEM",
                "trading_interval": trading_interval,
                "facility_code": facility_code,
                # Empty CSV strings become NULL.
                "eoi_quantity": row["EOI Quantity (MW)"] or None,
                "generated": row["Energy Generated (MWh)"] or None,
            }
        )
    stmt = insert(FacilityScada).values(objects)
    stmt.bind = self.engine
    stmt = stmt.on_conflict_do_update(
        constraint="facility_scada_pkey",
        set_={
            "eoi_quantity": stmt.excluded.eoi_quantity,
            "generated": stmt.excluded.generated,
        },
    )
    try:
        r = s.execute(stmt)
        s.commit()
        # Session is still closed by the finally block below.
        return r
    except Exception as e:
        logger.error("Error: {}".format(e))
    finally:
        s.close()
    return None
def get_unit_code(
    unit: UnitSchema,
    duid: Optional[str] = None,
    station_name: Optional[str] = None,
) -> str:
    """
    This takes the duid from the network and the unit info and creates
    a unique opennem code

    This should be unique across all units for a network

    Raises Exception when neither a duid nor a station name is supplied.
    """
    unit_id = None

    # @TODO - check if we can strip, probably not a good idea
    if not duid:
        if not station_name:
            raise Exception(
                "Cannot generate a unit code without both duid and station name"
            )
        # Derive a base code from the station name when no duid exists.
        duid = get_basecode(station_name)

    duid_clean = normalize_duid(duid)

    # Append the unit id only when the duid doesn't already end with it.
    if not duid_clean.endswith(str(unit.id)):
        unit_id = str(unit.id)

    # FIX: was `if unit_id == 1:` - comparing the *string* unit_id with the
    # int 1 never matched, so unit 1 was wrongly suffixed (e.g. "DUID_1").
    if unit_id == "1":
        unit_id = None

    components = [duid_clean, unit.alias, unit_id]

    # empty out None's
    components = [i for i in components if i]

    unit_code = "_".join(components)

    return unit_code
def import_emissions_map(file_name: str) -> None:
    """Import emission factors from CSV files for each network

    the format of the csv file is
    station_name,network_id,network_region,facility_code,emissions_factor_co2,fueltech_id,emission_factor_source
    """
    session = SessionLocal()

    content = load_data(file_name, from_project=True, skip_loaders=True)
    csv_content = content.splitlines()
    csvreader = csv.DictReader(csv_content)

    for rec in csvreader:
        network_id = rec["network_id"]
        facility_code = normalize_duid(rec["facility_code"])
        emissions_intensity = clean_float(rec["emissions_factor_co2"])

        if not facility_code:
            # FIX: the old message ("No emissions intensity for {}")
            # contradicted this guard, which checks the facility code.
            # NOTE(review): possibly the guard was *meant* to test
            # emissions_intensity instead - confirm; as written, a row
            # with a code but no factor still nulls the stored factor.
            logger.info("Skipping row with no facility code")
            continue

        facility = (session.query(Facility).filter_by(
            code=facility_code).filter_by(network_id=network_id).one_or_none())

        if not facility:
            logger.info("No stored facility for {}".format(facility_code))
            continue

        facility.emissions_factor_co2 = emissions_intensity
        session.add(facility)
        logger.info("Updated {} to {}".format(facility_code, emissions_intensity))

    session.commit()

    return None
def code_clean(cls, v):
    """Validator: coerce the incoming code value to a normalized DUID."""
    cleaned = normalize_duid(v)
    return cleaned
def process_facilities(self, records):
    """Import AEMO General Information (GI) facility records.

    records: mapping of station key -> list of unit/facility record dicts.

    For each group: resolve (or create) the owning Station - by unique
    DUID, then by DUID+region, then by cleaned station name - then upsert
    every unit as a Facility. Commits per facility; a final commit/close
    wraps the whole run.
    """
    s = self.session()
    # Store a list of all existing duids
    # NOTE(review): all_duids is built but never read below - looks like
    # leftover code; confirm before removing.
    all_duids = list(
        set([
            i[0] for i in s.query(Facility.network_code).filter(
                Facility.network_code != None).all()
        ]))
    for _, facility_records in records.items():
        facility_index = 1
        facility_station = None
        created_station = False
        station_network_name = record_get_station_name(facility_records)
        station_name = station_name_cleaner(station_network_name)
        duid_unique = has_unique_duid(facility_records)
        facility_count = len(facility_records)
        # Step 1. Find the station
        # First by duid if it's unique
        duid = get_unique_duid(facility_records)
        # all GI records should have a region
        station_network_region = get_unique_reqion(facility_records)
        # This is the most suitable unit record to use for the station
        # see helper above
        facility_station_record = get_station_record_from_facilities(
            facility_records)
        if duid and duid_unique and facility_count == 1:
            facility_lookup = None
            try:
                facility_lookup = (s.query(Facility).filter(
                    Facility.network_code == duid).filter(
                        Facility.network_region ==
                        station_network_region).one_or_none())
            except MultipleResultsFound:
                logger.error(
                    "Found multiple duid for station with code {}".format(
                        duid))
                continue
            if facility_lookup and facility_lookup.station:
                facility_station = facility_lookup.station
        # Multi-unit groups (or non-unique duids): take the first match.
        # NOTE(review): operator precedence here is
        # (duid and (duid_unique and count > 1)) or (not duid_unique) -
        # the second arm runs even when duid is None; confirm intended.
        if (duid and (duid_unique and facility_count > 1)
                or not duid_unique):
            facility_lookup = (s.query(Facility).filter(
                Facility.network_code == duid).filter(
                    Facility.network_region ==
                    station_network_region).first())
            if facility_lookup and facility_lookup.station:
                facility_station = facility_lookup.station
        if not facility_station and facility_station_join_by_name(
                station_name):
            try:
                facility_station = (s.query(Station).filter(
                    Station.name == station_name).one_or_none())
            except MultipleResultsFound:
                logger.warning(
                    "Multiple results found for station name : {}".format(
                        station_name))
                facility_station = None
        # If we have a station name, and no duid, and it's ok to
        # join by name
        # then find the station (make sure to region lock)
        if (station_name and not duid and not facility_station
                and facility_station_join_by_name(station_name)):
            facility = (s.query(Facility).join(Facility.station).filter(
                Facility.network_region == station_network_region).filter(
                    Station.name == station_name).first())
            if facility:
                facility_station = facility.station
        # Create one as it doesn't exist
        if not facility_station:
            facility_station = Station(
                name=station_name,
                network_name=name_normalizer(
                    facility_station_record["station_name"]),
                network_id="NEM",
                created_by="pipeline.aemo.general_information",
            )
            s.add(facility_station)
            s.commit()
            created_station = True
        else:
            facility_station.updated_by = (
                "pipeline.aemo.general_information")
        # Step 2. Upsert each unit of the group as a Facility.
        for facility_record in facility_records:
            if facility_record["FuelType"] in ["Natural Gas Pipeline"]:
                continue
            # skip these statuses too
            if facility_record["UnitStatus"] in FACILITY_INVALID_STATUS:
                continue
            facility = None
            created_facility = False
            facility_network_name = name_normalizer(
                facility_record["station_name"])
            facility_name = station_name_cleaner(
                facility_record["station_name"])
            duid = normalize_duid(facility_record["duid"])
            reg_cap = clean_capacity(facility_record["NameCapacity"])
            units_num = facility_record["Units"] or 1
            unit_id = facility_index + (units_num - 1)
            unit = parse_unit_duid(unit_id, duid)
            unit_size = clean_capacity(facility_record["unit_capacity"])
            unit_code = get_unit_code(unit, duid,
                                      facility_record["station_name"])
            facility_comissioned = facility_record["SurveyEffective"]
            facility_comissioned_dt = None
            if type(facility_comissioned) is datetime:
                facility_comissioned_dt = facility_comissioned
            try:
                if type(facility_comissioned) is str:
                    facility_comissioned_dt = datetime.strptime(
                        facility_comissioned, "%d/%m/%y")
            except ValueError:
                logger.error(
                    "Error parsing date: {}".format(facility_comissioned))
            facility_status = map_aemo_facility_status(
                facility_record["UnitStatus"])
            facility_network_region = normalize_aemo_region(
                facility_record["Region"])
            facility_fueltech = (lookup_fueltech(
                facility_record["FuelType"],
                techtype=facility_record["TechType"],
            ) if ("FuelType" in facility_record
                  and facility_record["FuelType"]) else None)
            if not facility_fueltech:
                logger.error("Error looking up fueltech: {} {} ".format(
                    facility_record["FuelType"],
                    facility_record["TechType"],
                ))
            # check if we have it by ocode first
            facility = (s.query(Facility).filter(
                Facility.code == unit_code).one_or_none())
            if not facility and duid:
                try:
                    facility = (
                        s.query(Facility).filter(
                            Facility.network_code == duid).filter(
                                Facility.network_region ==
                                facility_network_region)
                        # .filter(Facility.nameplate_capacity != None)
                        .one_or_none())
                except MultipleResultsFound:
                    logger.warn(
                        "Multiple results found for duid : {}".format(
                            duid))
                if facility:
                    if facility.station and not facility_station:
                        facility_station = facility.station
                    logger.info(
                        "GI: Found facility by DUID: code {} station {}".
                        format(
                            facility.code,
                            facility.station.name
                            if facility.station else None,
                        ))
            # Done trying to find existing
            if not facility:
                facility = Facility(
                    code=unit_code,
                    network_code=duid,
                    created_by="pipeline.aemo.general_information",
                )
                facility.station = facility_station
                created_facility = True
            # Fill-only-if-empty updates below; capacity fields are also
            # refreshed while the facility is not yet operating.
            if duid and not facility.network_code:
                facility.network_code = duid
                facility.updated_by = "pipeline.aemo.general_information"
            if not facility.network_region:
                facility.network_region = facility_network_region
                facility.updated_by = "pipeline.aemo.general_information"
            if not facility.network_name:
                facility.network_name = facility_network_name
                facility.updated_by = "pipeline.aemo.general_information"
            if not facility.fueltech_id and facility_fueltech:
                facility.fueltech_id = facility_fueltech
                facility.updated_by = "pipeline.aemo.general_information"
            if not facility.capacity_registered or (
                    facility.status and facility.status != "operating"):
                facility.capacity_registered = reg_cap
                facility.updated_by = "pipeline.aemo.general_information"
            # @TODO work this out
            # facility.dispatch_type = facility_dispatch_type
            if not facility.unit_id:
                facility.unit_id = unit.id
                facility.unit_number = unit.number
            facility.unit_size = unit_size
            facility.unit_alias = unit.alias
            if not facility.unit_capacity or (
                    facility.status and facility.status != "operating"):
                facility.unit_capacity = unit_size
                facility.updated_by = "pipeline.aemo.general_information"
            # if not facility.status_id:
            facility.status_id = facility_status
            # facility.updated_by = "pipeline.aemo.general_information"
            if not facility.registered and facility_comissioned_dt:
                facility.registered = facility_comissioned_dt
                facility.updated_by = "pipeline.aemo.general_information"
            facility.station = facility_station
            if facility.fueltech_id is None:
                logger.warning("Could not find fueltech for: {} {}".format(
                    facility.code, facility.network_code))
            # facility.status_id = facility_status
            if facility_station and not facility.station:
                facility.station = facility_station
            if facility.status_id is None:
                raise Exception(
                    "GI: Failed to map status ({}) on row: {}".format(
                        facility.status_id, facility_record))
            s.add(facility)
            s.commit()
            facility_index += units_num
            if created_station:
                logger.info("GI: {} station with name {} ".format(
                    "Created" if created_station else "Updated",
                    station_name,
                    # facility_station.id,
                ))
            if created_facility:
                logger.info(
                    "GI: {} facility with duid {} to station {}".format(
                        "Created" if created_facility else "Updated",
                        duid,
                        station_name,
                    ))
    try:
        s.commit()
    except Exception as e:
        logger.error(e)
        raise e
    finally:
        s.close()
def process_item(self, item, spider=None):
    """Pivot a WEM balancing-summary CSV (one column per half-hour
    interval, I01..) into per-interval facility_scada rows.

    item: dict with a "content" key of CSV text carrying AS_AT,
    FACILITY_CODE and interval columns I01, I02, ...

    Returns the number of records prepared; insert errors are logged.
    """
    session = SessionLocal()
    engine = get_database_engine()
    csvreader = csv.DictReader(item["content"].split("\n"))
    records_to_store = []
    last_asat = None

    for row in csvreader:
        # AS_AT is only present on some rows; carry the last seen value
        # forward for the rows that omit it.
        if row["AS_AT"] != "":
            last_asat = parse_date(
                row["AS_AT"], dayfirst=True, network=NetworkWEM
            )

        if not last_asat or type(last_asat) is not datetime:
            logger.error("Invalid row or no datetime")
            continue

        # We need to pivot the table since columns are time intervals
        # NOTE(review): range(1, 48) reads I01..I47 only - if the feed
        # carries 48 half-hour columns, I48 is never ingested; confirm.
        for i in range(1, 48):
            column = f"I{i:02}"

            if column not in row:
                logger.error(
                    "Do not have data for interval {}".format(column)
                )
                continue

            # FIX: removed a dead `if i > 0 ... else` branch - i starts
            # at 1 so the else arm was unreachable.
            interval = last_asat - timedelta(minutes=(i - 1) * 30)

            facility_code = normalize_duid(row["FACILITY_CODE"])

            val = None
            try:
                # MW over a half hour -> MWh. NOTE(review): `or None`
                # also maps a legitimate 0.0 reading to NULL - confirm.
                val = float(row[column]) / 2 or None
            except ValueError:
                pass

            records_to_store.append(
                {
                    "network_id": "WEM",
                    "trading_interval": interval,
                    "facility_code": facility_code,
                    "generated": val,
                }
            )

    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        constraint="facility_scada_pkey",
        set_={"generated": stmt.excluded.generated},
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
    finally:
        session.close()

    return len(records_to_store)
def process_dispatch_interconnectorres(table: Dict, spider: Spider) -> Dict:
    """Bulk-upsert NEM interconnector flow records into facility_scada.

    table: dict with a "records" list carrying SETTLEMENTDATE or
    RUN_DATETIME, INTERCONNECTORID and METEREDMWFLOW.

    Returns {"num_records": n}; n is 0 when the insert failed.
    Raises Exception on a missing "records" key or a row with no
    trading interval field.
    """
    session = SessionLocal()
    engine = get_database_engine()
    if "records" not in table:
        raise Exception("Invalid table no records")
    records = table["records"]
    records_to_store = []
    for record in records:
        # RUN_DATETIME (when present) takes precedence over SETTLEMENTDATE.
        ti_value = None
        if "SETTLEMENTDATE" in record:
            ti_value = record["SETTLEMENTDATE"]
        if "RUN_DATETIME" in record:
            ti_value = record["RUN_DATETIME"]
        if not ti_value:
            raise Exception("Require a trading interval")
        trading_interval = parse_date(ti_value, network=NetworkNEM, dayfirst=False)
        if not trading_interval:
            continue
        facility_code = normalize_duid(record["INTERCONNECTORID"])
        power_value = clean_float(record["METEREDMWFLOW"])
        records_to_store.append({
            "network_id": "NEM",
            "created_by": spider.name,
            "facility_code": facility_code,
            "trading_interval": trading_interval,
            "generated": power_value,
        })
    # remove duplicates
    # NOTE(review): itertools.groupby only groups *consecutive* equal
    # keys and the input is unsorted; the dict guard keeps the last
    # record of the first run per key - order-dependent, confirm intended.
    return_records_grouped = {}
    for pk_values, rec_value in groupby(
        records_to_store,
        key=lambda r: (
            r.get("trading_interval"),
            r.get("network_id"),
            r.get("facility_code"),
        ),
    ):
        if pk_values not in return_records_grouped:
            return_records_grouped[pk_values] = list(rec_value).pop()
    records_to_store = list(return_records_grouped.values())
    # insert
    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=[
            "trading_interval", "network_id", "facility_code", "is_forecast"
        ],
        set_={"generated": stmt.excluded.generated},
    )
    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return {"num_records": 0}
    finally:
        session.close()
    return {"num_records": len(records_to_store)}
def unit_scada_generate_facility_scada(
    records,
    spider=None,
    network: NetworkSchema = NetworkNEM,
    interval_field: str = "SETTLEMENTDATE",
    facility_code_field: str = "DUID",
    date_format: Optional[str] = None,
    power_field: Optional[str] = None,
    energy_field: Optional[str] = None,
    is_forecast: bool = False,
    primary_key_track: bool = False,
    groupby_filter: bool = True,
    created_by: str = None,
    limit: int = 0,
    duid: str = None,
) -> List[Dict]:
    """Transform raw scada rows into facility_scada insert dicts.

    records: iterable of raw row dicts keyed by interval_field /
    facility_code_field (and optionally power_field / energy_field).

    Optional filters: duid restricts output to one facility;
    primary_key_track drops duplicate (interval, facility) pairs;
    groupby_filter dedups on (network, interval, facility) keeping the
    last record of each consecutive run; limit caps output size (0 = no cap).

    Returns a list of dicts ready for a FacilityScada bulk insert.
    """
    created_at = datetime.now()
    # FIX: was a list - membership checks were O(n) per row.
    primary_keys = set()
    return_records = []

    # FIX: the created_by parameter was unconditionally clobbered with ""
    # before the spider check, so explicit values were silently ignored.
    # Honor an explicit created_by; fall back to the spider name.
    if not created_by:
        created_by = ""
        if spider and hasattr(spider, "name"):
            created_by = spider.name

    for row in records:
        trading_interval = parse_date(
            row[interval_field],
            network=network,
            dayfirst=False,
            date_format=date_format,
        )

        # if facility_code_field not in row:
        #     logger.error("Invalid row no facility_code")
        #     continue

        facility_code = normalize_duid(row[facility_code_field])

        if duid and facility_code != duid:
            continue

        if primary_key_track:
            pkey = (trading_interval, facility_code)

            if pkey in primary_keys:
                continue

            primary_keys.add(pkey)

        generated = None

        if power_field and power_field in row:
            generated = clean_float(row[power_field])

            if generated:
                generated = float_to_str(generated)

        energy = None

        if energy_field and energy_field in row:
            energy = clean_float(row[energy_field])

            if energy:
                energy = float_to_str(energy)

        __rec = {
            "created_by": created_by,
            "created_at": created_at,
            "updated_at": None,
            "network_id": network.code,
            "trading_interval": trading_interval,
            "facility_code": facility_code,
            "generated": generated,
            "eoi_quantity": energy,
            "is_forecast": is_forecast,
        }

        return_records.append(__rec)

        if limit > 0 and len(return_records) >= limit:
            break

    if not groupby_filter:
        return return_records

    # NOTE(review): groupby on unsorted input only merges consecutive
    # duplicates; the dict guard keeps the first run's last record.
    return_records_grouped = {}

    for pk_values, rec_value in groupby(
        return_records,
        key=lambda r: (
            r.get("network_id"),
            r.get("trading_interval"),
            r.get("facility_code"),
        ),
    ):
        if pk_values not in return_records_grouped:
            return_records_grouped[pk_values] = list(rec_value).pop()

    return_records = list(return_records_grouped.values())

    return return_records
def update_existing_geos() -> None:
    """
    Old method to update geos from existing facilities file on OpenNEM
    """
    station_fixture = load_data("facility_registry.json", from_fixture=True)

    stations = [{"station_code": k, **v} for k, v in station_fixture.items()]

    s = SessionLocal()

    for station_data in stations:
        station = None

        station_name = station_name_cleaner(station_data["display_name"])
        station_code = normalize_duid(station_data["station_code"])
        station_state = map_compat_facility_state(
            station_data["status"]["state"])

        station = s.query(Station).filter(
            Station.network_code == station_code).one_or_none()

        if not station:
            logger.info("Could not find station {}".format(station_code))
            continue

        if ("location" in station_data
                and "latitude" in station_data["location"]
                and station_data["location"]["latitude"]):
            # FIX: a trailing comma here previously assigned a 1-tuple
            # ("SRID=...",) to station.geom instead of the WKT string.
            # NOTE(review): WKT POINT is (x y) = (lon lat); this writes
            # latitude first, matching the rest of this file - confirm
            # against stored geometries before swapping the order.
            station.geom = "SRID=4326;POINT({} {})".format(
                station_data["location"]["latitude"],
                station_data["location"]["longitude"],
            )
            station.geocode_processed_at = datetime.now()
            station.geocode_by = "opennem"
            station.geocode_approved = True
            station.updated_by = "fixture.registry"

        s.add(station)

        logger.info("Updated station geo location {} ({})".format(
            station.code,
            station.name,
        ))

    # NOTE(review): this reads duid_data from stations[0] only, and runs
    # *after* the loop - it probably belongs inside the per-station loop;
    # left as-is pending confirmation.
    facilities = [{
        "code": k,
        **v
    } for k, v in stations[0]["duid_data"].items()]

    # update fueltechs
    for facility_data in facilities:
        facility_duid = facility_data["code"]
        facility_fueltech = lookup_fueltech(facility_data["fuel_tech"])

        facility = s.query(Facility).filter(
            Facility.network_code == facility_duid).first()

        if not facility:
            logger.error(
                "Could not find existing facility {} for station {}".
                format(facility_duid, station_code))
            continue

        if not facility.fueltech_id:
            facility.fueltech_id = facility_fueltech

        if facility.fueltech_id != facility_fueltech:
            # FIX: the old message logged station.fueltech_id (wrong
            # object) as "new"; report the facility's stored value as old
            # and the looked-up value as new.
            logger.error(
                "Fueltech mismatch for {}. Old is {} and new is {}".format(
                    station_code, facility.fueltech_id, facility_fueltech))

        s.add(facility)

    s.commit()
def load_opennem_facilities():
    """Load stations and facilities from the legacy facility_registry.json
    fixture into the database, creating missing rows and filling empty
    fields on existing ones. Commits per station creation and per facility.
    """
    station_fixture = load_data("facility_registry.json", from_fixture=True)
    stations = [{"station_code": k, **v} for k, v in station_fixture.items()]
    engine = db_connect()
    session = sessionmaker(bind=engine)
    s = session()
    for station_data in stations:
        station = None
        facilities = [{
            "code": k,
            **v
        } for k, v in station_data["duid_data"].items()]
        # update facilities
        for facility_data in facilities:
            facility_duid = facility_data["code"]
            station_name = station_name_cleaner(station_data["display_name"])
            # station_code = normalize_duid(station_data["station_code"])
            # A DUID may remap to a different station via the override map.
            station_code = facility_map_station(
                facility_duid, normalize_duid(station_data["station_code"]))
            station_state = map_compat_facility_state(
                station_data["status"]["state"])
            # WA-located stations belong to the WEM network, all else NEM.
            station_network = "WEM" if station_data["location"][
                "state"] == "WA" else "NEM"
            station = s.query(Station).filter(
                Station.network_code == station_code).one_or_none()
            if not station:
                station = Station(
                    network_id=station_network,
                    code=station_code,
                    network_code=station_code,
                    name=station_name,
                    network_name=station_data["display_name"],
                    created_by="opennem.load_facilities",
                )
                logger.info("Created station: {} {} ".format(
                    station_name, station_code))
                s.add(station)
                s.commit()
            facility_status = station_state
            # Network region
            facility_network_region = map_compat_network_region(
                station_data["region_id"])
            # Fueltech
            facility_fueltech = None
            if "fuel_tech" in facility_data and facility_data["fuel_tech"]:
                facility_fueltech = map_compat_fueltech(
                    facility_data["fuel_tech"])
            # Capacity
            facility_capacity = None
            if "registered_capacity" in facility_data and facility_data[
                    "registered_capacity"]:
                facility_capacity = clean_capacity(
                    facility_data["registered_capacity"])
            facility = None
            try:
                facility = (s.query(Facility).filter(
                    Facility.network_code == facility_duid).one_or_none())
            except MultipleResultsFound:
                logger.error("Multiple facilities found for duid {}".format(
                    facility_duid))
                # facility = (
                #     s.query(Facility)
                #     .filter(Facility.network_code == facility_duid)
                #     .first()
                # )
                continue
            if not facility:
                facility = Facility(
                    code=facility_duid,
                    network_code=facility_duid,
                    network_region=facility_network_region,
                    created_by="opennem.load_facilities",
                )
                logger.info("Created facility: {} {} to station {} ".format(
                    facility_duid, facility_fueltech, station_code))
            # Fill-only-if-empty updates: existing values are preserved.
            if not facility.unit_id:
                facility.unit_id = 1
                facility.unit_number = 1
            if facility_capacity and not facility.unit_capacity:
                facility.unit_capacity = facility_capacity
            if facility_capacity and not facility.capacity_registered:
                facility.capacity_registered = facility_capacity
            if facility_fueltech and not facility.fueltech_id:
                facility.fueltech_id = facility_fueltech
            if not facility.status_id:
                facility.status_id = facility_status
            if not facility.station:
                facility.station = station
            s.add(facility)
            s.commit()
def validate_linkid(cls, value: str) -> str:
    """Validator: return the link id in normalized DUID form."""
    normalized = normalize_duid(value)
    return normalized
def validate_linkid(cls, value: str) -> Optional[str]:
    """Validator: normalize the link id to DUID form (may yield None)."""
    result = normalize_duid(value)
    return result
def test_duid_cleaner(duid: str, duid_expected: str) -> None: duid_return = normalize_duid(duid) assert duid_return == duid_expected, "Matched duid"
def process_item(self, item, spider=None) -> int:
    """Ingest WEM live-facility CSV content: upsert Participant, Station
    (with a WA Location) and Facility rows for each CSV row.

    Returns the number of rows successfully processed.

    NOTE(review): ``spider`` defaults to None but ``spider.name`` is read
    below when creating records — a None spider would raise AttributeError;
    confirm callers always pass one.
    """
    s = SessionLocal()
    records_added = 0

    # item["content"] is expected to be the raw CSV payload as text.
    csvreader = csv.DictReader(item["content"].split("\n"))

    for row in csvreader:
        # Sanity check: skip rows that don't carry the expected columns.
        if "PARTICIPANT_CODE" not in row:
            logger.error("Invalid row")
            continue

        participant = None

        participant_code = normalize_duid(row["PARTICIPANT_CODE"])
        participant = (s.query(Participant).filter(
            Participant.code == participant_code).one_or_none())

        if not participant:
            participant = Participant(
                created_by=spider.name,
                approved_by=spider.name,
                approved_at=datetime.now(),
                code=participant_code,
                network_code=participant_code,
                network="WEM",
            )
            s.add(participant)
            # Commit immediately so later rows can look the participant up.
            s.commit()
            logger.warning(
                "Participant not found created new database entry: {}".format(
                    participant_code))

        station = None
        facility = None

        facility_code = normalize_duid(row["FACILITY_CODE"])
        # WEM station codes are derived from the facility code.
        station_code = parse_wem_facility_code(facility_code)

        station_name = station_name_cleaner(row["DISPLAY_NAME"])
        station_network_name = normalize_string(row["DISPLAY_NAME"])

        station = (s.query(Station).filter(
            Station.code == station_code).one_or_none())

        if not station:
            station = Station(
                created_by=spider.name,
                approved_by=spider.name,
                approved_at=datetime.now(),
                code=station_code,
                network_code=station_code,
                participant=participant,
            )
            location = Location(state="WA")
            s.add(location)
            station.location = location
            logger.debug("Added WEM station: {}".format(station_code))

        lat = row["LATITUDE"]
        lng = row["LONGITUDE"]

        # Display names are refreshed on every run.
        station.name = station_name
        station.network_name = station_network_name

        # Geocode only once: an existing geometry is never overwritten.
        # NOTE(review): EWKT POINT is (x y), i.e. (lon lat); latitude is
        # placed first here — verify the source column semantics before
        # relying on the stored geometry.
        if lat and lng and not station.location.geom:
            station.location.geom = "SRID=4326;POINT({} {})".format(lat, lng)
            station.location.geocode_by = "aemo"
            station.location.geocode_approved = True

        facility = (s.query(Facility).filter(
            Facility.code == facility_code).one_or_none())

        if not facility:
            facility = Facility(
                created_by="opennem.wem.live.facilities",
                approved_at=datetime.now(),
                network_id="WEM",
                code=facility_code,
                network_code=facility_code,
                network_region="WEM",
            )

        registered_date = row["YEAR_COMMISSIONED"]

        if registered_date:
            registered_date_dt = None
            date_fmt = "%Y"
            # Commissioning year only; bad values are logged and skipped.
            try:
                registered_date_dt = datetime.strptime(
                    registered_date, date_fmt)
            except Exception:
                logger.error("Bad date: %s for format %s", registered_date,
                             date_fmt)

            if registered_date_dt:
                facility.registered = registered_date_dt

        fueltech = lookup_fueltech(fueltype=row["PRIMARY_FUEL"],
                                   techtype=row["FACILITY_TYPE"])

        # NOTE(review): checks the ``fueltech`` relationship but assigns the
        # ``fueltech_id`` column — presumably equivalent via the ORM mapping;
        # confirm against the Facility model.
        if fueltech and not facility.fueltech:
            facility.fueltech_id = fueltech

        facility.status_id = "operating"
        facility.station = station

        s.add(station)
        s.add(facility)
        # Per-row commit: one failed row doesn't roll back earlier rows.
        s.commit()
        records_added += 1

    # Final commit flushes any remaining pending state; the session is
    # always closed regardless of outcome.
    try:
        s.commit()
    except IntegrityError as e:
        logger.error(e)
        pass
    except Exception as e:
        logger.error("Error: {}".format(e))
    finally:
        s.close()

    return records_added
def process_item(self, item, spider):
    """Ingest WEM facility-registry CSV rows: upsert Participant, Station
    (with a WA Location) and Facility records, committing once at the end.

    Rows for non-generation facility types (demand side programs, loads,
    network assets) are skipped. Returns the number of rows processed.
    """
    s = SessionLocal()
    records_added = 0

    # item["content"] is expected to be the raw CSV payload as text.
    csvreader = csv.DictReader(item["content"].split("\n"))

    for row in csvreader:
        # Sanity check: skip rows without the expected columns.
        if "Participant Code" not in row:
            logger.error("Invalid row")
            continue

        # Only generation facilities are ingested.
        if row["Facility Type"] in [
                "Demand Side Program",
                "Non-Dispatchable Load",
                "Network",
        ]:
            continue

        participant_name = participant_name_filter(row["Participant Name"])
        participant_network_name = normalize_string(
            row["Participant Name"])
        participant_code = normalize_duid(row["Participant Code"])

        participant = (s.query(Participant).filter(
            Participant.code == participant_code).one_or_none())

        if not participant:
            participant = Participant(
                created_by=spider.name,
                approved_at=datetime.now(),
                code=participant_code,
                name=participant_name,
                network_name=participant_network_name,
            )
            s.add(participant)
            # Commit immediately so later rows can look the participant up.
            s.commit()
            logger.debug(
                "Participant not found created new database entry: %s",
                participant_code,
            )

        facility_code = normalize_duid(row["Facility Code"])
        # WEM station codes are derived from the facility code.
        station_code = parse_wem_facility_code(facility_code)

        station = (s.query(Station).filter(
            Station.code == station_code).one_or_none())

        if not station:
            station = Station(
                created_by="opennem.wem.facilities",
                approved_at=datetime.now(),
                code=station_code,
                network_code=station_code,
                participant=participant,
            )
            location = Location(state="WA")
            station.location = location
            logger.debug("Added WEM station: {}".format(station_code))

        facility = (s.query(Facility).filter(
            Facility.code == facility_code).one_or_none())

        if not facility:
            facility = Facility(
                created_by=spider.name,
                approved_at=datetime.now(),
                code=facility_code,
                network_id="WEM",
                network_code=facility_code,
                network_region="WEM",
            )

        # Registered and unit capacity come from the same CSV column;
        # parse it once (was parsed twice).
        capacity = clean_capacity(row["Maximum Capacity (MW)"])
        registered_date = row["Registered From"]

        facility.status_id = "operating"
        facility.capacity_registered = capacity
        facility.unit_id = 1
        facility.unit_number = 1
        facility.unit_capacity = capacity

        if registered_date:
            date_fmt = "%Y-%m-%d %H:%M:%S"
            # Guard the parse: a malformed date previously raised out of the
            # loop and aborted the whole ingest. Log and skip instead,
            # matching the live-facilities pipeline's handling.
            try:
                facility.registered = datetime.strptime(
                    registered_date, date_fmt)
            except ValueError:
                logger.error("Bad date: %s for format %s", registered_date,
                             date_fmt)

        facility.station = station
        s.add(facility)
        records_added += 1

    # Single commit for all facilities; the session is always closed.
    try:
        s.commit()
    except IntegrityError as e:
        logger.error(e)
    except Exception as e:
        logger.error("Error: {}".format(e))
    finally:
        s.close()

    return records_added
def process_item(self, item, spider=None):
    """Upsert participants, stations and facilities from grouped MMS
    DUDETAILSUMMARY records.

    Each record carries a list of facility rows under ``facilities``; the
    participant is taken from the first facility row. Facilities with no
    ``date_end`` are marked operating, otherwise retired. Commits once per
    facility and logs created/updated totals at the end.
    """
    s = self.session()
    records_updated = 0
    records_created = 0

    for record in item:
        created = False
        participant_code = normalize_duid(
            record["facilities"][0]["PARTICIPANTID"])

        # Step 1. Find participant by code or create
        participant = (s.query(ParticipantModel).filter(
            ParticipantModel.code == participant_code).one_or_none())

        if not participant:
            participant = ParticipantModel(
                code=participant_code,
                network_code=participant_code,
                created_by="au.nem.mms.dudetail_summary",
            )
            logger.debug("Created participant {}".format(participant_code))
        else:
            participant.updated_by = "au.nem.mms.dudetail_summary"

        # Step 3. now create the facilities and associate
        for facility_record in record["facilities"]:
            duid = normalize_duid(facility_record["DUID"])
            station_code = facility_map_station(
                duid, normalize_duid(record["id"]))
            network_region = normalize_aemo_region(
                facility_record["REGIONID"])
            date_start = facility_record["date_start"]
            date_end = facility_record["date_end"]
            facility_state = "retired"

            # Step 2. Find station or create
            station = (s.query(Station).filter(
                Station.network_code == station_code).one_or_none())

            if not station:
                station = Station(
                    code=station_code,
                    network_code=station_code,
                    network_id="NEM",
                    created_by="au.nem.mms.dudetail_summary",
                )
                logger.debug("Created station {}".format(station_code))
            else:
                station.updated_by = "au.nem.mms.dudetail_summary"

            station.participant = participant

            # No end date means the unit is still operating.
            if date_end is None:
                facility_state = "operating"

            if "DISPATCHTYPE" not in facility_record:
                logger.error(
                    "MMS dudetailsummary: Invalid record: {}".format(
                        facility_record))
                continue

            dispatch_type = parse_dispatch_type(
                facility_record["DISPATCHTYPE"])

            facility = (s.query(Facility).filter(
                Facility.network_code == duid).one_or_none())

            if not facility:
                facility = Facility(
                    code=duid,
                    network_code=duid,
                    dispatch_type=dispatch_type,
                    created_by="au.nem.mms.dudetail_summary",
                )
                records_created += 1
                created = True
            else:
                facility.updated_by = "au.nem.mms.dudetail_summary"
                records_updated += 1

            facility.network_region = network_region
            facility.deregistered = date_end
            facility.registered = date_start
            facility.status_id = facility_state
            if not facility.dispatch_type:
                facility.dispatch_type = dispatch_type

            # Associations. Assign the relationship rather than the raw id:
            # a newly created station has no primary key until commit, so
            # ``station.id`` would be None here. The relationship assignment
            # also cascades the new Station (and its participant) into the
            # session on add(), matching the sibling loaders.
            # NOTE(review): the original also computed
            # facility_map_station(duid, station.id) and never used the
            # result — dropped; confirm the mapping isn't needed here.
            facility.station = station

            try:
                s.add(facility)
                s.commit()
            except Exception as e:
                logger.error(e)

            logger.debug(
                "MMS DudetailSummary:{} facility record with id {}".format(
                    "Created" if created else "Updated", duid))

    logger.info(
        "MMS DudetailSummary: Created {} facility records and updated {}".
        format(records_created, records_updated))
def rel_grouper(records, station_code_map): records_parsed = [] for _id, i in enumerate(records, start=2000): name = station_name_cleaner(i["station_name"]) duid = normalize_duid(i["duid"]) unit = parse_unit_duid(i["unit_no"], duid) fueltech = lookup_fueltech( i["fuel_source_primary"], i["fuel_source_descriptor"], i["tech_primary"], i["tech_primary_descriptor"], i["dispatch_type"], ) station_code = lookup_station_code([duid], i["station_name"], station_code_map) records_parsed.append({ "name": name, "code": duid, "status": parse_facility_status("operating"), "station_code": station_code, "network_region": i["region"].strip(), "network_name": i["station_name"].strip(), "unit_size": clean_capacity(i["unit_size"]), "unit_code": get_unit_code(unit, duid, name), "dispatch_type": parse_dispatch_type(i["dispatch_type"]), "fueltech": parse_facility_fueltech(fueltech), "capacity_registered": clean_capacity(i["reg_cap"]), "capacity_maximum": clean_capacity(i["max_cap"]), }) grouped_records = {} for key, v in groupby(records_parsed, key=lambda v: v["station_code"]): # key = k[1 if key not in grouped_records: grouped_records[key] = [] grouped_records[key] += list(v) coded_records = {} _id = 2000 for station_code, rel in grouped_records.items(): station_name = rel[0]["network_name"] if station_code in coded_records: raise Exception("Code conflict: {}. {} {}".format( station_code, station_name, coded_records[station_code])) if not station_code: raise Exception("Unmapped station: {}".format(rel)) coded_records[station_code] = { "name": station_name_cleaner(station_name), "network_name": station_name, "code": station_code, "id": _id, "facilities": rel, } _id += 1 return coded_records