def run() -> None:
    """Compare the v2 DUID dump CSV against v3 facilities and log any
    fueltech mismatches or missing facilities."""
    comp_file = Path(__file__).parent / "v2_duid_dump.csv"

    with comp_file.open() as fh:
        header = fh.readline().strip().split(",")
        reader = csv.DictReader(fh, fieldnames=header)
        rows = list(reader)

    # skip rows with no facility code (serialized as the string "None")
    records = [CSVRecord(**row) for row in rows if row["facility_code"] != "None"]

    session = SessionLocal()

    for rec in records:
        facility: Facility = (
            session.query(Facility)
            .filter_by(network_region=rec.network_region)
            .filter_by(code=rec.facility_code)
            .one_or_none()
        )

        if not facility:
            logger.debug(
                "Could not find facility {} with fueltech {}".format(
                    rec.facility_code, rec.fueltech_id
                )
            )
            continue

        if facility.fueltech_id != rec.fueltech_id:
            logger.info(
                "Fueltech mismatch for {}: v2 is {} and v3 is {}".format(
                    rec.facility_code, rec.fueltech_id, facility.fueltech_id
                )
            )
def import_photos_from_fixtures() -> None:
    """ Import photos to stations """
    session = SessionLocal()
    photo_records = get_import_photo_data()

    for photo_record in photo_records:
        station = (session.query(Station).filter(
            Station.code == photo_record.station_code).one_or_none())

        if not station:
            logger.error("Could not find station {}".format(
                photo_record.station_code))
            continue

        # fetch the source image from its URL; skip the record if unreachable
        img = get_image_from_web(photo_record.image_url)

        if not img:
            logger.error("No image for {}".format(photo_record.image_url))
            continue

        # last 8 chars of the content hash act as the photo id and filename prefix
        hash_id = image_get_crypto_hash(img)[-8:]
        file_name = "{}_{}_{}.{}".format(hash_id,
                                         station.name.replace(" ", "_"),
                                         "original", "jpeg")

        # reuse an existing Photo row for this hash if one exists
        photo = session.query(Photo).filter_by(hash_id=hash_id).one_or_none()

        if not photo:
            photo = Photo(hash_id=hash_id, )

        photo.name = file_name
        photo.width = img.size[0]
        photo.height = img.size[1]
        photo.original_url = photo_record.image_url
        photo.license_type = photo_record.license
        photo.license_link = photo_record.license_link
        photo.author = photo_record.author
        photo.author_link = photo_record.author_link

        if photo_record.is_primary:
            photo.is_primary = True

        # imported fixture photos are auto-approved
        photo.approved = True
        photo.approved_by = "opennem.importer.photos"
        photo.approved_at = datetime.now()

        # upload the image bytes to S3 before persisting the DB record
        img_buff = img_to_buffer(img)
        write_photo_to_s3(file_name, img_buff)

        if station:
            station.photos.append(photo)

        session.add(photo)

        if station:
            session.add(station)

    session.commit()
def get_interconnector_facility(facility_code: str) -> Facility:
    """Look up a facility by its code.

    Raises if no facility with that code exists.
    """
    session = SessionLocal()

    facility = session.query(Facility).filter_by(code=facility_code).one_or_none()

    if facility is None:
        raise Exception("Could not find facility {}".format(facility_code))

    return facility
def rooftop_facilities() -> None:
    """Create or refresh the synthetic rooftop-solar station and facility
    for each entry in STATE_NETWORK_REGION_MAP."""
    session = SessionLocal()

    for state_map in STATE_NETWORK_REGION_MAP:
        # synthetic code, e.g. <ROOFTOP_CODE>_NEM_NSW
        state_rooftop_code = "{}_{}_{}".format(
            ROOFTOP_CODE,
            state_map["network"].upper(),
            state_map["state"].upper(),
        )

        rooftop_station = session.query(Station).filter_by(
            code=state_rooftop_code).one_or_none()

        if not rooftop_station:
            logger.info("Creating new station {}".format(state_rooftop_code))
            rooftop_station = Station(code=state_rooftop_code, )

        # fields below are refreshed on every run, not just on create
        rooftop_station.name = "Rooftop Solar {}".format(state_map["state"])
        rooftop_station.description = "Solar rooftop facilities for {}".format(
            state_map["state"])
        rooftop_station.approved = False
        rooftop_station.approved_by = ""
        rooftop_station.created_by = "opennem.importer.rooftop"

        if not rooftop_station.location:
            rooftop_station.location = Location(state=state_map["state"])

        rooftop_fac = session.query(Facility).filter_by(
            code=state_rooftop_code).one_or_none()

        if not rooftop_fac:
            logger.info("Creating new facility {}".format(state_rooftop_code))
            rooftop_fac = Facility(code=state_rooftop_code)

        network = state_map["network"]

        # map to separate AEMO rooftop network
        if network.upper() == "NEM":
            network = "AEMO_ROOFTOP"

        rooftop_fac.network_id = network
        rooftop_fac.network_region = state_map["network_region"]
        rooftop_fac.fueltech_id = "solar_rooftop"
        rooftop_fac.status_id = "operating"
        rooftop_fac.active = True
        rooftop_fac.dispatch_type = DispatchType.GENERATOR
        rooftop_fac.approved_by = "opennem.importer.rooftop"
        rooftop_fac.created_by = "opennem.importer.rooftop"

        rooftop_station.facilities.append(rooftop_fac)
        session.add(rooftop_fac)
        session.add(rooftop_station)

    session.commit()
def load_station_timezone_map() -> Dict:
    """Build a map of BOM station code to timezone name.

    The timezone is derived from the station's state via STATE_TO_TIMEZONE.

    Returns:
        dict mapping station code -> timezone name.
    """
    session = SessionLocal()
    stations = session.query(BomStation).all()

    # NOTE: the comprehension variable previously shadowed the session
    # variable `s`; renamed both for clarity.
    station_map = {
        station.code: STATE_TO_TIMEZONE[station.state.upper()]
        for station in stations
    }

    return station_map
def process_item(self, item, spider=None):
    """Parse WEM participant rows from a crawled CSV and upsert them.

    Returns the number of records written.
    """
    session = SessionLocal()
    stored = 0

    reader = csv.DictReader(item["content"].split("\n"))

    for row in reader:
        if "PARTICIPANT_CODE" not in row:
            logger.error("Invalid row")
            continue

        code = normalize_duid(normalize_string(row["PARTICIPANT_CODE"]))
        name = participant_name_filter(
            row["PARTICIPANT_DISPLAY_NAME"]
        ) or participant_name_filter(row.get("PARTICIPANT_FULL_NAME", None))

        participant = (
            session.query(Participant)
            .filter(Participant.code == code)
            .one_or_none()
        )

        if not participant:
            participant = Participant(
                code=code,
                name=name,
                created_by=spider.name,
            )
            logger.info("Created new WEM participant: {}".format(code))
        elif participant.name != name:
            participant.name = name
            participant.updated_by = spider.name
            logger.info("Updated WEM participant: {}".format(code))

        try:
            session.add(participant)
            session.commit()
            stored += 1
        except Exception as e:
            logger.error(e)

    session.close()

    return stored
def get_network_regions(
    network: NetworkSchema, network_region: Optional[str] = None
) -> List[NetworkRegion]:
    """Return regions for a network"""
    session = SessionLocal()

    query = session.query(NetworkRegion).filter_by(network_id=network.code)

    # optionally narrow to a single region code
    if network_region:
        query = query.filter_by(code=network_region)

    return query.all()
def process_item(self, item, spider=None):
    """Parse participant rows from a crawled CSV and upsert them.

    Returns the number of records written.
    """
    session = SessionLocal()
    stored = 0

    reader = csv.DictReader(item["content"].split("\n"))

    for row in reader:
        if "Participant Code" not in row:
            logger.error("Invalid row")
            continue

        code = normalize_duid(row["Participant Code"])
        name = participant_name_filter(row["Participant Name"])

        participant = (
            session.query(Participant)
            .filter(Participant.code == code)
            .one_or_none()
        )

        if not participant:
            participant = Participant(
                code=code,
                name=name,
                # @TODO WEM provides these but nem doesn't so ignore for noe
                # address=row["Address"],
                # city=row["City"],
                # state=row["State"],
                # postcode=row["Postcode"],
                created_by="pipeline.wem.participant",
            )
            logger.info("Created new WEM participant: {}".format(code))
        elif participant.name != name:
            participant.name = name
            participant.updated_by = "pipeline.wem.participant"
            logger.info("Updated WEM participant: {}".format(code))

        try:
            session.add(participant)
            session.commit()
            stored += 1
        except Exception as e:
            logger.error(e)

    session.close()

    return stored
def store_bom_records_temp_max(recs: List[Dict]) -> Dict:
    """Upsert BOM observation records, updating either temp_max or temp_min.

    The column updated on conflict depends on which field the records carry:
    if the first record has a temp_min key, temp_min is updated, otherwise
    temp_max.

    Returns:
        dict with "num_records": number of records stored (0 on error).
    """
    # guard: previously recs[0] raised IndexError on an empty list
    if not recs:
        return {"num_records": 0}

    # records_to_store = [i for i in recs if i["temp_max"] is not None]
    records_to_store = recs

    update_field = "temp_max"

    if "temp_min" in recs[0]:
        update_field = "temp_min"

    engine = get_database_engine()
    session = SessionLocal()

    # insert with per-field update on (observation_time, station_id) conflict
    stmt = insert(BomObservation).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["observation_time", "station_id"],
        set_={update_field: getattr(stmt.excluded, update_field)},
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return {"num_records": 0}
    finally:
        session.close()

    return {"num_records": len(records_to_store)}
def process_unit_solution(table):
    """Store UNIT_SOLUTION records into facility_scada.

    Deduplicates on the (trading_interval, facility_code, network_id)
    primary key in memory before bulk-upserting.

    Returns the number of records stored (0 on insert error).
    """
    session = SessionLocal()
    engine = get_database_engine()

    if "records" not in table:
        raise Exception("Invalid table no records")

    records = table["records"]

    records_to_store = []
    # Use a set for O(1) membership checks: this table can carry 1M+ rows
    # and the previous list scan made the dedupe loop quadratic.
    records_primary_keys = set()

    for record in records:
        trading_interval = parse_date(
            record["SETTLEMENTDATE"], network=NetworkNEM, dayfirst=False
        )
        facility_code = normalize_duid(record["DUID"])

        if not trading_interval or not facility_code:
            continue

        # in-memory primary-key check since the bulk insert cannot contain
        # duplicate rows for the same conflict target
        _unique_set = (trading_interval, facility_code, "NEM")

        if _unique_set not in records_primary_keys:
            records_to_store.append(
                {
                    "trading_interval": trading_interval,
                    "facility_code": facility_code,
                    "eoi_quantity": float(record["INITIALMW"]),
                    "network_id": "NEM",
                }
            )
            records_primary_keys.add(_unique_set)

    # free
    records_primary_keys = set()

    logger.debug("Saving %d records", len(records_to_store))

    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        constraint="facility_scada_pkey",
        set_={"eoi_quantity": stmt.excluded.eoi_quantity},
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error: {}".format(e))
        return 0
    finally:
        session.close()

    return len(records_to_store)
def store_stats_database(statset: StatsSet) -> int:
    """Upsert a StatsSet into the stats table.

    Returns the number of records written, or 0 on error.
    """
    session = SessionLocal()

    rows = [stat.dict() for stat in statset.stats]

    # upsert on the (stat_date, country, stat_type) key, refreshing value
    stmt = insert(Stats).values(rows)
    stmt.bind = get_database_engine()
    stmt = stmt.on_conflict_do_update(
        index_elements=[
            "stat_date",
            "country",
            "stat_type",
        ],
        set_={
            "value": stmt.excluded.value,
        },
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return 0
    finally:
        session.close()

    num_records = len(rows)

    logger.info("Wrote {} records to database".format(num_records))

    return num_records
def get_network_region_schema(
    network: NetworkSchema, network_region_code: Optional[str] = None
) -> List[NetworkRegionSchema]:
    """Return regions for a network"""
    session = SessionLocal()

    query = session.query(NetworkRegion).filter_by(network_id=network.code)

    # optionally narrow to a single region code
    if network_region_code:
        query = query.filter_by(code=network_region_code)

    return [NetworkRegionSchema.from_orm(region) for region in query.all()]
def facility_matcher(records: List[AEMOGIRecord]) -> None:
    """Match AEMO GI records to facilities by DUID and update their status."""
    with SessionLocal() as sess:
        for gi_record in records:
            # records without a DUID cannot be matched
            if not gi_record.duid:
                continue

            lookup_result: Optional[Facility] = sess.execute(
                select(Facility).where(Facility.code == gi_record.duid)
            ).one_or_none()

            if not lookup_result:
                logger.info(f"MISS: {gi_record.duid} {gi_record.name}")
                continue

            facility_db: Facility = lookup_result[0]

            logger.info(
                f"HIT {gi_record.duid} {gi_record.name} - currently {facility_db.status_id} change to => {gi_record.status_id}"
            )

            facility_db.status_id = gi_record.status_id
            sess.add(facility_db)

        sess.commit()
def export_facility_geojson() -> None:
    """Get the GeoJSON for facilities and write it to S3"""
    with SessionLocal() as session:
        # only approved facilities are published
        facility_geo = geo_facilities_api(only_approved=True, session=session)
        write_to_s3(
            facility_geo.json(exclude_unset=True),
            "/v3/geo/au_facilities.json",
        )
def wikidata_join():
    """Join parsed wikidata entries onto stations by exact name match."""
    session = SessionLocal()

    wikidata = load_data("wikidata-parsed.json", from_project=True)

    for entry in wikidata:
        station_name = entry.get("name")

        matches = (
            session.query(Station).filter(Station.name == station_name).all()
        )

        if len(matches) == 0:
            logger.info("Didn't find a station for {}".format(station_name))
        elif len(matches) == 1:
            # exactly one match - safe to update
            station = matches.pop()
            station.description = entry.get("description")
            station.wikipedia_link = entry.get("wikipedia")
            station.wikidata_id = entry.get("wikidata_id")
            session.add(station)
            logger.info("Updated station {}".format(station_name))
        else:
            # ambiguous name - skip rather than guess
            logger.info("Found multiple for station {}".format(station_name))

    session.commit()
def load_network_regions() -> None:
    """
    Load the network region fixture
    """
    fixture = load_data("network_regions.json", from_fixture=True)

    session = SessionLocal()

    for region in fixture:
        model = (
            session.query(NetworkRegion)
            .filter_by(code=region["code"], network_id=region["network_id"])
            .one_or_none()
        )

        if not model:
            model = NetworkRegion(code=region["code"])

        model.network_id = region["network_id"]

        try:
            session.add(model)
            session.commit()
            logger.debug("Loaded network region {}".format(model.code))
        except Exception:
            logger.error("Have {}".format(model.code))
def load_bom_stations_csv() -> None:
    """
    Imports the BOM fixed-width stations format

    Made redundant with the new JSON
    """
    s = SessionLocal()

    station_csv = load_data("stations_db.txt", from_fixture=True)

    lines = station_csv.split("\n")

    for line in lines:
        # guard: split("\n") yields an empty trailing entry which would
        # break the fixed-width parse
        if not line.strip():
            continue

        code, state, name, registered, lng, lat = parse_fixed_line(line)

        station = s.query(BomStation).filter_by(code=code).one_or_none()

        if not station:
            station = BomStation(
                code=code,
                state=state,
                name=name,
                registered=registered,
            )

        # WKT points are (x y) == (longitude latitude). The previous code
        # formatted (lat, lng), storing the coordinates swapped - see
        # load_bom_stations_json which uses the correct (lng, lat) order.
        station.geom = "SRID=4326;POINT({} {})".format(lng, lat)

        try:
            s.add(station)
            s.commit()
        except Exception:
            logger.error("Have {}".format(station.code))
def load_bom_stations_json() -> None:
    """
    Imports BOM stations into the database from bom_stations.json

    The json is obtained using scripts/bom_stations.py
    """
    session = SessionLocal()

    bom_stations = load_data("bom_stations.json", from_project=True)
    bom_capitals = load_data("bom_capitals.json", from_project=True)

    if not bom_stations:
        logger.error("Could not load bom stations")
        # previously fell through and crashed iterating an empty/None result
        return

    # track seen codes with a set for O(1) membership checks
    codes = set()

    stations_imported = 0

    for bom_station in bom_stations:

        if "code" not in bom_station:
            logger.error("Invalid bom station ..")
            continue

        # skip duplicate station codes in the source file
        if bom_station["code"] in codes:
            continue

        codes.add(bom_station["code"])

        station = session.query(BomStation).filter_by(
            code=bom_station["code"]
        ).one_or_none()

        if not station:
            logger.info("New BOM station: %s", bom_station["name"])
            station = BomStation(code=bom_station["code"])

        # fields below are refreshed on every import run
        station.name = bom_station["name_full"]
        station.name_alias = bom_station["name"]
        station.website_url = bom_station["url"]
        station.feed_url = bom_station["json_feed"]
        station.priority = 5
        station.state = bom_station["state"]
        station.altitude = bom_station["altitude"]

        if "web_code" in bom_station:
            station.web_code = bom_station["web_code"]

        # capital-city stations get top priority
        if bom_station["code"] in bom_capitals:
            station.is_capital = True
            station.priority = 1

        # WKT point order is (longitude latitude)
        station.geom = "SRID=4326;POINT({} {})".format(
            bom_station["lng"], bom_station["lat"]
        )

        stations_imported += 1
        session.add(station)

    logger.info("Imported {} stations".format(stations_imported))
    session.commit()
def load_networks() -> None:
    """
    Load the networks fixture
    """
    fixture = load_data("networks.json", from_fixture=True)

    session = SessionLocal()

    for network in fixture:
        model = session.query(Network).filter_by(
            code=network["code"]
        ).one_or_none()

        if not model:
            model = Network(code=network["code"])

        # mandatory fields, refreshed every run
        model.label = network["label"]
        model.country = network["country"]
        model.timezone = network["timezone"]
        model.timezone_database = network["timezone_database"]
        model.offset = network["offset"]
        model.interval_size = network["interval_size"]
        model.network_price = network["network_price"]

        # optional fields
        if "interval_shift" in network:
            model.interval_shift = network["interval_shift"]

        if "export_set" in network:
            model.export_set = network["export_set"]

        try:
            session.add(model)
            session.commit()
        except Exception:
            logger.error("Have {}".format(model.code))
def process_case_solutions(table):
    """Process CASE_SOLUTION records from an AEMO MMS table.

    NOTE(review): this function is an unfinished stub - it validates the
    table and initialises an accumulator but never parses or stores records.
    """
    session = SessionLocal()

    if "records" not in table:
        raise Exception("Invalid table no records")

    records = table["records"]

    # accumulator for records to persist - population not yet implemented
    records_to = []
def process_item(self, item, spider=None):
    """Parse a WEM balancing-summary CSV and upsert price/forecast rows.

    Returns the number of records stored, or 0 on insert error.
    """
    s = SessionLocal()

    csvreader = csv.DictReader(item["content"].split("\n"))

    records_to_store = []
    # in-memory primary-key guard so the bulk insert carries no duplicate intervals
    primary_keys = []

    for row in csvreader:
        trading_interval = parse_date(row["TRADING_DAY_INTERVAL"],
                                      network=NetworkWEM,
                                      dayfirst=False)
        if trading_interval not in primary_keys:
            forecast_load = clean_float(row["FORECAST_EOI_MW"])

            records_to_store.append({
                # NOTE(review): raises AttributeError if spider is None (the default)
                "created_by": spider.name,
                "trading_interval": trading_interval,
                "network_id": "WEM",
                "network_region": "WEM",
                "forecast_load": forecast_load,
                # generation_scheduled=row["Scheduled Generation (MW)"],
                # generation_total=row["Total Generation (MW)"],
                "price": clean_float(row["PRICE"]),
            })

            primary_keys.append(trading_interval)

    # upsert on the balancing-summary composite key, refreshing price/forecast
    stmt = insert(BalancingSummary).values(records_to_store)
    stmt.bind = get_database_engine()
    stmt = stmt.on_conflict_do_update(
        index_elements=[
            "trading_interval",
            "network_id",
            "network_region",
        ],
        set_={
            "price": stmt.excluded.price,
            "forecast_load": stmt.excluded.forecast_load,
        },
    )

    try:
        s.execute(stmt)
        s.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return 0
    finally:
        s.close()

    return len(records_to_store)
def dump_facilities() -> None:
    """Dump facilities to JSON"""
    station_map = []

    with SessionLocal() as sess:
        stations = (sess.query(Station).join(Facility, Location).filter(
            Facility.network_id.in_(["NEM", "WEM"])).order_by(
                Facility.network_id, Facility.network_region,
                Station.code).all())

        logger.debug(f"Got {len(stations)} stations")

        for station in stations:
            # strip SQLAlchemy instance state down to plain schema fields
            station_dict = clean_station_model_keys(station.__dict__)
            station_dict["facilities"] = []
            station_dict["location"] = None

            if station.location:
                location_dict = clean_location_model_keys(
                    station.location.__dict__)

                # @NOTE hardcoded as it's WEM/NEM for now
                location_dict["country"] = "AU"

                # @NOTE lat and lng set as they're props
                location_dict["lat"] = None
                location_dict["lng"] = None

                if station.location.lat:
                    location_dict["lat"] = station.location.lat

                if station.location.lng:
                    location_dict["lng"] = station.location.lng

                location_model = LocationSchema(**location_dict)
                station_dict["location"] = location_model

            for facility in station.facilities:
                logger.debug(
                    f"{facility.network.code} - Station {station.name} - {facility.code}"
                )
                facility_model = FacilitySchema(
                    **clean_facility_model_keys(facility.__dict__))
                station_dict["facilities"].append(facility_model)

            station_model = StationSchema(**station_dict)

            # print(station.code)
            station_map.append(station_model)

    # serialize with the project's JSON encoder (handles dates/decimals/models)
    with open("stations.json", "w+") as fh:
        # fh.write()
        json.dump(station_map, fh, cls=OpenNEMJSONEncoder, indent=4)
def get_stations() -> List[BomStationSchema]:
    """Get all weather stations

    @NOTE This is a bit redundant
    """
    with SessionLocal() as session:
        station_rows = (
            session.query(BomStation).filter(BomStation.priority >= 2).all()
        )
        models = [BomStationSchema.from_orm(row) for row in station_rows]

        return models
def wikidata_join_mapping() -> None:
    """Attempts to join the wikidata to OpenNEM stations using the csv
    file with mappings"""
    session = SessionLocal()

    wikidata = load_data("wikidata-parsed.json", from_project=True)

    wikidata_mappings = None

    with open("opennem/data/wikidata_mappings.csv") as fh:
        csvreader = csv.DictReader(
            fh,
            fieldnames=[
                "code",
                "name",
                "network_id",
                "network_region",
                "fueltech_id",
                "wikidata_id",
            ],
        )
        # skip the header row (code == "code") and rows with no wikidata id
        wikidata_mappings = {
            i["code"]: i["wikidata_id"]
            for i in list(csvreader)
            if i["wikidata_id"] and i["code"] != "code"
        }

    for station_code, wikidata_id in wikidata_mappings.items():
        # find the parsed wikidata entry this mapping points to
        wikidata_record_lookup = list(
            filter(lambda x: x["wikidata_id"] == wikidata_id, wikidata))

        if len(wikidata_record_lookup) == 0:
            logger.error("Could not find {}".format(wikidata_id))
            continue

        wikidata_record = wikidata_record_lookup.pop()

        station = session.query(Station).filter(
            Station.code == station_code).one_or_none()

        if not station:
            logger.error("Didn't find a station for {}".format(station_code))
            continue

        station.description = wikidata_record.get("description")
        station.wikipedia_link = wikidata_record.get("wikipedia")
        station.wikidata_id = wikidata_record.get("wikidata_id")

        session.add(station)
        logger.info("Updated station {}".format(station_code))

    session.commit()
def update_weather() -> None:
    """Fetch temperature observations from Willy Weather for the mapped BOM
    stations and upsert them into bom_observation.

    NOTE(review): contains debug output (pprint/print) and a hard-coded
    start_date - reads like a one-off backfill script.
    """
    wc = WillyClient()

    for bom_code, willyid in WILLY_MAP.items():
        r = wc.get_location_temp(willyid, days=3, start_date="2021-10-14")

        data_points = r["observationalGraphs"]["temperature"]["dataConfig"][
            "series"]["groups"]

        records = []

        pprint(r)

        for pointset in data_points:
            # print(p)
            for p in pointset["points"]:
                r_dict = {
                    "station_id": bom_code,
                    # point timestamps are unix epoch, converted as
                    # Sydney-local - TODO confirm for non-NSW stations
                    "observation_time":
                    unix_timestamp_to_aware_datetime(p["x"],
                                                     "Australia/Sydney"),
                    "temp_air": p["y"],
                }
                print("{} -> {}".format(r_dict["observation_time"],
                                        r_dict["temp_air"]))
                records.append(r_dict)

        # upsert this station's records before moving to the next station
        session = SessionLocal()
        engine = get_database_engine()

        stmt = insert(BomObservation).values(records)
        stmt.bind = engine
        stmt = stmt.on_conflict_do_update(
            index_elements=["observation_time", "station_id"],
            set_={
                # "temp_apparent": stmt.excluded.temp_apparent,
                "temp_air": stmt.excluded.temp_air,
            },
        )

        try:
            session.execute(stmt)
            session.commit()
        except Exception as e:
            logger.error("Error: {}".format(e))
        finally:
            session.close()
def process_meter_data_gen_duid(table):
    """Store METER_DATA_GEN_DUID rows into facility_scada.

    Returns the number of records stored, or 0 on insert error.
    """
    session = SessionLocal()
    engine = get_database_engine()

    if "records" not in table:
        raise Exception("Invalid table no records")

    rows = table["records"]

    scada_records = []

    for row in rows:
        interval = parse_date(
            row["INTERVAL_DATETIME"], network=NetworkNEM, dayfirst=False
        )

        # rows with unparseable timestamps are skipped
        if not interval:
            continue

        scada_records.append(
            {
                "network_id": "NEM",
                "trading_interval": interval,
                "facility_code": normalize_duid(row["DUID"]),
                "eoi_quantity": row["MWH_READING"],
            }
        )

    stmt = insert(FacilityScada).values(scada_records)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        constraint="facility_scada_pkey",
        set_={"eoi_quantity": stmt.excluded.eoi_quantity},
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return 0
    finally:
        session.close()

    return len(scada_records)
def process_pre_ap_price(table):
    """Store pre-AP energy prices into balancing_summary.

    Returns the number of records stored, or 0 on insert error.
    """
    session = SessionLocal()
    engine = get_database_engine()

    if "records" not in table:
        raise Exception("Invalid table no records")

    rows = table["records"]

    price_records = []

    for row in rows:
        interval = parse_date(
            row["SETTLEMENTDATE"], network=NetworkNEM, dayfirst=False
        )

        # rows with unparseable timestamps are skipped
        if not interval:
            continue

        price_records.append(
            {
                "network_id": "NEM",
                "network_region": row["REGIONID"],
                "trading_interval": interval,
                "price": row["PRE_AP_ENERGY_PRICE"],
            }
        )

    stmt = insert(BalancingSummary).values(price_records)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        constraint="balancing_summary_pkey",
        set_={"price": stmt.excluded.price},
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return 0
    finally:
        session.close()

    return len(price_records)
def process_item(self, item, spider=None):
    """Parse WEM facility scada CSV rows and upsert them into facility_scada.

    Returns the number of records stored, or 0 on insert error.
    """
    session = SessionLocal()
    engine = get_database_engine()

    csvreader = csv.DictReader(item["content"].split("\n"))

    records_to_store = []

    for row in csvreader:
        trading_interval = parse_date(
            row["Trading Interval"], dayfirst=True, network=NetworkWEM
        )
        facility_code = normalize_duid(row["Facility Code"])

        records_to_store.append(
            {
                "network_id": "WEM",
                "trading_interval": trading_interval,
                "facility_code": facility_code,
                # empty CSV fields are stored as NULL rather than ""
                "eoi_quantity": row["EOI Quantity (MW)"] or None,
                "generated": row["Energy Generated (MWh)"] or None,
            }
        )

    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        constraint="facility_scada_pkey",
        set_={
            "eoi_quantity": stmt.excluded.eoi_quantity,
            "generated": stmt.excluded.generated,
        },
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error: {}".format(e))
        # return 0 on failure for consistency with the other scada
        # pipelines; previously this fell through and reported the full
        # record count even when nothing was written
        return 0
    finally:
        session.close()

    return len(records_to_store)
def process_item(self, item, spider=None):
    """Store facility scada records from a crawler item.

    NOTE(review): work in progress - the record mapping below is entirely
    commented out, so this currently builds a list of empty dicts; the
    insert cannot store anything meaningful until the mapping is filled in.
    """
    if "records" not in item:
        logger.error("Invalid return response")

    records = item["records"]

    engine = get_database_engine()
    session = SessionLocal()

    records_to_store = []

    for record in records:
        records_to_store.append({
            # "network_id": "WEM" if state == "WA" else "NEM",
            # "trading_interval": interval_time,
            # "facility_code": facility_code,
            # "generated": generated,
        })

    # free
    primary_keys = None

    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=[
            "trading_interval", "network_id", "facility_code", "is_forecast"
        ],
        set_={
            "generated": stmt.excluded.generated,
        },
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error: {}".format(e))
    finally:
        session.close()

    return len(records_to_store)
def process_item(self, item, spider):
    """Persist a single BOM observation row from a crawled item."""
    session = SessionLocal()

    observation = BomObservation(
        station_id=item["station_id"],
        observation_time=parse_date(item["aifstime_utc"]),
        temp_apparent=item["apparent_t"],
        temp_air=item["air_temp"],
        press_qnh=item["press_qnh"],
        wind_dir=item["wind_dir"],
        wind_spd=item["wind_spd_kmh"],
    )

    try:
        session.add(observation)
        session.commit()
    except Exception as e:
        logger.error("Error: {}".format(e))
    finally:
        session.close()

    return item