def crawler_get_meta(crawler_name: str, key: CrawlStatTypes) -> Optional[Union[str, datetime]]:
    """Crawler get specific stat type from metadata for crawler name"""
    with get_scoped_session() as session:
        spider_meta = session.query(CrawlMeta).filter_by(spider_name=crawler_name).one_or_none()

        if not spider_meta:
            return None

        if not spider_meta.data:
            return None

        if key.value not in spider_meta.data:
            return None

        _val = spider_meta.data[key.value]

        if key in [
            CrawlStatTypes.latest_processed,
            CrawlStatTypes.last_crawled,
            CrawlStatTypes.server_latest,
        ]:
            _val_processed = datetime.fromisoformat(_val)
            return _val_processed

        return _val

def export_historic_intervals(limit: int | None = None) -> None:
    """Export historic weekly interval data for each network and network region"""
    session = get_scoped_session()

    networks = [NetworkNEM, NetworkWEM]

    for network in networks:
        network_regions: list[NetworkRegion] = (
            session.query(NetworkRegion).filter(NetworkRegion.network_id == network.code).all()
        )

        for network_region in network_regions:
            scada_range: ScadaDateRange = get_scada_range(network=network, networks=networks, energy=False)

            if not scada_range or not scada_range.start:
                logger.error("Could not get scada range for network {}".format(network))
                continue

            for week_start, week_end in week_series_datetimes(
                start=scada_range.end, end=scada_range.start, length=limit
            ):
                if week_start > get_today_opennem():
                    continue

                try:
                    export_network_intervals_for_week(
                        week_start, week_end, network=network, network_region=network_region
                    )
                except Exception as e:
                    logger.error(f"export_historic_intervals error: {e}")

def crawler_set_meta(crawler_name: str, key: CrawlStatTypes, value: Any) -> None:
    """Set a crawler metadata stat type by name"""
    with get_scoped_session() as session:
        if key == CrawlStatTypes.server_latest:
            current_value = crawler_get_meta(crawler_name, key)

            try:
                if current_value and current_value >= value:
                    return None
            except TypeError:
                logger.error(
                    "Error comparing {} ({}) and {} ({})".format(
                        current_value, type(current_value), value, type(value)
                    )
                )

        spider_meta = session.query(CrawlMeta).filter_by(spider_name=crawler_name).one_or_none()

        if not spider_meta:
            spider_meta = CrawlMeta(spider_name=crawler_name, data={})

        spider_meta.data[key.value] = value

        logger.debug("Spider {} meta: Set {} to {}".format(crawler_name, key.value, value))

        session.add(spider_meta)
        session.commit()

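# Hedged usage sketch, not part of the original module: the crawler name below is
# illustrative only. crawler_set_meta() only advances CrawlStatTypes.server_latest
# when the new value compares greater than the stored one, so replaying stale
# timestamps is a no-op, and crawler_get_meta() parses the stored ISO string back
# into a datetime:
#
#   latest = get_today_opennem()
#   crawler_set_meta("au.nem.example.crawler", CrawlStatTypes.server_latest, latest)
#   stored = crawler_get_meta("au.nem.example.crawler", CrawlStatTypes.server_latest)
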
def process_nem_price(table: AEMOTableSchema) -> ControllerReturn:
    """Stores the NEM price for both dispatch price and trading price"""
    session = get_scoped_session()
    engine = get_database_engine()

    cr = ControllerReturn(total_records=len(table.records))
    records_to_store = []
    primary_keys = []

    price_field = "price"

    if table.full_name == "dispatch_price":
        price_field = "price_dispatch"

    for record in table.records:
        # @NOTE disable pk track
        trading_interval = parse_date(record["settlementdate"])

        primary_key = set([trading_interval, record["regionid"]])  # type: ignore

        if primary_key in primary_keys:
            continue

        primary_keys.append(primary_key)

        records_to_store.append(
            {
                "network_id": "NEM",
                "created_by": "opennem.controllers.nem",
                "network_region": record["regionid"],
                "trading_interval": trading_interval,
                price_field: record["rrp"],
            }
        )

        cr.processed_records += 1

    stmt = insert(BalancingSummary).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["trading_interval", "network_id", "network_region"],
        set_={price_field: getattr(stmt.excluded, price_field)},
    )

    try:
        session.execute(stmt)
        session.commit()
        cr.inserted_records = cr.processed_records
        cr.server_latest = max([i["trading_interval"] for i in records_to_store])
    except Exception as e:
        logger.error("Error inserting NEM price records")
        logger.error(e)
        cr.errors = cr.processed_records
    finally:
        session.rollback()
        session.close()
        engine.dispose()

    return cr

def store_wem_facility_intervals(balancing_set: WEMFacilityIntervalSet) -> ControllerReturn:
    """Persist WEM facility intervals"""
    engine = get_database_engine()
    session = get_scoped_session()
    cr = ControllerReturn()

    records_to_store = []

    if not balancing_set.intervals:
        return cr

    cr.total_records = len(balancing_set.intervals)
    cr.server_latest = balancing_set.server_latest

    for _rec in balancing_set.intervals:
        records_to_store.append(
            {
                "created_by": "wem.controller",
                "network_id": "WEM",
                "trading_interval": _rec.trading_interval,
                "facility_code": _rec.facility_code,
                "generated": _rec.generated,
                "eoi_quantity": _rec.eoi_quantity,
            }
        )
        cr.processed_records += 1

    if len(records_to_store) < 1:
        return cr

    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["trading_interval", "network_id", "facility_code", "is_forecast"],
        set_={
            "generated": stmt.excluded.generated,
            "eoi_quantity": stmt.excluded.eoi_quantity,
        },
    )

    try:
        session.execute(stmt)
        session.commit()
        cr.inserted_records = len(records_to_store)
    except Exception as e:
        logger.error("Error: {}".format(e))
        cr.errors = len(records_to_store)
        cr.error_detail.append(str(e))
    finally:
        session.close()
        engine.dispose()

    return cr

def process_dispatch_interconnectorres(table: AEMOTableSchema) -> ControllerReturn:
    """Store NEM dispatch interconnector flows as facility scada records"""
    session = get_scoped_session()
    engine = get_database_engine()

    cr = ControllerReturn(total_records=len(table.records))
    records_to_store = []
    primary_keys = []

    for record in table.records:
        primary_key = set([record["settlementdate"], record["interconnectorid"]])

        if primary_key in primary_keys:
            continue

        primary_keys.append(primary_key)

        records_to_store.append(
            {
                "network_id": "NEM",
                "created_by": "opennem.controller",
                "facility_code": record["interconnectorid"],
                "trading_interval": record["settlementdate"],
                "generated": record["mwflow"],
            }
        )

        cr.processed_records += 1

    # insert
    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["trading_interval", "network_id", "facility_code", "is_forecast"],
        set_={"generated": stmt.excluded.generated},
    )

    try:
        session.execute(stmt)
        session.commit()
        cr.inserted_records = cr.processed_records
        cr.server_latest = max([i["trading_interval"] for i in records_to_store])
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        cr.errors = cr.processed_records
        return cr
    finally:
        session.rollback()
        session.close()
        engine.dispose()

    return cr

def crawler_get_all_meta(crawler_name: str) -> Optional[Dict[str, Any]]:
    """Get crawler metadata by crawler name"""
    with get_scoped_session() as session:
        spider_meta = session.query(CrawlMeta).filter_by(spider_name=crawler_name).one_or_none()

        if not spider_meta:
            return None

        if not spider_meta.data:
            return None

        return spider_meta.data

def update_network_data_ranges(data_ranges: list[NetworkDataDateRanges]) -> None:
    """Updates the data ranges in the network"""
    with get_scoped_session() as sess:
        for date_range in data_ranges:
            network = sess.query(Network).get(date_range.network)

            if not network:
                raise Exception(f"Could not find network {date_range.network}")

            network.data_start_date = date_range.data_min
            network.data_end_date = date_range.data_max

            sess.add(network)

        sess.commit()

def store_apvi_forecastset(forecast_set: APVIForecastSet) -> ControllerReturn:
    """Persist an APVI forecast set to the database"""
    engine = get_database_engine()
    session = get_scoped_session()
    cr = ControllerReturn()

    records_to_store = []

    if not forecast_set.intervals:
        return cr

    cr.total_records = len(forecast_set.intervals)

    for _rec in forecast_set.intervals:
        records_to_store.append(
            {**_rec.dict(exclude={"state"}), "created_by": "apvi.controller", "is_forecast": False}
        )
        cr.processed_records += 1

    if len(records_to_store) < 1:
        return cr

    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["trading_interval", "network_id", "facility_code", "is_forecast"],
        set_={
            "generated": stmt.excluded.generated,
            "created_by": stmt.excluded.created_by,
        },
    )

    try:
        session.execute(stmt)
        session.commit()
        cr.inserted_records = len(records_to_store)
    except Exception as e:
        logger.error("Error: {}".format(e))
        cr.errors = len(records_to_store)
        cr.error_detail.append(str(e))
    finally:
        session.close()
        engine.dispose()

    return cr

def update_apvi_facility_capacities(forecast_set: APVIForecastSet) -> None:
    """Updates facility capacities for APVI rooftops"""
    session = get_scoped_session()

    if not forecast_set.capacities:
        return None

    for state_capacity in forecast_set.capacities:
        state_facility: Facility = (
            session.query(Facility).filter_by(code=state_capacity.facility_code).one_or_none()
        )

        if not state_facility:
            raise Exception(f"Could not find rooftop facility for {state_capacity.facility_code}")

        state_facility.capacity_registered = state_capacity.capacity_registered
        state_facility.unit_number = state_capacity.unit_number

        session.add(state_facility)
        session.commit()

def get_api_key_record(api_key: str) -> AuthApiKeyRecord:
    """Get an API Key record from the database"""
    session = get_scoped_session()

    try:
        api_key = validate_api_key(api_key)
    except Exception as e:
        logger.error("Bad API key {}: {}".format(api_key, e))
        raise UnauthorizedRequest()

    api_key_record: Optional[ApiKeys] = session.query(ApiKeys).filter_by(keyid=api_key).one_or_none()

    if not api_key_record:
        logger.error("API key not found: {}".format(api_key))
        raise BadCredentialsKeyNotFound()

    if api_key_record.revoked:
        logger.error("API key revoked: {}".format(api_key))
        raise RevokedCredentials()

    api_key_schema = AuthApiKeyRecord.from_orm(api_key_record)

    return api_key_schema

def export_all_daily(
    networks: List[NetworkSchema] = [NetworkNEM, NetworkWEM],
    network_region_code: Optional[str] = None,
) -> None:
    """Export daily energy statistics for each network region"""
    session = get_scoped_session()
    cpi = gov_stats_cpi()

    for network in networks:
        network_regions = session.query(NetworkRegion).filter_by(export_set=True).filter_by(
            network_id=network.code
        )

        if network_region_code:
            network_regions = network_regions.filter_by(code=network_region_code)

        network_regions = network_regions.all()

        for network_region in network_regions:
            logger.info(
                "Exporting for network {} and region {}".format(network.code, network_region.code)
            )

            networks = [NetworkNEM, NetworkAEMORooftop, NetworkAEMORooftopBackfill]

            if network_region.code == "WEM":
                networks = [NetworkWEM, NetworkAPVI]

            scada_range: ScadaDateRange = get_scada_range(network=network, networks=networks, energy=True)

            if not scada_range or not scada_range.start:
                logger.error(
                    "Could not get scada range for network {} and energy {}".format(network, True)
                )
                continue

            time_series = TimeSeries(
                start=scada_range.start,
                end=scada_range.end,
                network=network,
                interval=human_to_interval("1d"),
                period=human_to_period("all"),
            )

            stat_set = energy_fueltech_daily(
                time_series=time_series,
                networks_query=networks,
                network_region_code=network_region.code,
            )

            if not stat_set:
                continue

            demand_energy_and_value = demand_network_region_daily(
                time_series=time_series,
                network_region_code=network_region.code,
                networks=networks,
            )
            stat_set.append_set(demand_energy_and_value)

            # Hard coded to NEM only atm but we'll put has_interconnectors
            # in the metadata to automate all this
            if network == NetworkNEM:
                interconnector_flows = energy_interconnector_flows_and_emissions(
                    time_series=time_series,
                    networks_query=networks,
                    network_region_code=network_region.code,
                )
                stat_set.append_set(interconnector_flows)

            bom_station = get_network_region_weather_station(network_region.code)

            if bom_station:
                try:
                    weather_stats = weather_daily(
                        time_series=time_series,
                        station_code=bom_station,
                        network_region=network_region.code,
                    )
                    stat_set.append_set(weather_stats)
                except Exception:
                    pass

            if cpi:
                stat_set.append_set(cpi)

            write_output(f"v3/stats/au/{network_region.code}/daily.json", stat_set)

def process_dispatch_regionsum(table: AEMOTableSchema) -> ControllerReturn:
    """Store NEM dispatch region summary records in balancing summary"""
    session = get_scoped_session()
    engine = get_database_engine()

    cr = ControllerReturn(total_records=len(table.records))
    records_to_store = []
    primary_keys = []

    for record in table.records:
        if not isinstance(record, dict):
            continue

        trading_interval = parse_date(record.get("settlementdate"))

        primary_key = set([trading_interval, record["regionid"]])

        if primary_key in primary_keys:
            continue

        primary_keys.append(primary_key)

        if "demand_and_nonschedgen" not in record:
            raise Exception("bad value in dispatch_regionsum")

        records_to_store.append(
            {
                "network_id": "NEM",
                "created_by": "opennem.controller",
                "network_region": record["regionid"],
                "trading_interval": trading_interval,
                "net_interchange": record["netinterchange"],
                "demand": record["totaldemand"],
                "demand_total": record["demand_and_nonschedgen"],
            }
        )

        cr.processed_records += 1

    stmt = insert(BalancingSummary).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["trading_interval", "network_id", "network_region"],
        set_={
            "net_interchange": stmt.excluded.net_interchange,
            "demand_total": stmt.excluded.demand_total,
            "demand": stmt.excluded.demand,
        },
    )

    try:
        session.execute(stmt)
        session.commit()
        cr.inserted_records = cr.processed_records
        cr.server_latest = max([i["trading_interval"] for i in records_to_store])
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        cr.errors = cr.processed_records
    finally:
        session.rollback()
        session.close()
        engine.dispose()

    return cr

def process_trading_regionsum(table: AEMOTableSchema) -> ControllerReturn:
    """Store NEM trading region summary records in balancing summary"""
    engine = get_database_engine()

    if not table.records:
        logger.debug(table)
        raise Exception("Invalid table no records")

    cr = ControllerReturn(total_records=len(table.records))

    limit = None
    records_to_store = []
    primary_keys = []

    for record in table.records:
        if not isinstance(record, dict):
            raise Exception("Invalid record type")

        trading_interval = parse_date(
            record["settlementdate"],
            network=NetworkNEM,
            dayfirst=False,
            date_format="%Y/%m/%d %H:%M:%S",
        )

        if not trading_interval:
            continue

        _pk = set([trading_interval, record["regionid"]])

        if _pk in primary_keys:
            continue

        primary_keys.append(_pk)

        net_interchange = None

        if "netinterchange" in record:
            net_interchange = clean_float(record["netinterchange"])

        records_to_store.append(
            {
                "network_id": "NEM",
                "created_by": "opennem.controller.nem",
                "network_region": record["regionid"],
                "net_interchange_trading": net_interchange,
                "trading_interval": trading_interval,
            }
        )

        cr.processed_records += 1

        if limit and cr.processed_records >= limit:
            logger.info("Reached limit of: {} {}".format(limit, cr.processed_records))
            break

    stmt = insert(BalancingSummary).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["trading_interval", "network_id", "network_region"],
        set_={
            "net_interchange_trading": stmt.excluded.net_interchange_trading,
        },
    )

    session = get_scoped_session()

    try:
        session.execute(stmt)
        session.commit()
        cr.inserted_records = cr.processed_records
        cr.server_latest = max([i["trading_interval"] for i in records_to_store])
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        records_to_store = []
        cr.errors = cr.processed_records
    finally:
        session.rollback()
        session.close()
        engine.dispose()

    return cr

def store_wem_balancingsummary_set(balancing_set: WEMBalancingSummarySet) -> ControllerReturn:
    """Persist wem balancing set to the database"""
    engine = get_database_engine()
    session = get_scoped_session()
    cr = ControllerReturn()

    records_to_store = []

    if not balancing_set.intervals:
        return cr

    cr.total_records = len(balancing_set.intervals)
    cr.server_latest = balancing_set.server_latest

    for _rec in balancing_set.intervals:
        records_to_store.append(
            {
                "created_by": "wem.controller",
                "trading_interval": _rec.trading_day_interval,
                "network_id": "WEM",
                "network_region": "WEM",
                "is_forecast": _rec.is_forecast,
                "forecast_load": _rec.forecast_mw,
                "generation_total": _rec.actual_total_generation,
                "generation_scheduled": _rec.actual_nsg_mw,
                "price": _rec.price,
            }
        )
        cr.processed_records += 1

    if len(records_to_store) < 1:
        return cr

    stmt = insert(BalancingSummary).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=[
            "trading_interval",
            "network_id",
            "network_region",
        ],
        set_={
            "price": stmt.excluded.price,
            "forecast_load": stmt.excluded.forecast_load,
            "generation_total": stmt.excluded.generation_total,
            "is_forecast": stmt.excluded.is_forecast,
        },
    )

    try:
        session.execute(stmt)
        session.commit()
        cr.inserted_records = len(records_to_store)
    except Exception as e:
        logger.error("Error: {}".format(e))
        cr.errors = len(records_to_store)
        cr.error_detail.append(str(e))
    finally:
        session.close()
        engine.dispose()

    return cr

def generate_export_map() -> StatMetadata:
    """
    Generates a map of all export JSONs
    """
    session = get_scoped_session()

    networks = session.query(Network).filter(Network.export_set.is_(True)).all()

    if not networks:
        raise Exception("No networks")

    countries = list(set([network.country for network in networks]))

    _exmap = []

    for country in countries:
        # @TODO derive this
        scada_range = get_scada_range(network=NetworkAU, networks=[NetworkNEM, NetworkWEM])

        if not scada_range:
            raise Exception("Require a scada range for NetworkAU")

        export = StatExport(
            stat_type=StatType.power,
            priority=PriorityType.live,
            country=country,
            date_range=scada_range,
            network=NetworkAU,
            networks=[
                NetworkNEM,
                NetworkWEM,
                NetworkAEMORooftop,
                NetworkAEMORooftopBackfill,
                NetworkAPVI,
            ],
            interval=NetworkAU.get_interval(),
            period=human_to_period("7d"),
        )
        _exmap.append(export)

        for year in range(
            datetime.now().year,
            scada_range.start.year - 1,
            -1,
        ):
            export = StatExport(
                stat_type=StatType.energy,
                priority=PriorityType.daily,
                country=country,
                date_range=scada_range,
                network=NetworkAU,
                networks=[
                    NetworkNEM,
                    NetworkWEM,
                    NetworkAEMORooftop,
                    NetworkAEMORooftopBackfill,
                    NetworkAPVI,
                ],
                year=year,
                interval=human_to_interval("1d"),
                period=human_to_period("1Y"),
            )
            _exmap.append(export)

        export = StatExport(
            stat_type=StatType.energy,
            priority=PriorityType.monthly,
            country=country,
            date_range=scada_range,
            network=NetworkAU,
            networks=[
                NetworkNEM,
                NetworkWEM,
                NetworkAEMORooftop,
                NetworkAEMORooftopBackfill,
                NetworkAPVI,
            ],
            interval=human_to_interval("1M"),
            period=human_to_period("all"),
        )
        _exmap.append(export)

    for network in networks:
        network_schema = network_from_network_code(network.code)
        scada_range = get_scada_range(network=network_schema)
        bom_station = get_network_region_weather_station(network.code)

        export = StatExport(
            stat_type=StatType.power,
            priority=PriorityType.live,
            country=network.country,
            date_range=scada_range,
            network=network_schema,
            bom_station=bom_station,
            interval=network_schema.get_interval(),
            period=human_to_period("7d"),
        )

        if network.code == "WEM":
            export.networks = [NetworkWEM, NetworkAPVI]
            export.network_region_query = "WEM"

        if network.code == "NEM":
            export.networks = [NetworkNEM, NetworkAEMORooftop, NetworkAEMORooftopBackfill]

        _exmap.append(export)

        if not scada_range:
            raise Exception("Require a scada range for network: {}".format(network.code))

        for year in range(
            datetime.now().year,
            scada_range.start.year - 1,
            -1,
        ):
            export = StatExport(
                stat_type=StatType.energy,
                priority=PriorityType.daily,
                country=network.country,
                date_range=scada_range,
                network=network_schema,
                bom_station=bom_station,
                year=year,
                period=human_to_period("1Y"),
                interval=human_to_interval("1d"),
            )

            if network.code == "WEM":
                export.networks = [NetworkWEM, NetworkAPVI]
                export.network_region_query = "WEM"

            if network.code == "NEM":
                export.networks = [NetworkNEM, NetworkAEMORooftop, NetworkAEMORooftopBackfill]

            _exmap.append(export)

        export = StatExport(
            stat_type=StatType.energy,
            priority=PriorityType.monthly,
            country=network.country,
            date_range=scada_range,
            network=network_schema,
            bom_station=bom_station,
            interval=human_to_interval("1M"),
            period=human_to_period("all"),
        )

        if network.code == "WEM":
            export.networks = [NetworkWEM, NetworkAPVI]
            export.network_region_query = "WEM"

        if network.code == "NEM":
            export.networks = [NetworkNEM, NetworkAEMORooftop, NetworkAEMORooftopBackfill]

        _exmap.append(export)

        # Skip cases like wem/wem where region is superfluous
        if len(network.regions) < 2:
            continue

        for region in network.regions:
            scada_range = get_scada_range(network=network_schema, network_region=region.code)
            bom_station = get_network_region_weather_station(region.code)

            if not scada_range:
                logger.error(
                    "Require a scada range for network {} and region {}".format(
                        network_schema.code, region.code
                    )
                )
                continue

            export = StatExport(
                stat_type=StatType.power,
                priority=PriorityType.live,
                country=network.country,
                date_range=scada_range,
                network=network_schema,
                network_region=region.code,
                bom_station=bom_station,
                period=human_to_period("7d"),
                interval=network_schema.get_interval(),
            )

            if network.code == "WEM":
                export.networks = [NetworkWEM, NetworkAPVI]
                export.network_region_query = "WEM"

            if network.code == "NEM":
                export.networks = [NetworkNEM, NetworkAEMORooftop, NetworkAEMORooftopBackfill]

            _exmap.append(export)

            for year in range(
                datetime.now().year,
                scada_range.start.year - 1,
                -1,
            ):
                export = StatExport(
                    stat_type=StatType.energy,
                    priority=PriorityType.daily,
                    country=network.country,
                    date_range=scada_range,
                    network=network_schema,
                    network_region=region.code,
                    networks=[NetworkNEM, NetworkAEMORooftop, NetworkAEMORooftopBackfill],
                    bom_station=bom_station,
                    year=year,
                    period=human_to_period("1Y"),
                    interval=human_to_interval("1d"),
                )
                _exmap.append(export)

            export = StatExport(
                stat_type=StatType.energy,
                priority=PriorityType.monthly,
                country=network.country,
                date_range=scada_range,
                network=network_schema,
                networks=[NetworkNEM, NetworkAEMORooftop, NetworkAEMORooftopBackfill],
                network_region=region.code,
                bom_station=bom_station,
                period=human_to_period("all"),
                interval=human_to_interval("1M"),
            )

            if network.code == "WEM":
                export.networks = [NetworkWEM, NetworkAPVI]
                export.network_region_query = "WEM"

            if network.code == "NEM":
                export.networks = [NetworkNEM, NetworkAEMORooftop, NetworkAEMORooftopBackfill]

            _exmap.append(export)

    export_meta = StatMetadata(
        date_created=datetime.now(), version=get_version(), resources=_exmap
    )

    return export_meta

def store_bom_observation_intervals(observations: BOMObservationReturn) -> ControllerReturn:
    """Store BOM Observations"""
    engine = get_database_engine()

    cr = ControllerReturn(total_records=len(observations.observations))

    latest_forecast: Optional[datetime] = max(
        [o.observation_time for o in observations.observations if o.observation_time]
    )

    if latest_forecast:
        latest_forecast = latest_forecast.astimezone(ZoneInfo("Australia/Sydney"))
        logger.debug("server_latest is {}".format(latest_forecast))

        cr.server_latest = latest_forecast

    records_to_store = []

    for obs in observations.observations:
        records_to_store.append(
            {
                "station_id": observations.station_code,
                "observation_time": obs.observation_time,
                "temp_apparent": obs.apparent_t,
                "temp_air": obs.air_temp,
                "press_qnh": obs.press_qnh,
                "wind_dir": obs.wind_dir,
                "wind_spd": obs.wind_spd_kmh,
                "wind_gust": obs.gust_kmh,
                "cloud": obs.cloud,
                "cloud_type": obs.cloud_type,
                "humidity": obs.rel_hum,
            }
        )
        cr.processed_records += 1

    if not len(records_to_store):
        return cr

    stmt = insert(BomObservation).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["observation_time", "station_id"],
        set_={
            "temp_apparent": stmt.excluded.temp_apparent,
            "temp_air": stmt.excluded.temp_air,
            "press_qnh": stmt.excluded.press_qnh,
            "wind_dir": stmt.excluded.wind_dir,
            "wind_spd": stmt.excluded.wind_spd,
            "wind_gust": stmt.excluded.wind_gust,
            "cloud": stmt.excluded.cloud,
            "cloud_type": stmt.excluded.cloud_type,
            "humidity": stmt.excluded.humidity,
        },
    )

    with get_scoped_session() as session:
        try:
            session.execute(stmt)
            session.commit()
        except Exception as e:
            logger.error("Error: {}".format(e))
            cr.errors = cr.processed_records
            cr.error_detail.append(str(e))
        finally:
            session.close()
            engine.dispose()

    cr.inserted_records = cr.processed_records

    return cr

def export_all_monthly() -> None:
    """Export monthly energy statistics for all networks and regions to a single output"""
    session = get_scoped_session()

    all_monthly = OpennemDataSet(
        code="au", data=[], version=get_version(), created_at=datetime.now()
    )

    cpi = gov_stats_cpi()
    all_monthly.append_set(cpi)

    # Iterate networks and network regions
    networks = [NetworkNEM, NetworkWEM]

    for network in networks:
        network_regions = session.query(NetworkRegion).filter(
            NetworkRegion.network_id == network.code
        ).all()

        for network_region in network_regions:
            networks = []

            logger.info(
                "Exporting monthly for network {} and region {}".format(
                    network.code, network_region.code
                )
            )

            if network_region.code == "WEM":
                networks = [NetworkWEM, NetworkAPVI]

            if network == NetworkNEM:
                networks = [NetworkNEM, NetworkAEMORooftop]

            logger.debug(
                "Running monthlies for {} and {}".format(network.code, network_region.code)
            )

            scada_range: ScadaDateRange = get_scada_range(network=network, networks=networks, energy=True)

            if not scada_range or not scada_range.start:
                logger.error(
                    "Could not get scada range for network {} and energy {}".format(network, True)
                )
                continue

            time_series = TimeSeries(
                start=scada_range.start,
                end=scada_range.end,
                network=network,
                interval=human_to_interval("1M"),
                period=human_to_period("all"),
            )

            stat_set = energy_fueltech_daily(
                time_series=time_series,
                networks_query=networks,
                network_region_code=network_region.code,
            )

            if not stat_set:
                continue

            demand_energy_and_value = demand_network_region_daily(
                time_series=time_series,
                network_region_code=network_region.code,
                networks=networks,
            )
            stat_set.append_set(demand_energy_and_value)

            if network == NetworkNEM:
                interconnector_flows = energy_interconnector_flows_and_emissions(
                    time_series=time_series,
                    networks_query=networks,
                    network_region_code=network_region.code,
                )
                stat_set.append_set(interconnector_flows)

            all_monthly.append_set(stat_set)

            bom_station = get_network_region_weather_station(network_region.code)

            if bom_station:
                try:
                    weather_stats = weather_daily(
                        time_series=time_series,
                        station_code=bom_station,
                        network_region=network_region.code,
                    )
                    all_monthly.append_set(weather_stats)
                except Exception:
                    pass

    write_output("v3/stats/au/all/monthly.json", all_monthly)

def set_crawler_history(crawler_name: str, histories: list[CrawlHistoryEntry]) -> int:
    """Sets the crawler history"""
    engine = get_database_engine()
    session = get_scoped_session()

    history_intervals = [i.interval for i in histories]

    logger.debug(f"Have {len(history_intervals)} history intervals for {crawler_name}")

    date_max = max(history_intervals)
    date_min = min(history_intervals)

    logger.debug(f"crawler {crawler_name} date range: {date_min}, {date_max}")

    stmt = sql(
        """
        select
            interval
        from crawl_history
        where
            interval >= :date_min
            and interval <= :date_max
        """
    )

    query = stmt.bindparams(date_max=date_max, date_min=date_min)

    with engine.connect() as c:
        results = list(c.execute(query))

    existing_intervals = [i[0] for i in results]

    logger.debug(f"Got {len(existing_intervals)} existing intervals for crawler {crawler_name}")

    # Persist the crawl history records
    crawl_history_records: list[Dict[str, datetime | str | int | None]] = []

    for ch in histories:
        crawl_history_records.append(
            {
                "source": "nemweb",
                "crawler_name": crawler_name,
                "network_id": "NEM",
                "interval": ch.interval,
                "inserted_records": ch.records,
                "crawled_time": None,
                "processed_time": get_today_opennem(),
            }
        )

    # insert
    stmt = insert(CrawlHistory).values(crawl_history_records)
    stmt.bind = engine  # type: ignore
    stmt = stmt.on_conflict_do_update(  # type: ignore
        index_elements=["source", "crawler_name", "network_id", "interval"],
        set_={
            "inserted_records": stmt.excluded.inserted_records,  # type: ignore
            "crawled_time": stmt.excluded.crawled_time,  # type: ignore
            "processed_time": stmt.excluded.processed_time,  # type: ignore
        },
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error(f"set_crawler_history error updating records: {e}")
    finally:
        session.rollback()
        session.close()
        engine.dispose()

    return len(histories)

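# Hedged usage sketch, not part of the original module: assumes CrawlHistoryEntry
# accepts `interval` and `records` as keyword arguments (only those two attributes
# are read above); the crawler name is illustrative only.
#
#   from datetime import datetime, timedelta
#
#   entries = [
#       CrawlHistoryEntry(interval=datetime(2022, 1, 1) + timedelta(minutes=5 * i), records=12)
#       for i in range(3)
#   ]
#   set_crawler_history("au.nem.example.crawler", entries)
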
def generate_weekly_export_map() -> StatMetadata:
    """
    Generate export map for weekly power series

    @TODO deconstruct this into separate methods and schema
    ex. network.get_scada_range(), network_region.get_bom_station() etc.
    """
    session = get_scoped_session()

    networks = session.query(Network).filter(Network.export_set.is_(True)).all()

    if not networks:
        raise Exception("No networks")

    countries = list(set([network.country for network in networks]))

    _exmap = []

    # Loop countries
    for country in countries:
        # @TODO derive this
        scada_range = get_scada_range(network=NetworkAU, networks=[NetworkNEM, NetworkWEM])

        if not scada_range:
            raise Exception("Require a scada range for NetworkAU")

        for year, week in week_series(scada_range.end, scada_range.start):
            export = StatExport(
                stat_type=StatType.power,
                priority=PriorityType.history,
                country=country,
                network=NetworkAU,
                networks=[NetworkNEM, NetworkWEM],
                year=year,
                week=week,
                date_range=date_range_from_week(year, week, NetworkAU),
                interval=human_to_interval("30m"),
                period=human_to_period("7d"),
            )
            _exmap.append(export)

    # Loop networks
    for network in networks:
        network_schema = network_from_network_code(network.code)
        scada_range = get_scada_range(network=network_schema)

        if not scada_range:
            raise Exception("Require a scada range for network: {}".format(network.code))

        for year, week in week_series(scada_range.end, scada_range.start):
            export = StatExport(
                stat_type=StatType.power,
                priority=PriorityType.history,
                country=network.country,
                network=network_schema,
                year=year,
                week=week,
                date_range=date_range_from_week(year, week, NetworkAU),
                interval=human_to_interval(f"{network.interval_size}m"),
                period=human_to_period("7d"),
            )

            if network.code == "WEM":
                export.networks = [NetworkWEM, NetworkAPVI]
                export.network_region_query = "WEM"

            _exmap.append(export)

        # Skip cases like wem/wem where region is superfluous
        if len(network.regions) < 2:
            continue

        for region in network.regions:
            scada_range = get_scada_range(network=network_schema, network_region=region.code)

            if not scada_range:
                logger.error(
                    "Require a scada range for network {} and region {}".format(
                        network_schema.code, region.code
                    )
                )
                continue

            for year, week in week_series(scada_range.end, scada_range.start):
                export = StatExport(
                    stat_type=StatType.power,
                    priority=PriorityType.history,
                    country=network.country,
                    network=network_schema,
                    year=year,
                    week=week,
                    date_range=date_range_from_week(
                        year, week, network_from_network_code(network.code)
                    ),
                    interval=human_to_interval(f"{network.interval_size}m"),
                    period=human_to_period("7d"),
                )

                if network.code == "WEM":
                    export.networks = [NetworkWEM, NetworkAPVI]
                    export.network_region_query = "WEM"

                _exmap.append(export)

    export_meta = StatMetadata(
        date_created=datetime.now(), version=get_version(), resources=_exmap
    )

    return export_meta