Example 1
def get_facility_first_seen(
        period: Optional[str] = None) -> List[FacilitySeen]:
    """Run this and it'll check if there are new facilities in
    scada data and let you know which ones

    The query can be an expensive one so don't run to often.
    """

    engine = get_database_engine()

    __query = """
        select
            distinct fs.facility_code,
            fs.network_id
        from facility_scada fs
        where
            fs.facility_code not in (select distinct code from facility)
            {period_query}
    """

    period_query = f"and fs.trading_interval > now() - interval '{period}'" if period else ""

    query = __query.format(period_query=period_query)

    with engine.connect() as c:
        logger.debug(query)
        row = list(c.execute(query))

    records: List[FacilitySeen] = [
        FacilitySeen(code=r[0], network_id=r[1]) for r in row
    ]

    return records
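A minimal usage sketch (assuming the same module imports and logger as the example above). Note that period is interpolated directly into a Postgres interval literal, so it should be a string such as "7 days":

# Usage sketch: report facilities first seen in scada data over the last week.
new_facilities = get_facility_first_seen(period="7 days")

for seen in new_facilities:
    logger.info("New facility %s on network %s", seen.code, seen.network_id)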
Example 2
def get_v2_compat_data() -> List[Dict]:
    query = """
    select
        trading_interval at time zone 'AEST' as trading_interval,
        facility_code,
        generated
    from facility_scada
    where
        facility_code in ('LD01', 'LD02', 'LD03', 'LD04', 'BW01', 'BW02', 'BW03', 'BW04', 'MP1', 'ER01', 'ER02', 'ER03', 'ER04', 'MM3', 'MM4', 'MP2', 'REDBANK1', 'VP5', 'VP6', 'WW7', 'WW8')
        and trading_interval >= '2021-01-01 00:00:00+10'
    order by trading_interval asc;
    """

    engine = get_database_engine()

    with engine.connect() as c:
        logger.debug(query)
        res = list(c.execute(query))

    results = [
        ScadaResults(trading_interval=i[0], code=i[1], generated=i[2]).dict()
        for i in res
    ]

    return results
Example 3
def get_network_data_ranges() -> list[NetworkDataDateRanges]:
    """Runs a query to get the network data ranges"""
    engine = get_database_engine()

    stmt = sql("""
        select
            fs.network_id,
            min(fs.trading_interval) as scada_min,
            max(fs.trading_interval) as scada_max
        from facility_scada fs
        where
            fs.is_forecast is FALSE
        group by fs.network_id;
    """)

    with engine.connect() as c:
        results = list(c.execute(stmt))

    if not results:
        raise Exception(
            "No results for data range query in update_network_data_ranges")

    models = [
        NetworkDataDateRanges(network=i[0], data_min=i[1], data_max=i[2])
        for i in results
    ]

    return models
Example 4
def get_facility_first_seen() -> List[FacilitySeen]:
    """Run this and it'll check if there are new facilities in
    scada data and let you know which ones

    The query can be an expensive one so don't run to often.
    """

    engine = get_database_engine()

    __query = """
        select
            distinct fs.facility_code,
            fs.network_id
        from facility_scada fs
        where
            fs.facility_code not in (select distinct code from facility);
    """

    with engine.connect() as c:
        logger.debug(__query)
        row = list(c.execute(__query))

    records: List[FacilitySeen] = [
        FacilitySeen(code=r[0], network_id=r[1]) for r in row
    ]

    return records
Example 5
def get_generated(
    network_region: str,
    date_min: datetime,
    date_max: datetime,
    network: NetworkSchema,
    fueltech_id: Optional[str] = None,
) -> List[Dict]:
    """Gets generated values for a date range for a network and network region
    and optionally for a single fueltech"""

    # @TODO support refresh energies for a single duid or station

    query = get_generated_query(network_region, date_min, date_max, network,
                                fueltech_id)

    engine = get_database_engine()

    results = []

    with engine.connect() as c:
        logger.debug(query)

        if not DRY_RUN:
            try:
                results = list(c.execute(query))
            except Exception as e:
                logger.error(e)

    logger.debug("Got back {} rows".format(len(results)))

    return results
Example 6
def refresh_material_views(view_name: Optional[str] = None,
                           concurrently: bool = True,
                           with_data: bool = True) -> None:
    """Refresh material views"""
    __query = "REFRESH MATERIALIZED VIEW {is_concurrent} {view} {data_spec}"

    engine = get_database_engine()

    views = []

    if view_name:
        views.append(view_name)
    else:
        views = get_materialized_view_names()

    with engine.connect() as c:
        for v in views:
            query = __query.format(
                view=v,
                is_concurrent="concurrently" if concurrently else "",
                data_spec="with data" if with_data else "",
            )
            logger.debug(query)

            try:
                c.execution_options(
                    isolation_level="AUTOCOMMIT").execute(query)
            except Exception as e:
                logger.error("Could not run material refresh: {}".format(e))
Example 7
def _clear_scada_for_range(item: Dict[str, Any]) -> None:
    # We need to purge old records in that date range
    all_dates = [i["trading_interval"] for i in item["records"]]
    min_date = min(all_dates)
    max_date = max(all_dates)
    duids = list(set([i["facility_code"] for i in item["records"]]))

    __sql = """
        update facility_scada set
            generated=null
        where
            network_id='NEM' and
            facility_code in ({fac_codes}) and
            trading_interval >= '{min_date}' and
            trading_interval <= '{max_date}'
    """

    query = __sql.format(fac_codes=duid_in_case(duids),
                         min_date=min_date,
                         max_date=max_date)

    engine = get_database_engine()

    with engine.connect() as c:
        logger.debug(query)
        c.execute(query)
Example 8
def exec_aggregates_network_demand_query(date_min: datetime,
                                         date_max: datetime,
                                         network: NetworkSchema) -> bool:
    resp_code: bool = False
    engine = get_database_engine()
    result = None

    if date_max < date_min:
        raise Exception(
            "exec_aggregates_network_demand_query: date_max ({}) is prior to date_min ({})"
            .format(date_max, date_min))

    query = aggregates_network_demand_query(date_min=date_min,
                                            date_max=date_max,
                                            network=network)

    with engine.connect() as c:
        logger.debug(query)

        if not DRY_RUN:
            result = c.execute(query)

    logger.debug(result)

    return resp_code
Example 9
def init_aggregation_policies() -> None:
    """Initializes the continuous aggregation policies"""

    # @TODO check what exists with query

    engine = get_database_engine()

    for view in _VIEW_MAP:
        if not view.aggregation_policy:
            logger.debug("Skipping {}".format(view.name))
            continue

        with engine.connect() as c:

            drop_query = remove_continuous_aggregation_query(view)

            try:
                logger.debug(drop_query)
                c.execute(drop_query)
            except Exception:
                logger.warning(
                    "Could not drop continuous aggregation query: {}".format(
                        view.name))

            create_query = create_continuous_aggregation_query(view)

            logger.debug(create_query)

            try:
                c.execute(create_query)
            except Exception as e:
                logger.warning(
                    "Could not create continuous aggregation query: {}".format(
                        e))
Example 10
def crawlers_get_crawl_metadata() -> List[CrawlMetadata]:
    """Get a return of metadata schemas for all crawlers from the database"""
    engine = get_database_engine()

    __query = """
        select
            cm.spider_name as name,
            cm.data->>'version' as version,
            cm.data->>'last_crawled' as last_crawled,
            cm.data->>'latest_processed' as last_processed,
            cm.data->>'server_latest' as server_latest,
            cm.data->>'force_run' as force_run
        from crawl_meta cm
        order by last_crawled desc;
    """
    _crawler_metas = []

    with engine.connect() as c:
        _crawler_metas = list(c.execute(__query))

    if not _crawler_metas:
        return []

    _crawler_meta_models = [CrawlMetadata(**i) for i in _crawler_metas]

    return _crawler_meta_models
Example 11
def get_flows(
    date_min: datetime,
    date_max: datetime,
    network_region: str,
    network: NetworkSchema,
    flow: FlowDirection,
) -> List[Dict]:
    """Gets flows"""

    query = get_flows_query(network_region, date_min, date_max, network, flow)

    engine = get_database_engine()

    results = []

    with engine.connect() as c:
        logger.debug(query)

        if not DRY_RUN:
            try:
                results = list(c.execute(query))
            except Exception as e:
                logger.error(e)

    logger.debug("Got back {} flow rows".format(len(results)))

    return results
Example 12
def store_stats_database(statset: StatsSet) -> int:
    s = SessionLocal()

    records_to_store = [i.dict() for i in statset.stats]

    stmt = insert(Stats).values(records_to_store)
    stmt.bind = get_database_engine()
    stmt = stmt.on_conflict_do_update(
        index_elements=[
            "stat_date",
            "country",
            "stat_type",
        ],
        set_={
            "value": stmt.excluded.value,
        },
    )

    try:
        s.execute(stmt)
        s.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return 0
    finally:
        s.close()

    num_records = len(records_to_store)

    logger.info("Wrote {} records to database".format(num_records))

    return num_records
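Several of these examples repeat the same PostgreSQL upsert pattern: build an insert() from the sqlalchemy.dialects.postgresql dialect, then chain on_conflict_do_update with the conflict target and the columns to overwrite from excluded. A minimal, self-contained sketch of that pattern follows; the Reading table and its columns are illustrative, not part of the OpenNEM schema:

import logging
from typing import Dict, List

from sqlalchemy import Column, DateTime, Numeric, String
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.orm import declarative_base, sessionmaker

logger = logging.getLogger(__name__)

Base = declarative_base()


class Reading(Base):
    # Illustrative table, not part of the OpenNEM schema.
    __tablename__ = "reading"

    station_id = Column(String, primary_key=True)
    observed_at = Column(DateTime(timezone=True), primary_key=True)
    value = Column(Numeric)


def upsert_readings(engine, records: List[Dict]) -> int:
    """Insert records, overwriting value on primary-key conflicts."""
    stmt = insert(Reading).values(records)
    stmt = stmt.on_conflict_do_update(
        index_elements=["station_id", "observed_at"],
        set_={"value": stmt.excluded.value},
    )

    session = sessionmaker(bind=engine)()

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error upserting records: {}".format(e))
        return 0
    finally:
        session.close()

    return len(records)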
Example 13
def bulkinsert_mms_items(
    table: ORMTableType,
    records: List[Dict],
    update_fields: Optional[List[Union[str, Column[Any]]]] = None,
) -> int:
    num_records = 0

    if not records:
        return 0

    sql_query = build_insert_query(table, update_fields)
    csv_content = generate_bulkinsert_csv_from_records(table,
                                                       records,
                                                       column_names=list(
                                                           records[0].keys()))

    # @TODO check the scoping here
    engine = get_database_engine()
    conn = engine.raw_connection()

    try:
        cursor = conn.cursor()
        cursor.copy_expert(sql_query, csv_content)
        conn.commit()
        num_records = len(records)
        logger.info(f"Bulk inserted {len(records)} records")
    except Exception as generic_error:
        if hasattr(generic_error, "hide_parameters"):
            generic_error.hide_parameters = True  # type: ignore
        logger.error(generic_error)
    finally:
        engine.dispose()
        conn.close()

    return num_records
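The bulk path above goes through Postgres COPY on the raw psycopg2 connection rather than through the ORM. A stripped-down sketch of that technique; the reading table name and the in-memory CSV handling are illustrative only:

import csv
from io import StringIO
from typing import Dict, List


def copy_rows(engine, rows: List[Dict]) -> int:
    """Bulk-load rows into an illustrative table with COPY ... FROM STDIN."""
    if not rows:
        return 0

    columns = list(rows[0].keys())

    # Serialise the records to an in-memory CSV buffer.
    buf = StringIO()
    writer = csv.DictWriter(buf, fieldnames=columns)

    for row in rows:
        writer.writerow(row)

    buf.seek(0)

    # COPY streams the whole buffer to the server in a single statement.
    copy_sql = "COPY reading ({}) FROM STDIN WITH (FORMAT CSV)".format(
        ", ".join(columns))

    conn = engine.raw_connection()

    try:
        with conn.cursor() as cursor:
            cursor.copy_expert(copy_sql, buf)
        conn.commit()
    finally:
        conn.close()

    return len(rows)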
Example 14
def gov_stats_cpi() -> Optional[OpennemDataSet]:
    engine = get_database_engine()

    query = country_stats_query(StatTypes.CPI)

    with engine.connect() as c:
        logger.debug(query)
        row = list(c.execute(query))

    stats = [
        DataQueryResult(interval=i[0], result=i[1], group_by=i[2] if len(i) > 2 else None)
        for i in row
    ]

    if len(stats) < 1:
        logger.error("No results for gov_stats_cpi returing blank set")
        return None

    result = stats_factory(
        stats,
        code="au.cpi",
        network=NetworkNEM,
        interval=human_to_interval("1Q"),
        period=human_to_period("all"),
        units=get_unit("cpi"),
        group_field="gov",
    )

    return result
Example 15
def exec_aggregates_facility_daily_query(date_min: datetime,
                                         date_max: datetime,
                                         network: NetworkSchema) -> bool:
    resp_code: bool = False
    engine = get_database_engine()
    result = None

    # @TODO should put this check everywhere
    # or place it all in a schema that validates
    if date_max < date_min:
        raise Exception(
            "exec_aggregates_facility_daily_query: date_max ({}) is prior to date_min ({})"
            .format(date_max, date_min))

    query = aggregates_facility_daily_query(date_min=date_min,
                                            date_max=date_max,
                                            network=network)

    with engine.connect() as c:
        logger.debug(query)

        if not DRY_RUN:
            result = c.execute(query)

    logger.debug(result)

    # @NOTE rooftop fix for double counts
    if not DRY_RUN:
        run_rooftop_fix()

    return resp_code
Example 16
def store_bom_records_temp_max(recs: List[Dict]) -> Dict[str, int]:
    # records_to_store = [i for i in recs if i["temp_max"] is not None]
    records_to_store = recs

    first = recs[0]
    update_field = "temp_max"

    if "temp_min" in first:
        update_field = "temp_min"

    engine = get_database_engine()
    session = SessionLocal()

    # insert
    stmt = insert(BomObservation).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["observation_time", "station_id"],
        set_={update_field: getattr(stmt.excluded, update_field)},
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        return {"num_records": 0}
    finally:
        session.close()

    return {"num_records": len(records_to_store)}
Example 17
def process_unit_solution(table):
    session = SessionLocal()
    engine = get_database_engine()

    if "records" not in table:
        raise Exception("Invalid table no records")

    records = table["records"]

    records_to_store = []
    records_primary_keys = []

    for record in records:
        trading_interval = parse_date(
            record["SETTLEMENTDATE"], network=NetworkNEM, dayfirst=False
        )
        facility_code = normalize_duid(record["DUID"])

        if not trading_interval or not facility_code:
            continue

        # Since this can insert 1M+ records at a time we need to
        # do a separate in-memory check of primary key constraints
        # better way of doing this .. @TODO
        _unique_set = (trading_interval, facility_code, "NEM")

        if _unique_set not in records_primary_keys:

            records_to_store.append(
                {
                    "trading_interval": trading_interval,
                    "facility_code": facility_code,
                    "eoi_quantity": float(record["INITIALMW"]),
                    "network_id": "NEM",
                }
            )
            records_primary_keys.append(_unique_set)

    # free
    records_primary_keys = []

    logger.debug("Saving %d records", len(records_to_store))

    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        constraint="facility_scada_pkey",
        set_={"eoi_quantity": stmt.excluded.eoi_quantity},
    )

    try:
        session.execute(stmt)
        session.commit()
    except Exception as e:
        logger.error("Error: {}".format(e))
        return 0
    finally:
        session.close()

    return len(records_to_store)
Example 18
def update_facility_seen_range(
    include_first_seen: bool = False,
    facility_codes: Optional[List[str]] = None,
) -> bool:
    """Updates last seen and first seen. For each facility updates the date the facility
    was seen for the first and last time in the power data from FacilityScada.

    Args:
        include_first_seen (bool, optional): Include earliest seen time. Defaults to False.
        facility_codes (Optional[List[str]], optional): List of facility codes to update. Defaults to None.

    Returns:
        bool: Ran successfully
    """

    engine = get_database_engine()

    __query = get_update_seen_query(include_first_seen=include_first_seen,
                                    facility_codes=facility_codes)

    with engine.connect() as c:
        logger.debug(__query)
        c.execute(__query)

    slack_message("Ran facility_seen_range")

    return True
Example 19
def test_insert():
    data = get_file_content()

    csv_reader = csv.DictReader(data.split("\n"))
    csv_buffer = StringIO()
    fieldnames = [
        "created_by",
        "created_at",
        "updated_at",
        "network_id",
        "trading_interval",
        "facility_code",
        "generated",
        "eoi_quantity",
    ]

    csvwriter = csv.DictWriter(
        csv_buffer,
        fieldnames=fieldnames,
    )

    for record in facility_scada_generate_records(csv_reader):
        csvwriter.writerow(record)

    conn = get_database_engine().raw_connection()

    cursor = conn.cursor()
    csv_buffer.seek(0)
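    # Assumed continuation (not part of the original snippet): the buffered
    # CSV would typically be streamed into facility_scada with COPY, as in
    # the other bulk-insert examples. The COPY statement here is illustrative.
    copy_sql = "COPY facility_scada ({}) FROM STDIN WITH (FORMAT CSV)".format(
        ", ".join(fieldnames))

    cursor.copy_expert(copy_sql, csv_buffer)
    conn.commit()
    conn.close()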
Example 20
def purge_views() -> None:
    """Remove views that aren't in the view table"""

    engine = get_database_engine()

    all_views_query = get_all_views_query()
    all_views = []

    with engine.connect() as c:
        result = list(c.execute(all_views_query))

    # Don't drop postgis or mapped views
    all_views = [
        i[0] for i in result
        if i[0] not in POSTGIS_VIEWS + [i.name for i in _VIEW_MAP]
    ]

    for view_name in all_views:

        with engine.connect() as c:
            c.execution_options(isolation_level="AUTOCOMMIT")

            query = "drop materialized view if exists {} cascade;".format(
                view_name)

            logger.info("Dropping view {}".format(view_name))
            logger.debug(query)

            try:
                c.execute(query)
            except Exception as e:
                logger.error("Error dropping view: {}".format(e))
Example 21
def get_daily_fueltech_summary() -> DailySummary:
    engine = get_database_engine()
    _result = []

    query = daily_fueltech_summary_query()

    with engine.connect() as c:
        logger.debug(query)
        _result = list(c.execute(query))

    records = [
        DailySummaryResult(
            trading_day=i[0],
            network=i[1],
            fueltech_id=i[2],
            fueltech_label=i[3],
            renewable=i[4],
            energy=i[5],
            generated_total=i[6],
            demand_total=i[7],
            demand_proportion=i[8],
        ) for i in _result
    ]

    ds = DailySummary(trading_day=records[0].trading_day,
                      network=records[0].network,
                      results=records)

    return ds
Example 22
def process_nem_price(table: AEMOTableSchema) -> ControllerReturn:
    """Stores the NEM price for both dispatch price and trading price"""
    session = get_scoped_session()
    engine = get_database_engine()

    cr = ControllerReturn(total_records=len(table.records))
    records_to_store = []
    primary_keys = []

    price_field = "price"

    if table.full_name == "dispatch_price":
        price_field = "price_dispatch"

    for record in table.records:
        # @NOTE disable pk track
        trading_interval = parse_date(record["settlementdate"])

        primary_key = (trading_interval, record["regionid"])

        if primary_key in primary_keys:
            continue

        primary_keys.append(primary_key)

        records_to_store.append({
            "network_id": "NEM",
            "created_by": "opennem.controllers.nem",
            "network_region": record["regionid"],
            "trading_interval": trading_interval,
            price_field: record["rrp"],
        })

        cr.processed_records += 1

    stmt = insert(BalancingSummary).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=["trading_interval", "network_id", "network_region"],
        set_={price_field: getattr(stmt.excluded, price_field)},
    )

    try:
        session.execute(stmt)
        session.commit()
        cr.inserted_records = cr.processed_records
        cr.server_latest = max(
            [i["trading_interval"] for i in records_to_store])
    except Exception as e:
        logger.error("Error inserting NEM price records")
        logger.error(e)
        cr.errors = cr.processed_records
    finally:
        session.rollback()
        session.close()
        engine.dispose()

    return cr
Example 23
def power_flows_week(
    time_series: TimeSeries,
    network_region_code: str,
) -> Optional[OpennemDataSet]:
    engine = get_database_engine()

    query = interconnector_power_flow(time_series=time_series,
                                      network_region=network_region_code)

    with engine.connect() as c:
        logger.debug(query)
        row = list(c.execute(query))

    if len(row) < 1:
        raise Exception("No results from query: {}".format(query))

    imports = [
        DataQueryResult(interval=i[0],
                        result=i[2],
                        group_by="imports" if len(i) > 1 else None)
        for i in row
    ]

    exports = [
        DataQueryResult(interval=i[0],
                        result=i[3],
                        group_by="exports" if len(i) > 1 else None)
        for i in row
    ]

    result = stats_factory(
        imports,
        # code=network_region_code or network.code,
        network=time_series.network,
        period=human_to_period("7d"),
        interval=human_to_interval("5m"),
        units=get_unit("power"),
        region=network_region_code,
        fueltech_group=True,
    )

    if not result:
        raise Exception("No results")

    result_exports = stats_factory(
        exports,
        # code=network_region_code or network.code,
        network=time_series.network,
        period=human_to_period("7d"),
        interval=human_to_interval("5m"),
        units=get_unit("power"),
        region=network_region_code,
        fueltech_group=True,
    )

    result.append_set(result_exports)

    return result
Example 24
def init_fueltechs() -> None:
    engine = get_database_engine()

    with engine.connect() as c:
        for facility_code, fueltech_id in FUELTECHS.items():
            c.execute(get_update_sql(facility_code, fueltech_id))
            logger.debug("Updated fueltech for {}".format(facility_code))

    logger.info("Done updating facility fueltechs")
Example 25
def store_mms_table(table: AEMOTableSchema) -> int:

    if not table.name:
        logger.error("Table has no name!: {}".format(table))
        return 0

    # Get the table ORM model
    table_schema = get_mms_model(table)

    if not table_schema:
        logger.error("No table ORM schema for table name {}".format(
            table.name))
        return 0

    # update all non-primary key fields. get them dynamically.
    update_fields = [
        i.name for i in table_schema.__table__.columns if not i.primary_key
    ]  # type: ignore

    records_to_store = table.records

    sql_query = ""

    try:
        sql_query = build_insert_query(table_schema, update_fields)
    except Exception as e:
        logger.error(e)
        return 0

    conn = get_database_engine().raw_connection()
    cursor = conn.cursor()

    csv_content = ""

    try:
        csv_content = generate_csv_from_records(
            table_schema,
            records_to_store,
            column_names=list(records_to_store[0].keys()),
        )
    except Exception as e:
        logger.error(e)
        return 0

    if not csv_content:
        return 0

    logger.debug(csv_content.getvalue().splitlines()[:2])

    cursor.copy_expert(sql_query, csv_content)
    conn.commit()

    logger.info("{}: Inserted {} records".format(table.full_name,
                                                 len(records_to_store)))

    return len(records_to_store)
Example 26
def run_rooftop_fix() -> None:
    query = "delete from at_facility_daily where trading_day < '2018-03-01 00:00:00+00' and network_id='AEMO_ROOFTOP';"

    engine = get_database_engine()

    with engine.connect() as c:
        logger.debug(query)

        if not DRY_RUN:
            c.execute(query)
Example 27
def store_wem_facility_intervals(
        balancing_set: WEMFacilityIntervalSet) -> ControllerReturn:
    """Persist WEM facility intervals"""
    engine = get_database_engine()
    session = get_scoped_session()
    cr = ControllerReturn()

    records_to_store = []

    if not balancing_set.intervals:
        return cr

    cr.total_records = len(balancing_set.intervals)
    cr.server_latest = balancing_set.server_latest

    for _rec in balancing_set.intervals:
        records_to_store.append({
            "created_by": "wem.controller",
            "network_id": "WEM",
            "trading_interval": _rec.trading_interval,
            "facility_code": _rec.facility_code,
            "generated": _rec.generated,
            "eoi_quantity": _rec.eoi_quantity,
        })
        cr.processed_records += 1

    if len(records_to_store) < 1:
        return cr

    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=[
            "trading_interval", "network_id", "facility_code", "is_forecast"
        ],
        set_={
            "generated": stmt.excluded.generated,
            "eoi_quantity": stmt.excluded.eoi_quantity,
        },
    )

    try:
        session.execute(stmt)
        session.commit()
        cr.inserted_records = len(records_to_store)
    except Exception as e:
        logger.error("Error: {}".format(e))
        cr.errors = len(records_to_store)
        cr.error_detail.append(str(e))
    finally:
        session.close()
        engine.dispose()

    return cr
Example 28
def process_dispatch_interconnectorres(
        table: AEMOTableSchema) -> ControllerReturn:
    session = get_scoped_session()
    engine = get_database_engine()

    cr = ControllerReturn(total_records=len(table.records))
    records_to_store = []
    primary_keys = []

    for record in table.records:
        primary_key = (record["settlementdate"], record["interconnectorid"])

        if primary_key in primary_keys:
            continue

        primary_keys.append(primary_key)

        records_to_store.append({
            "network_id": "NEM",
            "created_by": "opennem.controller",
            "facility_code": record["interconnectorid"],
            "trading_interval": record["settlementdate"],
            "generated": record["mwflow"],
        })
        cr.processed_records += 1

    # insert
    stmt = insert(FacilityScada).values(records_to_store)
    stmt.bind = engine
    stmt = stmt.on_conflict_do_update(
        index_elements=[
            "trading_interval", "network_id", "facility_code", "is_forecast"
        ],
        set_={"generated": stmt.excluded.generated},
    )

    try:
        session.execute(stmt)
        session.commit()
        cr.inserted_records = cr.processed_records
        cr.server_latest = max(
            [i["trading_interval"] for i in records_to_store])
    except Exception as e:
        logger.error("Error inserting records")
        logger.error(e)
        cr.errors = cr.processed_records
        return cr
    finally:
        session.rollback()
        session.close()
        engine.dispose()

    return cr
Example 29
    def process_item(self, item, spider=None):

        s = SessionLocal()

        csvreader = csv.DictReader(item["content"].split("\n"))

        records_to_store = []
        primary_keys = []

        for row in csvreader:
            trading_interval = parse_date(row["TRADING_DAY_INTERVAL"],
                                          network=NetworkWEM,
                                          dayfirst=False)

            if trading_interval not in primary_keys:
                forecast_load = clean_float(row["FORECAST_EOI_MW"])

                records_to_store.append({
                    "created_by": spider.name,
                    "trading_interval": trading_interval,
                    "network_id": "WEM",
                    "network_region": "WEM",
                    "forecast_load": forecast_load,
                    # generation_scheduled=row["Scheduled Generation (MW)"],
                    # generation_total=row["Total Generation (MW)"],
                    "price": clean_float(row["PRICE"]),
                })
                primary_keys.append(trading_interval)

        stmt = insert(BalancingSummary).values(records_to_store)
        stmt.bind = get_database_engine()
        stmt = stmt.on_conflict_do_update(
            index_elements=[
                "trading_interval",
                "network_id",
                "network_region",
            ],
            set_={
                "price": stmt.excluded.price,
                "forecast_load": stmt.excluded.forecast_load,
            },
        )

        try:
            s.execute(stmt)
            s.commit()
        except Exception as e:
            logger.error("Error inserting records")
            logger.error(e)
            return 0
        finally:
            s.close()

        return len(records_to_store)
Example 30
def crawlers_flush_metadata(days: int | None = None,
                            crawler_name: str | None = None) -> None:
    """Flush the crawler metadata"""
    engine = get_database_engine()

    __meta_query = """
        delete
        from crawl_meta cm
        where
            1=1 and
            {crawler_clause}
        ;
    """

    meta_crawler_clause = ""

    if crawler_name:
        meta_crawler_clause = crawler_name

    meta_query = __meta_query.format(
        crawler_clause=f"spider_name = '{meta_crawler_clause}'", )

    __history_query = """
        delete
        from crawl_history
        where
            1=1 and
            {crawler_clause_history}
            {days_clause_history}

    """

    crawler_clause_history = ""

    if crawler_name:
        crawler_clause_history = f"crawler_name = '{crawler_name}' and"

    days_clause_history = ""

    if days:
        days_clause_history = f"interval >= now() - interval '{days} days'"

    history_query = __history_query.format(
        crawler_clause_history=crawler_clause_history,
        days_clause_history=days_clause_history)

    logger.debug(dedent(meta_query))
    logger.debug(dedent(history_query))

    with engine.connect() as c:
        c.execute(meta_query)
        c.execute(history_query)
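A usage sketch; the crawler name below is hypothetical:

# Flush crawl history rows from the last seven days for a single crawler
# (the crawler name here is hypothetical).
crawlers_flush_metadata(days=7, crawler_name="au.nem.current.dispatch_scada")

# Flush metadata and history for all crawlers.
crawlers_flush_metadata()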