def store_mms_table(table: AEMOTableSchema) -> int:
    if not table.name:
        logger.error("Table has no name!: {}".format(table))
        return 0

    # Get the table ORM model
    table_schema = get_mms_model(table)

    if not table_schema:
        logger.error("No table ORM schema for table name {}".format(table.name))
        return 0

    # update all non-primary key fields. get them dynamically.
    update_fields = [
        i.name for i in table_schema.__table__.columns if not i.primary_key
    ]  # type: ignore

    records_to_store = table.records

    sql_query = ""

    try:
        sql_query = build_insert_query(table_schema, update_fields)
    except Exception as e:
        logger.error(e)
        return 0

    conn = get_database_engine().raw_connection()
    cursor = conn.cursor()

    csv_content = ""

    try:
        csv_content = generate_csv_from_records(
            table_schema,
            records_to_store,
            column_names=list(records_to_store[0].keys()),
        )
    except Exception as e:
        logger.error(e)
        return 0

    if not csv_content:
        return 0

    logger.debug(csv_content.getvalue().splitlines()[:2])

    cursor.copy_expert(sql_query, csv_content)
    conn.commit()

    logger.info("{}: Inserted {} records".format(table.full_name, len(records_to_store)))

    return len(records_to_store)
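
# A minimal, self-contained sketch of the COPY-based bulk load that store_mms_table
# (and the insert helpers below) perform through cursor.copy_expert. The DSN, table
# name and columns here are hypothetical placeholders; in the real code the SQL comes
# from build_insert_query and the CSV buffer from generate_csv_from_records.
def bulk_copy_example(dsn: str) -> int:
    import io

    import psycopg2

    rows = [
        {"code": "UNIT1", "generated": 104.5},
        {"code": "UNIT2", "generated": 98.2},
    ]

    # build an in-memory CSV file-like object
    buf = io.StringIO()
    buf.write("code,generated\n")
    for row in rows:
        buf.write("{},{}\n".format(row["code"], row["generated"]))
    buf.seek(0)

    conn = psycopg2.connect(dsn)
    cursor = conn.cursor()

    # COPY ... FROM STDIN streams the CSV straight from the buffer into the table
    cursor.copy_expert(
        "COPY example_scada (code, generated) FROM STDIN WITH (FORMAT csv, HEADER true)",
        buf,
    )
    conn.commit()

    return len(rows)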
def main():
    if not BOM_BACKUP_PATH.is_file():
        raise Exception("Not a file: {}".format(BOM_BACKUP_PATH))

    fieldnames = [i.name for i in BomObservation.__table__.columns.values()]

    records_to_store = []

    with BOM_BACKUP_PATH.open() as fh:
        csvreader = csv.DictReader(fh, fieldnames=fieldnames, delimiter="\t")
        count_rec = 0

        for row in csvreader:
            parsed_row = parse_record(row)
            records_to_store.append(parsed_row)
            count_rec += 1

            if LIMIT_REC > 0 and count_rec > LIMIT_REC:
                break

    update_fields = [
        "temp_apparent",
        "temp_air",
        "press_qnh",
        "wind_dir",
        "wind_spd",
        "wind_gust",
        "cloud",
        "cloud_type",
        "humidity",
    ]

    sql_query = build_insert_query(BomObservation, update_fields)

    conn = get_database_engine().raw_connection()
    cursor = conn.cursor()

    csv_content = generate_csv_from_records(
        BomObservation,
        records_to_store,
        column_names=records_to_store[0].keys(),
    )

    cursor.copy_expert(sql_query, csv_content)
    conn.commit()

    logger.info("Inserted {} records".format(len(records_to_store)))
def insert_balancing_summary_records(
    records: List[BalancingSummaryImport], update_fields: List[str] = ["price"]
) -> int:
    """
    Bulk insert the balancing_summary records
    """
    records_to_store = [i.dict() for i in records]

    # dedupe records
    return_records_grouped = {}

    # primary key protection for bulk insert
    for pk_values, rec_value in groupby(
        records_to_store,
        key=lambda r: (
            r.get("trading_interval"),
            r.get("network_id"),
            r.get("network_region"),
        ),
    ):
        if pk_values not in return_records_grouped:
            return_records_grouped[pk_values] = list(rec_value).pop()

    records_to_store = list(return_records_grouped.values())

    sql_query = build_insert_query(BalancingSummary, ["updated_at"] + update_fields)

    conn = get_database_engine().raw_connection()
    cursor = conn.cursor()

    csv_content = generate_csv_from_records(
        BalancingSummary,
        records_to_store,
        column_names=records_to_store[0].keys(),
    )

    try:
        cursor.copy_expert(sql_query, csv_content)
        conn.commit()
    except Exception as e:
        logger.error("Error inserting records: {}".format(e))
        return 0

    logger.info("Inserted {} records".format(len(records_to_store)))

    return len(records_to_store)
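
# build_insert_query is not shown in this excerpt. The update_fields argument
# (here ["updated_at", "price"]) suggests which columns get rewritten when a row
# with the same primary key already exists. A rough, hypothetical sketch of a
# PostgreSQL ON CONFLICT upsert with that shape -- table, columns and key names
# are made up, and the real code streams CSV via COPY rather than parameterised
# VALUES:
def build_upsert_example(
    table: str, columns: list, pk_columns: list, update_fields: list
) -> str:
    # rewrite only the listed fields when the primary key already exists
    set_clause = ", ".join("{0} = EXCLUDED.{0}".format(field) for field in update_fields)

    return (
        "INSERT INTO {table} ({columns}) VALUES ({placeholders}) "
        "ON CONFLICT ({pk}) DO UPDATE SET {set_clause}".format(
            table=table,
            columns=", ".join(columns),
            placeholders=", ".join(["%s"] * len(columns)),
            pk=", ".join(pk_columns),
            set_clause=set_clause,
        )
    )


# Example call with made-up balancing_summary-style names:
# build_upsert_example(
#     "balancing_summary",
#     ["trading_interval", "network_id", "network_region", "price", "updated_at"],
#     ["trading_interval", "network_id", "network_region"],
#     ["updated_at", "price"],
# )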
def insert_scada_records(records: List[DispatchUnitSolutionOld]) -> int:
    """
    Bulk insert the scada records
    """
    records_to_store = [i.dict() for i in records]

    # dedupe records
    return_records_grouped = {}

    for pk_values, rec_value in groupby(
        records_to_store,
        key=lambda r: (
            r.get("trading_interval"),
            r.get("network_id"),
            r.get("facility_code"),
        ),
    ):
        if pk_values not in return_records_grouped:
            return_records_grouped[pk_values] = list(rec_value).pop()

    records_to_store = list(return_records_grouped.values())

    sql_query = build_insert_query(FacilityScada, ["updated_at", "generated"])

    conn = get_database_engine().raw_connection()
    cursor = conn.cursor()

    csv_content = generate_csv_from_records(
        FacilityScada,
        records_to_store,
        column_names=records_to_store[0].keys(),
    )

    try:
        cursor.copy_expert(sql_query, csv_content)
        conn.commit()
    except Exception as e:
        logger.error("Error inserting records: {}".format(e))
        return 0

    logger.info("Inserted {} records".format(len(records_to_store)))

    return len(records_to_store)
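
# itertools.groupby only groups consecutive rows, and the records above are not
# sorted by their primary key, so it is the "pk_values not in return_records_grouped"
# guard that actually guarantees a single record per key. A small, self-contained
# illustration of the same dedupe guarantee using hypothetical scada-style records:
def dedupe_by_primary_key_example() -> list:
    records = [
        {"trading_interval": "2021-01-01T00:05", "network_id": "NEM", "facility_code": "UNIT1", "generated": 10.0},
        {"trading_interval": "2021-01-01T00:05", "network_id": "NEM", "facility_code": "UNIT2", "generated": 7.5},
        # duplicate primary key for UNIT1 -- only one of these may reach the COPY
        {"trading_interval": "2021-01-01T00:05", "network_id": "NEM", "facility_code": "UNIT1", "generated": 12.0},
    ]

    deduped = {}

    for rec in records:
        pk = (rec["trading_interval"], rec["network_id"], rec["facility_code"])

        # keep the first record seen for each composite primary key
        if pk not in deduped:
            deduped[pk] = rec

    return list(deduped.values())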
def insert_energies(results: List[Dict], network: NetworkSchema) -> int:
    """Takes a list of generation values, calculates energies and
    bulk-inserts them into the database"""

    # Get the energy sums as a dataframe
    esdf = energy_sum(results, network=network)

    # Add metadata
    esdf["created_by"] = "opennem.worker.energy"
    esdf["created_at"] = ""
    esdf["updated_at"] = datetime.now()
    esdf["generated"] = None
    esdf["is_forecast"] = False
    esdf["energy_quality_flag"] = 0

    # reorder columns
    columns = [
        "created_by",
        "created_at",
        "updated_at",
        "network_id",
        "trading_interval",
        "facility_code",
        "generated",
        "eoi_quantity",
        "is_forecast",
        "energy_quality_flag",
    ]
    esdf = esdf[columns]

    records_to_store: List[Dict] = esdf.to_dict("records")

    if len(records_to_store) < 1:
        logger.error("No records returned from energy sum")
        return 0

    # dedupe records
    return_records_grouped = {}

    for pk_values, rec_value in groupby(
        records_to_store,
        key=lambda r: (
            r.get("trading_interval"),
            r.get("network_id"),
            r.get("facility_code"),
        ),
    ):
        if pk_values not in return_records_grouped:
            return_records_grouped[pk_values] = list(rec_value).pop()

    records_to_store = list(return_records_grouped.values())

    # Build SQL + CSV and bulk-insert
    sql_query = build_insert_query(FacilityScada, ["updated_at", "eoi_quantity"])

    conn = get_database_engine().raw_connection()
    cursor = conn.cursor()

    csv_content = generate_csv_from_records(
        FacilityScada,
        records_to_store,
        column_names=records_to_store[0].keys(),
    )

    try:
        cursor.copy_expert(sql_query, csv_content)
        conn.commit()
    except Exception as e:
        logger.error("Error inserting records: {}".format(e))
        return 0

    logger.info("Inserted {} records".format(len(records_to_store)))

    return len(records_to_store)
def parse_bom(file: FilePath) -> int:
    num_inserts = 0
    limit = 0
    read_count = 0

    records_to_store = []
    observation_times = []

    with file.open() as fh:
        # fh.readline()
        csvreader = csv.DictReader(
            [bom_parse_date(l) for l in fh if re.match(_valid_csv_line, l)],
            delimiter=",",
            fieldnames=BOM_FORMAT_FIELDS,
        )

        for row in csvreader:
            station_id_field = None

            if "station_id" in row:
                station_id_field = "station_id"

            if "Bureau of Meteorology station number" in row:
                station_id_field = "Bureau of Meteorology station number"

            if not station_id_field:
                raise Exception("Could not find station id field")

            row[station_id_field] = bom_pad_stationid(row[station_id_field])
            row[station_id_field] = bom_station_id_remap(row[station_id_field])

            if "station_id" in row:
                record = parse_record_bom(row)

                # skip duplicate observation times
                ot = record["observation_time"]

                if ot in observation_times:
                    continue

                observation_times.append(ot)
                records_to_store.append(record)

            read_count += 1

            if limit and limit > 0 and read_count >= limit:
                break

    sql_query = build_insert_query(BomObservation, BOM_DB_UPDATE_FIELDS)

    conn = get_database_engine().raw_connection()
    cursor = conn.cursor()

    csv_content = generate_csv_from_records(
        BomObservation,
        records_to_store,
        column_names=records_to_store[0].keys(),
    )

    cursor.copy_expert(sql_query, csv_content)
    conn.commit()

    # number of records sent to the database via COPY
    num_inserts = len(records_to_store)

    return num_inserts