def store_mms_table(table: AEMOTableSchema) -> int:
    if not table.name:
        logger.error("Table has no name!: {}".format(table))
        return 0

    # Get the table ORM model
    table_schema = get_mms_model(table)

    if not table_schema:
        logger.error("No table ORM schema for table name {}".format(table.name))
        return 0

    # update all non-primary key fields. get them dynamically.
    update_fields = [
        i.name for i in table_schema.__table__.columns if not i.primary_key
    ]  # type: ignore

    records_to_store = table.records

    sql_query = ""

    try:
        sql_query = build_insert_query(table_schema, update_fields)
    except Exception as e:
        logger.error(e)
        return 0

    conn = get_database_engine().raw_connection()
    cursor = conn.cursor()

    csv_content = ""

    try:
        csv_content = generate_csv_from_records(
            table_schema,
            records_to_store,
            column_names=list(records_to_store[0].keys()),
        )
    except Exception as e:
        logger.error(e)
        return 0

    if not csv_content:
        return 0

    logger.debug(csv_content.getvalue().splitlines()[:2])

    cursor.copy_expert(sql_query, csv_content)
    conn.commit()

    logger.info("{}: Inserted {} records".format(table.full_name, len(records_to_store)))

    return len(records_to_store)
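
# A minimal, self-contained sketch of the COPY-based bulk load that store_mms_table
# (and the insert helpers below) perform through cursor.copy_expert. The DSN, table
# name and columns here are hypothetical placeholders; in the real code the SQL comes
# from build_insert_query and the CSV buffer from generate_csv_from_records.
def bulk_copy_example(dsn: str) -> int:
    import io

    import psycopg2

    rows = [
        {"code": "UNIT1", "generated": 104.5},
        {"code": "UNIT2", "generated": 98.2},
    ]

    # build an in-memory CSV file-like object
    buf = io.StringIO()
    buf.write("code,generated\n")
    for row in rows:
        buf.write("{},{}\n".format(row["code"], row["generated"]))
    buf.seek(0)

    conn = psycopg2.connect(dsn)
    cursor = conn.cursor()

    # COPY ... FROM STDIN streams the CSV straight from the buffer into the table
    cursor.copy_expert(
        "COPY example_scada (code, generated) FROM STDIN WITH (FORMAT csv, HEADER true)",
        buf,
    )
    conn.commit()

    return len(rows)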
def main():
    if not BOM_BACKUP_PATH.is_file():
        raise Exception("Not a file: {}".format(BOM_BACKUP_PATH))

    fieldnames = [i.name for i in BomObservation.__table__.columns.values()]

    records_to_store = []

    with BOM_BACKUP_PATH.open() as fh:
        csvreader = csv.DictReader(fh, fieldnames=fieldnames, delimiter="\t")
        count_rec = 0

        for row in csvreader:
            parsed_row = parse_record(row)
            records_to_store.append(parsed_row)
            count_rec += 1

            if LIMIT_REC > 0 and count_rec > LIMIT_REC:
                break

    update_fields = [
        "temp_apparent",
        "temp_air",
        "press_qnh",
        "wind_dir",
        "wind_spd",
        "wind_gust",
        "cloud",
        "cloud_type",
        "humidity",
    ]

    sql_query = build_insert_query(BomObservation, update_fields)

    conn = get_database_engine().raw_connection()
    cursor = conn.cursor()

    csv_content = generate_csv_from_records(
        BomObservation,
        records_to_store,
        column_names=records_to_store[0].keys(),
    )

    cursor.copy_expert(sql_query, csv_content)
    conn.commit()

    logger.info("Inserted {} records".format(len(records_to_store)))
def insert_balancing_summary_records(
    records: List[BalancingSummaryImport], update_fields: List[str] = ["price"]
) -> int:
    """
    Bulk insert the balancing_summary records
    """
    records_to_store = [i.dict() for i in records]

    # dedupe records
    return_records_grouped = {}

    # primary key protection for bulk insert
    for pk_values, rec_value in groupby(
        records_to_store,
        key=lambda r: (
            r.get("trading_interval"),
            r.get("network_id"),
            r.get("network_region"),
        ),
    ):
        if pk_values not in return_records_grouped:
            return_records_grouped[pk_values] = list(rec_value).pop()

    records_to_store = list(return_records_grouped.values())

    sql_query = build_insert_query(BalancingSummary, ["updated_at"] + update_fields)

    conn = get_database_engine().raw_connection()
    cursor = conn.cursor()

    csv_content = generate_csv_from_records(
        BalancingSummary,
        records_to_store,
        column_names=records_to_store[0].keys(),
    )

    try:
        cursor.copy_expert(sql_query, csv_content)
        conn.commit()
    except Exception as e:
        logger.error("Error inserting records: {}".format(e))
        return 0

    logger.info("Inserted {} records".format(len(records_to_store)))

    return len(records_to_store)
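
# build_insert_query is not shown in this excerpt. The update_fields argument
# (here ["updated_at", "price"]) suggests which columns get rewritten when a row
# with the same primary key already exists. A rough, hypothetical sketch of a
# PostgreSQL ON CONFLICT upsert with that shape -- table, columns and key names
# are made up, and the real code streams CSV via COPY rather than parameterised
# VALUES:
def build_upsert_example(
    table: str, columns: list, pk_columns: list, update_fields: list
) -> str:
    # rewrite only the listed fields when the primary key already exists
    set_clause = ", ".join("{0} = EXCLUDED.{0}".format(field) for field in update_fields)

    return (
        "INSERT INTO {table} ({columns}) VALUES ({placeholders}) "
        "ON CONFLICT ({pk}) DO UPDATE SET {set_clause}".format(
            table=table,
            columns=", ".join(columns),
            placeholders=", ".join(["%s"] * len(columns)),
            pk=", ".join(pk_columns),
            set_clause=set_clause,
        )
    )


# Example call with made-up balancing_summary-style names:
# build_upsert_example(
#     "balancing_summary",
#     ["trading_interval", "network_id", "network_region", "price", "updated_at"],
#     ["trading_interval", "network_id", "network_region"],
#     ["updated_at", "price"],
# )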
def insert_scada_records(records: List[DispatchUnitSolutionOld]) -> int:
    """
    Bulk insert the scada records
    """
    records_to_store = [i.dict() for i in records]

    # dedupe records
    return_records_grouped = {}

    for pk_values, rec_value in groupby(
        records_to_store,
        key=lambda r: (
            r.get("trading_interval"),
            r.get("network_id"),
            r.get("facility_code"),
        ),
    ):
        if pk_values not in return_records_grouped:
            return_records_grouped[pk_values] = list(rec_value).pop()

    records_to_store = list(return_records_grouped.values())

    sql_query = build_insert_query(FacilityScada, ["updated_at", "generated"])

    conn = get_database_engine().raw_connection()
    cursor = conn.cursor()

    csv_content = generate_csv_from_records(
        FacilityScada,
        records_to_store,
        column_names=records_to_store[0].keys(),
    )

    try:
        cursor.copy_expert(sql_query, csv_content)
        conn.commit()
    except Exception as e:
        logger.error("Error inserting records: {}".format(e))
        return 0

    logger.info("Inserted {} records".format(len(records_to_store)))

    return len(records_to_store)
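
# itertools.groupby only groups consecutive rows, and the records above are not
# sorted by their primary key, so it is the "pk_values not in return_records_grouped"
# guard that actually guarantees a single record per key. A small, self-contained
# illustration of the same dedupe guarantee using hypothetical scada-style records:
def dedupe_by_primary_key_example() -> list:
    records = [
        {"trading_interval": "2021-01-01T00:05", "network_id": "NEM", "facility_code": "UNIT1", "generated": 10.0},
        {"trading_interval": "2021-01-01T00:05", "network_id": "NEM", "facility_code": "UNIT2", "generated": 7.5},
        # duplicate primary key for UNIT1 -- only one of these may reach the COPY
        {"trading_interval": "2021-01-01T00:05", "network_id": "NEM", "facility_code": "UNIT1", "generated": 12.0},
    ]

    deduped = {}

    for rec in records:
        pk = (rec["trading_interval"], rec["network_id"], rec["facility_code"])

        # keep the first record seen for each composite primary key
        if pk not in deduped:
            deduped[pk] = rec

    return list(deduped.values())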
def insert_energies(results: List[Dict], network: NetworkSchema) -> int:
    """Takes a list of generation values, calculates energies and
    bulk-inserts them into the database"""

    # Get the energy sums as a dataframe
    esdf = energy_sum(results, network=network)

    # Add metadata
    esdf["created_by"] = "opennem.worker.energy"
    esdf["created_at"] = ""
    esdf["updated_at"] = datetime.now()
    esdf["generated"] = None
    esdf["is_forecast"] = False
    esdf["energy_quality_flag"] = 0

    # reorder columns
    columns = [
        "created_by",
        "created_at",
        "updated_at",
        "network_id",
        "trading_interval",
        "facility_code",
        "generated",
        "eoi_quantity",
        "is_forecast",
        "energy_quality_flag",
    ]
    esdf = esdf[columns]

    records_to_store: List[Dict] = esdf.to_dict("records")

    if len(records_to_store) < 1:
        logger.error("No records returned from energy sum")
        return 0

    # dedupe records
    return_records_grouped = {}

    for pk_values, rec_value in groupby(
        records_to_store,
        key=lambda r: (
            r.get("trading_interval"),
            r.get("network_id"),
            r.get("facility_code"),
        ),
    ):
        if pk_values not in return_records_grouped:
            return_records_grouped[pk_values] = list(rec_value).pop()

    records_to_store = list(return_records_grouped.values())

    # Build SQL + CSV and bulk-insert
    sql_query = build_insert_query(FacilityScada, ["updated_at", "eoi_quantity"])

    conn = get_database_engine().raw_connection()
    cursor = conn.cursor()

    csv_content = generate_csv_from_records(
        FacilityScada,
        records_to_store,
        column_names=records_to_store[0].keys(),
    )

    try:
        cursor.copy_expert(sql_query, csv_content)
        conn.commit()
    except Exception as e:
        logger.error("Error inserting records: {}".format(e))
        return 0

    logger.info("Inserted {} records".format(len(records_to_store)))

    return len(records_to_store)
def parse_bom(file: FilePath) -> int:
    num_inserts = 0
    limit = 0
    read_count = 0

    records_to_store = []
    observation_times = []

    with file.open() as fh:
        # fh.readline()
        csvreader = csv.DictReader(
            [bom_parse_date(l) for l in fh if re.match(_valid_csv_line, l)],
            delimiter=",",
            fieldnames=BOM_FORMAT_FIELDS,
        )

        for row in csvreader:
            station_id_field = None

            if "station_id" in row:
                station_id_field = "station_id"

            if "Bureau of Meteorology station number" in row:
                station_id_field = "Bureau of Meteorology station number"

            if not station_id_field:
                raise Exception("Could not find station id field")

            row[station_id_field] = bom_pad_stationid(row[station_id_field])
            row[station_id_field] = bom_station_id_remap(row[station_id_field])

            if "station_id" in row:
                record = parse_record_bom(row)

                # skip duplicate observation times
                ot = record["observation_time"]

                if ot in observation_times:
                    continue

                observation_times.append(ot)
                records_to_store.append(record)

            read_count += 1

            if limit and limit > 0 and read_count >= limit:
                break

    sql_query = build_insert_query(BomObservation, BOM_DB_UPDATE_FIELDS)

    conn = get_database_engine().raw_connection()
    cursor = conn.cursor()

    csv_content = generate_csv_from_records(
        BomObservation,
        records_to_store,
        column_names=records_to_store[0].keys(),
    )

    cursor.copy_expert(sql_query, csv_content)
    conn.commit()

    # number of records sent to the database via COPY
    num_inserts = len(records_to_store)

    return num_inserts