def load_bom_stations_json() -> None:
    """
    Imports BOM stations into the database from bom_stations.json

    The json is obtained using scripts/bom_stations.py
    """
    session = SessionLocal()

    bom_stations = load_data("bom_stations.json", from_project=True)
    bom_capitals = load_data("bom_capitals.json", from_project=True)

    if not bom_stations:
        logger.error("Could not load bom stations")
        return None

    codes = []
    stations_imported = 0

    for bom_station in bom_stations:
        if "code" not in bom_station:
            logger.error("Invalid BOM station record: %s", bom_station)
            continue

        # skip duplicate station codes
        if bom_station["code"] in codes:
            continue

        codes.append(bom_station["code"])

        station = session.query(BomStation).filter_by(code=bom_station["code"]).one_or_none()

        if not station:
            logger.info("New BOM station: %s", bom_station["name"])
            station = BomStation(code=bom_station["code"])

        station.name = bom_station["name_full"]
        station.name_alias = bom_station["name"]
        station.website_url = bom_station["url"]
        station.feed_url = bom_station["json_feed"]
        station.priority = 5
        station.state = bom_station["state"]
        station.altitude = bom_station["altitude"]

        if "web_code" in bom_station:
            station.web_code = bom_station["web_code"]

        if bom_station["code"] in bom_capitals:
            station.is_capital = True
            station.priority = 1

        # WKT points are ordered longitude, latitude
        station.geom = "SRID=4326;POINT({} {})".format(bom_station["lng"], bom_station["lat"])

        stations_imported += 1
        session.add(station)

    logger.info("Imported %s stations", stations_imported)
    session.commit()

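# For reference, a single bom_stations.json entry as implied by the importer
# above. Key names are taken from the accessors in load_bom_stations_json();
# the values are illustrative placeholders, not real BOM data.
EXAMPLE_BOM_STATION = {
    "code": "000000",
    "name": "Example",
    "name_full": "Example (Observation Point)",
    "url": "http://www.bom.gov.au/...",
    "json_feed": "http://www.bom.gov.au/fwo/...",
    "state": "NSW",
    "altitude": 42,
    "web_code": "example",  # optional key
    "lng": 151.2,
    "lat": -33.9,
}
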
def setup_class(cls):
    """
    Load the stations fixture for this test suite
    """
    cls.data = load_data("stations.json", from_project=True)
    cls.statuses = load_data("facility_status.json", from_fixture=True)
    cls.fueltechs = load_data("fueltechs.json", from_fixture=True)

def opennem_import_patches():
    """
    Reads the OpenNEM data source
    """
    opennem_records = load_data("opennem.csv", from_project=True)

    for rec in opennem_records:
        logger.debug(rec)

        if "record_type" not in rec:
            raise Exception("Invalid CSV: No record_type")

        record_type = rec["record_type"]

        if record_type not in RECORD_MODEL_MAP:
            raise Exception(
                "Invalid record type: {} is not a valid record type".format(record_type)
            )

        record_model = RECORD_MODEL_MAP[record_type]

    return record_model

def wikidata_join():
    session = SessionLocal()

    wikidata = load_data("wikidata-parsed.json", from_project=True)

    for entry in wikidata:
        station_name = entry.get("name")

        station_lookup = session.query(Station).filter(Station.name == station_name).all()

        if len(station_lookup) == 0:
            logger.info("Didn't find a station for {}".format(station_name))

        if len(station_lookup) == 1:
            station = station_lookup.pop()

            station.description = entry.get("description")
            station.wikipedia_link = entry.get("wikipedia")
            station.wikidata_id = entry.get("wikidata_id")

            session.add(station)
            logger.info("Updated station {}".format(station_name))

        if len(station_lookup) > 1:
            logger.info("Found multiple for station {}".format(station_name))

    session.commit()

def load_networks() -> None:
    """
    Load the networks fixture
    """
    fixture = load_data("networks.json", from_fixture=True)

    s = SessionLocal()

    for network in fixture:
        network_model = s.query(Network).filter_by(code=network["code"]).one_or_none()

        if not network_model:
            network_model = Network(code=network["code"])

        network_model.label = network["label"]
        network_model.country = network["country"]
        network_model.timezone = network["timezone"]
        network_model.timezone_database = network["timezone_database"]
        network_model.offset = network["offset"]
        network_model.interval_size = network["interval_size"]
        network_model.network_price = network["network_price"]

        if "interval_shift" in network:
            network_model.interval_shift = network["interval_shift"]

        if "export_set" in network:
            network_model.export_set = network["export_set"]

        try:
            s.add(network_model)
            s.commit()
        except Exception:
            logger.error("Error saving network {}".format(network_model.code))

def load_network_regions() -> None:
    """
    Load the network region fixture
    """
    fixture = load_data("network_regions.json", from_fixture=True)

    s = SessionLocal()

    for network_region in fixture:
        network_region_model = (
            s.query(NetworkRegion)
            .filter_by(code=network_region["code"], network_id=network_region["network_id"])
            .one_or_none()
        )

        if not network_region_model:
            network_region_model = NetworkRegion(code=network_region["code"])

        network_region_model.network_id = network_region["network_id"]

        try:
            s.add(network_region_model)
            s.commit()
            logger.debug("Loaded network region {}".format(network_region_model.code))
        except Exception:
            logger.error("Error saving network region {}".format(network_region_model.code))

def load_bom_stations_csv() -> None:
    """
    Imports the BOM fixed-width stations format

    Made redundant with the new JSON
    """
    s = SessionLocal()

    station_csv = load_data("stations_db.txt", from_fixture=True)

    lines = station_csv.split("\n")

    for line in lines:
        code, state, name, registered, lng, lat = parse_fixed_line(line)

        station = s.query(BomStation).filter_by(code=code).one_or_none()

        if not station:
            station = BomStation(
                code=code,
                state=state,
                name=name,
                registered=registered,
            )

        # WKT points are ordered longitude, latitude
        station.geom = "SRID=4326;POINT({} {})".format(lng, lat)

        try:
            s.add(station)
            s.commit()
        except Exception:
            logger.error("Error saving BOM station {}".format(station.code))

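# Note on the geometry strings above: WKT/EWKT points are ordered
# "POINT(x y)", i.e. longitude first, then latitude. A quick illustrative
# check with placeholder coordinates:
lng, lat = 151.2, -33.9
assert "SRID=4326;POINT({} {})".format(lng, lat) == "SRID=4326;POINT(151.2 -33.9)"
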
def upgrade() -> None:
    sql_query = load_data("mms_postgres.sql", from_fixture=True, skip_loaders=True)

    op.execute(sql_query)

    # reset the search path so that alembic can update the migration table
    op.execute("set schema 'public';")

def import_facilities() -> None:
    station_data = load_data("stations.json", from_project=True)

    stations = StationSet()

    for s in station_data:
        stations.add_dict(s)

    import_station_set(stations)

def mms_init() -> None:
    station_data = load_data("mms_stations.json", from_project=True)

    stations = StationSet()

    for s in station_data:
        stations.add_dict(s)

    import_station_set(stations)

def _load_user_agents() -> List[str]:
    """Load list of user agents from data source"""
    _agents: bytes = load_data("user_agents.txt", from_project=True)

    # decode each line, dropping blanks and the "More ..." header lines
    agents = [
        i.decode("utf-8")
        for i in _agents.splitlines()
        if len(i) > 0 and not i.decode("utf-8").startswith("More ")
    ]

    return agents

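# A minimal usage sketch (hypothetical helper, standard library only):
# pick a random agent from the loaded list, e.g. to vary request headers.
import random


def random_user_agent() -> str:
    return random.choice(_load_user_agents())
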
def load_statuses() -> dict:
    statuses = load_data("facility_status.json", from_fixture=True)

    status_dict = {}

    for s in statuses:
        _code = s.get("code", None)
        status_dict[_code] = s

    return status_dict

def load_fueltechs() -> dict:
    fueltechs = load_data("fueltechs.json", from_fixture=True)

    fueltechs_dict = {}

    for s in fueltechs:
        _code = s.get("code", None)
        fueltechs_dict[_code] = s

    return fueltechs_dict

def run_import_opennem_registry():
    station_fixture = load_data("facility_registry.json", from_fixture=True)
    mms = run_import_mms()

    nem_stations = {
        i: v
        for i, v in station_fixture.items()
        if v["location"]["state"] not in ["WA"]
    }

    for station_key, station_record in nem_stations.items():
        if station_key not in mms:
            print("Got {} which is not in MMS".format(station_key))

def get_fueltechs() -> List[FueltechSchema]:
    fixture = load_data("fueltechs.json", from_fixture=True)

    fueltechs = []

    for f in fixture:
        _f = FueltechSchema(**f)
        fueltechs.append(_f)

    return fueltechs

def gi_import():
    mms_duid_station_map = load_data("mms_duid_station_map.json", from_project=True)

    nem_gi = load_gi()
    nem_gi = gi_grouper(nem_gi, mms_duid_station_map)

    gi = StationSet()

    for r in nem_gi.values():
        gi.add_dict(r)

    return gi

def wikidata_join_mapping() -> None:
    """Attempts to join the wikidata to OpenNEM stations using
    the csv file with mappings"""
    session = SessionLocal()

    wikidata = load_data("wikidata-parsed.json", from_project=True)

    wikidata_mappings = None

    with open("opennem/data/wikidata_mappings.csv") as fh:
        csvreader = csv.DictReader(
            fh,
            fieldnames=[
                "code",
                "name",
                "network_id",
                "network_region",
                "fueltech_id",
                "wikidata_id",
            ],
        )
        # skip rows without a wikidata id, and the header row itself
        wikidata_mappings = {
            i["code"]: i["wikidata_id"]
            for i in list(csvreader)
            if i["wikidata_id"] and i["code"] != "code"
        }

    for station_code, wikidata_id in wikidata_mappings.items():
        wikidata_record_lookup = list(filter(lambda x: x["wikidata_id"] == wikidata_id, wikidata))

        if len(wikidata_record_lookup) == 0:
            logger.error("Could not find {}".format(wikidata_id))
            continue

        wikidata_record = wikidata_record_lookup.pop()

        station = session.query(Station).filter(Station.code == station_code).one_or_none()

        if not station:
            logger.error("Didn't find a station for {}".format(station_code))
            continue

        station.description = wikidata_record.get("description")
        station.wikipedia_link = wikidata_record.get("wikipedia")
        station.wikidata_id = wikidata_record.get("wikidata_id")

        session.add(station)
        logger.info("Updated station {}".format(station_code))

    session.commit()

def rel_import():
    mms_duid_station_map = load_data("mms_duid_station_map.json", from_project=True)

    nem_rel = load_rel()
    nem_rel = rel_grouper(nem_rel, mms_duid_station_map)

    rel = StationSet()

    for s in nem_rel.values():
        rel.add_dict(s)

    return rel

def load_aemo_csv(item, filename):
    if not item:
        item = {}

    if not isinstance(item, dict):
        raise Exception("Invalid item type: expecting a dict so we can fill it")

    current_item = load_data(filename, from_project=True, content_type="latin-1")

    if "content" not in current_item:
        logger.error("No content in item to parse")
        return item

    content = current_item["content"]
    del current_item["content"]

    table_name = None
    table_values = []
    table_fields = []
    table_records = []

    content_split = content.splitlines()

    datacsv = csv.reader(content_split)

    for row in datacsv:
        if not row:
            continue

        record_type = row[0]

        if record_type == "C":
            # @TODO csv meta stored in table
            # C records are control rows: flush any open table
            if table_name is not None:
                item[table_name] = table_records

        elif record_type == "I":
            # I records declare a new table and its field names
            if table_name is not None:
                item[table_name] = table_records

            table_name = "{}_{}".format(row[1], row[2])
            table_fields = row[4:]
            table_records = []

        elif record_type == "D":
            # D records are data rows for the current table
            table_values = row[4:]
            record = dict(zip(table_fields, table_values))
            table_records.append(record)

    return item

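# A minimal sketch of the AEMO MMS CSV record format load_aemo_csv() parses.
# "C" rows are control records, "I" rows declare a table and its field names,
# "D" rows carry data for the current table. The sample rows below are
# illustrative, not real AEMO output.
import csv

sample_rows = [
    "C,SAMPLE,FILE,HEADER",
    "I,DISPATCH,UNIT_SCADA,1,SETTLEMENTDATE,DUID,SCADAVALUE",
    'D,DISPATCH,UNIT_SCADA,1,"2021/01/01 00:05:00",EXAMPLE1,42.5',
    "C,END,OF,REPORT",
]

tables = {}
table_name, fields, records = None, [], []

for row in csv.reader(sample_rows):
    if row[0] == "I":
        if table_name:
            tables[table_name] = records
        table_name, fields, records = "{}_{}".format(row[1], row[2]), row[4:], []
    elif row[0] == "D":
        records.append(dict(zip(fields, row[4:])))
    elif row[0] == "C" and table_name:
        tables[table_name] = records

# tables == {"DISPATCH_UNIT_SCADA": [{"SETTLEMENTDATE": "2021/01/01 00:05:00",
#                                     "DUID": "EXAMPLE1", "SCADAVALUE": "42.5"}]}
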
def load_bom_stations_json():
    """
    Imports BOM stations into the database from bom_stations.json

    The json is obtained using scripts/bom_stations.py
    """
    session = SessionLocal()

    bom_stations = load_data("bom_stations.json", from_project=True)
    bom_capitals = load_data("bom_capitals.json", from_project=True)

    for bom_station in bom_stations:
        station = session.query(BomStation).filter_by(code=bom_station["code"]).one_or_none()

        if not station:
            logger.info("New station: %s", bom_station["name"])
            station = BomStation(code=bom_station["code"])

        station.name = bom_station["name_full"]
        station.name_alias = bom_station["name"]
        station.website_url = bom_station["url"]
        station.feed_url = bom_station["json_feed"]
        station.priority = 5
        station.state = bom_station["state"]
        station.altitude = bom_station["altitude"]

        if bom_station["code"] in bom_capitals:
            station.is_capital = True
            station.priority = 1

        station.geom = "SRID=4326;POINT({} {})".format(bom_station["lng"], bom_station["lat"])

        session.add(station)

    session.commit()

def load_units() -> List[UnitDefinition]:
    units_dicts = load_data("units.json")

    units = [UnitDefinition(**i) for i in units_dicts]

    # aliases, where set, must be unique across units
    assert len([i.name_alias for i in units if i.name_alias]) == len(
        {i.name_alias for i in units if i.name_alias}
    ), "Unique names for aliases required"

    # unit names must be unique
    assert len([i.name for i in units]) == len(
        {i.name for i in units}
    ), "Unique unit names required"

    return units

def load_logging_config(filename: str = "logging.yml", fail_silent: bool = True) -> Optional[dict]:
    """Load logging configuration from yml file"""
    settings_file_content = load_data(filename, from_settings=True)

    if not settings_file_content:
        if fail_silent:
            return None

        raise SettingsNotFound("Not a valid logging settings file: {}".format(filename))

    config_data = yaml.safe_load(settings_file_content)

    return config_data

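# A minimal usage sketch: apply the loaded config with the standard library.
# This assumes the YAML holds a logging.config.dictConfig-style mapping,
# which load_logging_config() itself does not verify.
import logging.config

config = load_logging_config()

if config:
    logging.config.dictConfig(config)
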
def load_registry() -> List[StationSchema]:
    """
    Loads the facility registry into a list of Station schemas
    """
    stations = load_data("facility_registry.json")

    records = []

    for station_id, station_record in stations.items():
        facilities = []

        for duid, facility_record in station_record["duid_data"].items():
            status = map_compat_facility_state(
                station_record.get("status", {}).get("state", "")
            )
            fuel_tech = map_compat_fueltech(facility_record.get("fuel_tech", ""))
            registered_capacity = clean_capacity(facility_record.get("registered_capacity"))

            facility = FacilitySchema(
                name=normalize_whitespace(station_record.get("display_name")),
                network_region=map_compat_network_region(station_record["region_id"]),
                status=status,
                duid=duid,
                fueltech=fuel_tech,
                capacity=registered_capacity,
            )

            facilities.append(facility)

        record = StationSchema(
            name=normalize_whitespace(station_record.get("display_name")),
            code=station_id,
            state=station_record.get("location", {}).get("state", None),
            facilities=_sort_facilities(facilities),
        )

        records.append(record)

    records = _sort_stations(records)

    return records

def import_dump_emissions() -> List[Dict]:
    content = load_data("emissions_output.csv", from_project=True, skip_loaders=True)

    csv_content = content.splitlines()
    csvreader = csv.DictReader(csv_content)

    records = []

    for rec in csvreader:
        records.append(
            {
                "facility_code": rec["DUID"],
                "emissions_factor_co2": clean_float(rec["CO2E_EMISSIONS_FACTOR"]),
            }
        )

    return records

def load_facilitystatus() -> None:
    """
    Load the facility status fixture
    """
    fixture = load_data("facility_status.json", from_fixture=True)

    s = SessionLocal()

    for status in fixture:
        facility_status = s.query(FacilityStatus).filter_by(code=status["code"]).one_or_none()

        if not facility_status:
            facility_status = FacilityStatus(code=status["code"])

        facility_status.label = status["label"]

        try:
            s.add(facility_status)
            s.commit()
        except Exception:
            logger.error("Error saving facility status {}".format(facility_status.code))

def load_fueltechs() -> None:
    """
    Load the fueltechs fixture
    """
    fixture = load_data("fueltechs.json", from_fixture=True)

    s = SessionLocal()

    for ft in fixture:
        fueltech = s.query(FuelTech).filter_by(code=ft["code"]).one_or_none()

        if not fueltech:
            fueltech = FuelTech(code=ft["code"])

        fueltech.label = ft["label"]
        fueltech.renewable = ft["renewable"]

        try:
            s.add(fueltech)
            s.commit()
        except Exception:
            logger.error("Error saving fueltech {}".format(fueltech.code))

def wikidata_parse() -> None:
    # query: https://w.wiki/dVi
    # download the simplified json and save to wikidata.json
    wikidata = load_data("wikidata.json", from_project=True)

    out_entries = []
    total_entries = len(wikidata)
    current = 0

    for entry in wikidata:
        wikilink = article_from_wikipedia(entry["article"])
        # distinct name so we don't shadow the wikidata list being iterated
        wikidata_id = dataid_from_url(entry["item"])
        station_name = station_name_cleaner(entry["itemLabel"])

        description = None

        try:
            description = wikipedia.summary(wikilink)
        except Exception as e:
            print(e)

        new_entry = {
            "wikipedia": entry["article"],
            "wikidata": entry["item"],
            "wiki_id": wikilink,
            "wikidata_id": wikidata_id,
            "name": station_name,
            "name_original": entry["itemLabel"],
            "description": description,
        }

        out_entries.append(new_entry)

        current += 1
        print("Done {} of {}".format(current, total_entries))

    with open("data/wikidata-parsed.json", "w") as fh:
        json.dump(out_entries, fh)

def get_import_photo_data(file_name: str = "photos.csv") -> List[PhotoImportSchema]:
    photo_file_path: Path = load_data(file_name, from_project=True, return_path=True)

    if not photo_file_path.is_file():
        raise Exception("Could not import photo file data: {}".format(str(photo_file_path)))

    photo_records: List[PhotoImportSchema] = []

    with photo_file_path.open() as fh:
        # skip csv header
        fh.readline()

        csvreader = csv.DictReader(fh, fieldnames=CSV_IMPORT_FORMAT_COLUMNS)

        # Parse CSV records into schemas
        photo_records = [PhotoImportSchema(**i) for i in csvreader]

    return photo_records

def get_import_osm_data(file_name: str = "osm_ways.csv") -> List[OSMImportCSVSchema]:
    osm_ways_path: Path = load_data(file_name, from_project=True, return_path=True)

    if not osm_ways_path.is_file():
        raise Exception("Could not import OSM ways data: {}".format(str(osm_ways_path)))

    osm_way_records: List[OSMImportCSVSchema] = []

    with osm_ways_path.open() as fh:
        # skip csv header
        fh.readline()

        csvreader = csv.DictReader(fh, fieldnames=CSV_IMPORT_FORMAT_COLUMNS)

        # Parse CSV records into schemas
        osm_way_records = [OSMImportCSVSchema(**i) for i in csvreader]

    return osm_way_records

def load_current() -> List[StationSchema]:
    """
    Load the current project station data into a list of station schemas
    """
    station_data = load_data("stations.geojson", from_project=True)

    records = []

    for s in station_data.get("features", []):
        station = s.get("properties")

        facilities = []

        for facility_record in station.get("duid_data", []):
            facility = FacilitySchema(
                name=station.get("name"),
                network_region=facility_record.get("network_region"),
                status=facility_record.get("status"),
                duid=facility_record.get("duid"),
                fueltech=facility_record.get("fuel_tech"),
                capacity=facility_record.get("capacity_registered"),
            )

            facilities.append(facility)

        record = StationSchema(
            name=station.get("name"),
            code=station.get("station_code"),
            state=_get_state_from_current(station, facilities),
            facilities=_sort_facilities(facilities),
        )

        records.append(record)

    records = _sort_stations(records)

    return records

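# A minimal sketch of the stations.geojson feature shape implied by the
# accessors in load_current(). Key names come from the code above; the
# values are hypothetical.
EXAMPLE_FEATURE = {
    "properties": {
        "name": "Example Station",
        "station_code": "EXAMPLE",
        "duid_data": [
            {
                "network_region": "NSW1",
                "status": "operating",
                "duid": "EXAMPLE1",
                "fuel_tech": "solar_utility",
                "capacity_registered": 100.0,
            }
        ],
    }
}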