Ejemplo n.º 1
0
    def process_item(self, item, spider=None):
        """Group the item's generator records by cleaned station name.

        Every generator record is copied and extended with two keys:
        ``name`` (the cleaned ``station_name``) and ``name_join``, which is
        ``False`` when ``facility_station_join_by_name`` accepts the cleaned
        name and the record's own ``duid`` otherwise.

        Args:
            item: Mapping holding a ``generators`` list. Each record is read
                through its ``station_name`` key and, when it is not joined
                by name, its ``duid`` key.
            spider: Not used by this method.

        Returns:
            A shallow copy of ``item`` whose ``generators`` entry is a dict
            mapping ``(name, name_join)`` tuples to lists of records, in
            first-seen order.

        Raises:
            Exception: If ``item`` has no ``generators`` key.
        """
        if "generators" not in item:
            raise Exception("No generators found in item pipeline failed")

        generators_grouped = {}

        for generator in item["generators"]:
            # Clean the name once and reuse it for both derived fields.
            name = station_name_cleaner(generator["station_name"])
            name_join = (False if facility_station_join_by_name(name) else
                         generator["duid"])

            # Accumulate directly by key: the input is not sorted, so
            # itertools.groupby would split one station into several runs.
            generators_grouped.setdefault((name, name_join), []).append({
                **generator,
                "name": name,
                "name_join": name_join,
            })

        return {**item, "generators": generators_grouped}
Ejemplo n.º 2
0
def stations_grouper(tables):
    """Index the PARTICIPANT_REGISTRATION_STATION rows by station id.

    Each row is converted to a station dict (ids are assigned sequentially
    starting at 1000) and stored in ``tables["mms"]`` under its
    ``STATIONID``. An existing ``mms`` mapping is reused and entries with the
    same station id are overwritten.

    Args:
        tables: Mapping of table name to rows; must contain
            ``PARTICIPANT_REGISTRATION_STATION``.

    Returns:
        The same ``tables`` object with its ``mms`` entry set.

    Raises:
        Exception: If the PARTICIPANT_REGISTRATION_STATION table is missing.
    """
    if "PARTICIPANT_REGISTRATION_STATION" not in tables:
        raise Exception("No PARTICIPANT_REGISTRATION_STATION table")

    # Convert every row first so that nothing is stored if a row fails.
    station_rows = []

    for station_id, row in enumerate(
            tables["PARTICIPANT_REGISTRATION_STATION"], start=1000):
        station_rows.append({
            "id": station_id,
            "updated_at": parse_mms_date(row["LASTCHANGED"]),
            "name": station_name_cleaner(row["STATIONNAME"]),
            "code": row["STATIONID"],
            "station_code": row["STATIONID"],
            "network_name": row["STATIONNAME"],
            "address1": row["ADDRESS1"],
            "address2": row["ADDRESS2"],
            "locality": row["CITY"],
            "state": row["STATE"],
            "postcode": row["POSTCODE"],
            "facilities": [],
        })

    by_station_code = tables.get("mms", {})

    for station in station_rows:
        by_station_code[station["station_code"]] = station

    tables["mms"] = by_station_code

    return tables
Ejemplo n.º 3
0
    def test_dashed_names_whitespace_capitalized(self):
        """Lowercase slash-separated names are expected back spaced and capitalized."""
        expected = "Catagunya / Liapootah / Wayatinah"
        cleaned = station_name_cleaner("catagunya/liapootah / wayatinah")

        assert (
            cleaned == expected
        ), "Catagunya slash name whitespaced and capitalized correctly"
Ejemplo n.º 4
0
    def process_item(self, item, spider=None):
        """Create or update a ``Station`` row for each MMS station record.

        Stations are matched on ``Station.network_code`` against the
        normalized ``STATIONID``. Each record is committed on its own; a
        record whose commit fails is rolled back, logged and skipped so that
        the remaining records can still be written.

        Args:
            item: Iterable of records read through the keys ``STATIONID``,
                ``STATIONNAME``, ``ADDRESS1``, ``ADDRESS2``, ``CITY``,
                ``STATE`` and ``POSTCODE``.
            spider: Not used by this method.

        Returns:
            None.
        """
        s = self.session()

        records_updated = 0
        records_created = 0

        for record in item:
            duid = normalize_duid(record["STATIONID"])
            name = station_name_cleaner(record["STATIONNAME"])
            network_name = normalize_string(record["STATIONNAME"])
            address1 = normalize_string(record["ADDRESS1"])
            address2 = normalize_string(record["ADDRESS2"])
            city = normalize_string(record["CITY"])
            state = normalize_string(record["STATE"]).capitalize()
            postcode = normalize_string(record["POSTCODE"])

            station = (s.query(Station).filter(
                Station.network_code == duid).one_or_none())

            created = not station

            if created:
                station = Station(
                    code=duid,
                    network_code=duid,
                    created_by="au.nem.mms.stations",
                )
            else:
                station.updated_by = "au.nem.mms.stations"

            station.name = name
            station.network_id = "NEM"
            station.network_name = network_name
            station.address1 = address1
            station.address2 = address2
            station.locality = city
            station.state = state
            station.postcode = postcode

            try:
                s.add(station)
                s.commit()
            except Exception as e:
                # After a failed flush/commit the session refuses further
                # work until it is rolled back, so reset it before moving on.
                s.rollback()
                logger.error(e)
                continue

            # Only count and report records that were actually persisted.
            if created:
                records_created += 1
            else:
                records_updated += 1

            logger.debug("{} station record with id {}".format(
                "Created" if created else "Updated", duid))

        logger.info("Created {} records and updated {}".format(
            records_created, records_updated))
Ejemplo n.º 5
0
    def test_hallet_is_three_units(self):
        """The three Hallett names must still be distinct after cleaning."""
        raw_names = (
            "Hallett Power Station",
            "Hallett 1 Wind Farm",
            "Hallett 2 Wind Farm",
        )
        # A set collapses any names that clean to the same value.
        distinct_cleaned = {station_name_cleaner(raw) for raw in raw_names}

        assert len(raw_names) == len(
            distinct_cleaned), "Hallet should have three distinct names"
Ejemplo n.º 6
0
def parse_aemo_general_information(filename: str) -> List[AEMOGIRecord]:
    """Parse the generation sheet of an AEMO General Information workbook.

    Rows are read from the ``ExistingGeneration&NewDevs`` sheet starting at
    row 3, and reading stops at the first row whose first cell is empty.
    Only the columns named in ``GI_EXISTING_NEW_GEN_KEYS`` are kept; each row
    is extended with a cleaned ``name``, a mapped ``status_id`` and a mapped
    ``fueltech_id`` before being turned into an ``AEMOGIRecord``.

    Args:
        filename: Path of the workbook to load.

    Returns:
        One ``AEMOGIRecord`` per data row, in sheet order.

    Raises:
        Exception: If the workbook has no ``ExistingGeneration&NewDevs`` sheet.
    """
    wb = load_workbook(filename, data_only=True)

    SHEET_KEY = "ExistingGeneration&NewDevs"

    if SHEET_KEY not in wb:
        raise Exception("Doesn't look like a GI spreadsheet")

    ws = wb[SHEET_KEY]

    # The sheet has lots of hidden columns; resolve the zero-based positions
    # of the ones we want once instead of on every row.
    column_indexes = [
        excel_column_to_column_index(column) - 1
        for column in GI_EXISTING_NEW_GEN_KEYS.values()
    ]

    records = []

    for row in ws.iter_rows(min_row=3, values_only=True):
        # break at end of data records
        # GI has a blank line before garbage notes
        if row[0] is None:
            break

        return_dict = dict(
            zip(GI_EXISTING_NEW_GEN_KEYS, [row[idx] for idx in column_indexes]))

        return_dict["name"] = station_name_cleaner(return_dict["StationName"])
        return_dict["status_id"] = aemo_gi_status_map(return_dict["UnitStatus"])
        return_dict["fueltech_id"] = aemo_gi_fueltech_to_fueltech(
            return_dict["FuelSummary"])

        records.append(AEMOGIRecord(**return_dict))

    return records
Ejemplo n.º 7
0
def wikidata_parse() -> None:
    """Convert the downloaded wikidata export into ``data/wikidata-parsed.json``.

    For every entry the Wikipedia article id and wikidata id are extracted,
    the item label is cleaned into a station name, and a Wikipedia summary is
    fetched. A failed summary lookup is printed and leaves ``description``
    as ``None``. Progress is printed after each entry.
    """

    # query: https://w.wiki/dVi
    # download the simplified json and save to wikidata.json
    entries = load_data("wikidata.json", from_project=True)

    out_entries = []
    total_entries = len(entries)

    for current, entry in enumerate(entries, start=1):
        wikilink = article_from_wikipedia(entry["article"])
        # Named distinctly so it does not rebind the collection being iterated.
        wikidata_id = dataid_from_url(entry["item"])
        station_name = station_name_cleaner(entry["itemLabel"])

        description = None

        try:
            description = wikipedia.summary(wikilink)
        except Exception as e:
            # Best effort: keep the entry even without a summary.
            print(e)

        out_entries.append({
            "wikipedia": entry["article"],
            "wikidata": entry["item"],
            "wiki_id": wikilink,
            "wikidata_id": wikidata_id,
            "name": station_name,
            "name_original": entry["itemLabel"],
            "description": description,
        })

        print("Done {} of {}".format(current, total_entries))

    with open("data/wikidata-parsed.json", "w") as fh:
        json.dump(out_entries, fh)
Ejemplo n.º 8
0
def test_station_name_cleaner(station_name: str, station_name_clean: str) -> None:
    """Check that cleaning ``station_name`` produces ``station_name_clean``."""
    cleaned = station_name_cleaner(station_name)

    assert cleaned == station_name_clean, "Clean name matches"
Ejemplo n.º 9
0
    def test_dashed_names_with_stripping(self):
        """A trailing "Power Station" is expected to be dropped from a slash-separated name."""
        expected = "Catagunya / Liapootah / Wayatinah"
        cleaned = station_name_cleaner(
            "Catagunya / Liapootah / Wayatinah Power Station")

        assert cleaned == expected, "Catagunya hyphenated name"
Ejemplo n.º 10
0
    def test_strip_combined_cycle(self):
        """"Combined Cycle" is expected to be removed from the cleaned name."""
        expected = "Tamar Valley"
        cleaned = station_name_cleaner("Tamar Valley Combined Cycle")

        assert cleaned == expected, "Tamar Valley Combined Cycle stripped to suburb"
Ejemplo n.º 11
0
    def test_strip_landfill(self):
        """"Landfill" is expected to be removed from the cleaned name."""
        expected = "Broadmeadows"
        cleaned = station_name_cleaner("Broadmeadows Landfill")

        assert cleaned == expected, "Broadmeadows Landfill becomes Broadmeadows"
Ejemplo n.º 12
0
    def test_grosvenor_stripping(self):
        """The fuel and plant description after "Grosvenor 1" is expected to be stripped."""
        expected = "Grosvenor 1"
        cleaned = station_name_cleaner(
            "Grosvenor 1 Waste Coal Mine Gas Power Station")

        assert cleaned == expected, "Grosvenor strips specifications"
Ejemplo n.º 13
0
    def test_dashed_names_with_stripping_capitalized(self):
        """A lowercase last segment is expected to be capitalized and lose "power station"."""
        expected = "Catagunya / Liapootah / Woy Woy"
        cleaned = station_name_cleaner(
            "Catagunya / Liapootah / woy woy power station")

        assert cleaned == expected, "Catagunya hyphenated name"
Ejemplo n.º 14
0
    def test_power_stripper(self):
        """"Test Power Station" is expected to clean to "Test"."""
        expected = "Test"
        cleaned = station_name_cleaner("Test Power Station")

        assert cleaned == expected, "Test power station becomes just Test"
Ejemplo n.º 15
0
def rel_grouper(records, station_code_map):
    """Parse facility rows and group them under their station code.

    Args:
        records: Iterable of rows read through the keys ``station_name``,
            ``duid``, ``unit_no``, ``region``, ``unit_size``, ``reg_cap``,
            ``max_cap``, ``dispatch_type`` and the fuel/tech descriptor keys.
        station_code_map: Lookup data handed to ``lookup_station_code``.

    Returns:
        Dict keyed by station code. Each value holds the station's cleaned
        ``name``, its ``network_name`` (taken from the first facility), the
        ``code``, a sequential ``id`` starting at 2000 and the list of parsed
        ``facilities``.

    Raises:
        Exception: If a station code is seen twice in the result or if a
            group has a falsy station code.
    """
    facilities = []

    for row in records:
        cleaned_name = station_name_cleaner(row["station_name"])
        duid = normalize_duid(row["duid"])
        unit = parse_unit_duid(row["unit_no"], duid)
        fueltech = lookup_fueltech(
            row["fuel_source_primary"],
            row["fuel_source_descriptor"],
            row["tech_primary"],
            row["tech_primary_descriptor"],
            row["dispatch_type"],
        )
        station_code = lookup_station_code([duid], row["station_name"],
                                           station_code_map)

        facility = {
            "name": cleaned_name,
            "code": duid,
            "status": parse_facility_status("operating"),
            "station_code": station_code,
            "network_region": row["region"].strip(),
            "network_name": row["station_name"].strip(),
            "unit_size": clean_capacity(row["unit_size"]),
            "unit_code": get_unit_code(unit, duid, cleaned_name),
            "dispatch_type": parse_dispatch_type(row["dispatch_type"]),
            "fueltech": parse_facility_fueltech(fueltech),
            "capacity_registered": clean_capacity(row["reg_cap"]),
            "capacity_maximum": clean_capacity(row["max_cap"]),
        }
        facilities.append(facility)

    # Bucket facilities per station code, keeping first-seen order.
    by_station = {}

    for facility in facilities:
        by_station.setdefault(facility["station_code"], []).append(facility)

    coded_records = {}

    for station_id, (station_code, members) in enumerate(by_station.items(),
                                                         start=2000):
        network_name = members[0]["network_name"]

        if station_code in coded_records:
            raise Exception("Code conflict: {}. {} {}".format(
                station_code, network_name, coded_records[station_code]))

        if not station_code:
            raise Exception("Unmapped station: {}".format(members))

        coded_records[station_code] = {
            "name": station_name_cleaner(network_name),
            "network_name": network_name,
            "code": station_code,
            "id": station_id,
            "facilities": members,
        }

    return coded_records
Ejemplo n.º 16
0
    def test_acronyms(self):
        """"bhp power" is expected to clean to the upper-case acronym "BHP"."""
        expected = "BHP"
        cleaned = station_name_cleaner("bhp power")

        assert cleaned == expected, "Acronym is BHP"
Ejemplo n.º 17
0
    def test_hallett_power(self):
        """"Hallett Power Station" is expected to clean to "Hallett"."""
        expected = "Hallett"
        cleaned = station_name_cleaner("Hallett Power Station")

        assert cleaned == expected, "Hallet Power Station is Hallet"
Ejemplo n.º 18
0
    def test_name_mapping_and_stripping(self):
        """The SA Government VPP stage name is expected to map to "SA VPP"."""
        expected = "SA VPP"
        cleaned = station_name_cleaner(
            "SA Government Virtual Power Plant - stage 1")

        assert cleaned == expected, "SA Government Virtual Power Plant maps to SA VPP"
Ejemplo n.º 19
0
    def test_stripping_units(self):
        """The "LFG PS Units 1-4" suffix is expected to be stripped."""
        expected = "Eastern Creek"
        cleaned = station_name_cleaner("Eastern Creek LFG PS Units 1-4")

        assert cleaned == expected, "Eastern Creek should strip units"
Ejemplo n.º 20
0
    def test_name_mapping_hornsdale(self):
        """"Hornsdale Power Reserve Unit 1" is expected to keep "Power Reserve" and lose the unit."""
        expected = "Hornsdale Power Reserve"
        cleaned = station_name_cleaner("Hornsdale Power Reserve Unit 1")

        assert cleaned == expected, "Hornsdale maps"
Ejemplo n.º 21
0
    def test_unit_letters(self):
        """A quoted unit letter is expected to be kept, without its quotes."""
        expected = "Yallourn W"
        cleaned = station_name_cleaner("Yallourn 'W' Power Station")

        assert cleaned == expected, "Yallourn has a unit letter"
Ejemplo n.º 22
0
    def test_name_uni_melbourne(self):
        """"University of Melbourne" is expected to be abbreviated to "UoM"."""
        expected = "UoM Archives Brunswick"
        cleaned = station_name_cleaner(
            "University of Melbourne Archives Brunswick")

        assert cleaned == expected, "UoM is abbreviated and suburb name added"
Ejemplo n.º 23
0
    def test_strip_waste_disposal(self):
        """"Waste Disposal Facility" is expected to be removed from the cleaned name."""
        expected = "Wyndham"
        cleaned = station_name_cleaner("Wyndham Waste Disposal Facility")

        assert cleaned == expected, "Whyndham Waste Disposal stripped to suburb"
Ejemplo n.º 24
0
    def test_name_energy_brix(self):
        """"Energy Brix Complex" is expected to map to "Morwell"."""
        expected = "Morwell"
        cleaned = station_name_cleaner("Energy Brix Complex")

        assert cleaned == expected, "Energy Brix Complex becomes Morwell"
Ejemplo n.º 25
0
    def test_dashed_names(self):
        """An already clean slash-separated name is expected to come back unchanged."""
        original = "Catagunya / Liapootah / Wayatinah"
        cleaned = station_name_cleaner(original)

        assert cleaned == "Catagunya / Liapootah / Wayatinah", "Catagunya slash name"
Ejemplo n.º 26
0
    def test_government_virtual(self):
        """The SA Government Virtual Power Plant name is expected to clean to "SA VPP"."""
        expected = "SA VPP"
        cleaned = station_name_cleaner(
            "SA Government Virtual Power Plant - stage 1")

        assert cleaned == expected, "SA Government Virtual becomes SA VPP"
Ejemplo n.º 27
0
def update_existing_geos() -> None:
    """
    Old method to update geos from existing facilities file on OpenNEM

    Loads the ``facility_registry.json`` fixture and, for every station in
    it that already exists in the database (matched on
    ``Station.network_code``), stores the fixture location as the station
    geometry and back-fills missing fueltechs on the station's facilities.
    Stations that cannot be found are logged and skipped. Changes are
    committed once per station and the session is always closed.
    """

    station_fixture = load_data("facility_registry.json", from_fixture=True)

    stations = [{"station_code": k, **v} for k, v in station_fixture.items()]

    s = SessionLocal()

    try:
        for station_data in stations:
            station_code = normalize_duid(station_data["station_code"])

            station = s.query(Station).filter(
                Station.network_code == station_code).one_or_none()

            if not station:
                logger.info("Could not find station {}".format(station_code))
                continue

            if ("location" in station_data
                    and "latitude" in station_data["location"]
                    and station_data["location"]["latitude"]):
                # Assign the EWKT string itself; a trailing comma here would
                # turn the value into a one-element tuple.
                # NOTE(review): WKT points are written "x y", which for
                # SRID 4326 normally means longitude first - confirm that
                # latitude-first is intended before relying on this geometry.
                station.geom = "SRID=4326;POINT({} {})".format(
                    station_data["location"]["latitude"],
                    station_data["location"]["longitude"],
                )
                station.geocode_processed_at = datetime.now()
                station.geocode_by = "opennem"
                station.geocode_approved = True

                station.updated_by = "fixture.registry"

            s.add(station)

            logger.info("Updated station geo location {} ({})".format(
                station.code,
                station.name,
            ))

            # Use the facilities of the station being processed, not those
            # of the first station in the fixture.
            facilities = [{
                "code": k,
                **v
            } for k, v in station_data["duid_data"].items()]

            # update fueltechs
            for facility_data in facilities:
                facility_duid = facility_data["code"]
                facility_fueltech = lookup_fueltech(facility_data["fuel_tech"])

                facility = s.query(Facility).filter(
                    Facility.network_code == facility_duid).first()

                if not facility:
                    logger.error(
                        "Could not find existing facility {} for station {}".
                        format(facility_duid, station_code))
                    continue

                if not facility.fueltech_id:
                    facility.fueltech_id = facility_fueltech

                if facility.fueltech_id != facility_fueltech:
                    # "Old" is the value already stored on the facility,
                    # "new" is the value looked up from the fixture.
                    logger.error(
                        "Fueltech mismatch for {}. Old is {} and new is {}".
                        format(station_code, facility.fueltech_id,
                               facility_fueltech))

                s.add(facility)

            s.commit()
    finally:
        s.close()
Ejemplo n.º 28
0
    def test_swanbank_b(self):
        """Only the first station of the "&"-joined Swanbank name is expected to remain."""
        expected = "Swanbank B"
        cleaned = station_name_cleaner(
            "Swanbank B Power Station & Swanbank E Gas Turbine")

        assert cleaned == expected, "Swanbank B"
Ejemplo n.º 29
0
    def process_facilities(self, records):
        """Create or update stations and facilities from grouped GI records.

        ``records`` is iterated with ``.items()``; the keys are ignored and
        each value is treated as the list of facility records belonging to
        one station. For every group the method:

        1. tries to find an existing ``Station`` (via a facility with the
           same DUID and region, then by station name where joining by name
           is allowed) and creates and commits a new one if none is found;
        2. walks the group's facility records, skipping "Natural Gas
           Pipeline" fuel types and statuses in ``FACILITY_INVALID_STATUS``,
           finds or creates the matching ``Facility``, fills in fields that
           are still empty, and commits after every facility.

        Raises:
            Exception: If a facility ends up with no ``status_id``.
            Any exception raised by the final commit is logged and re-raised.

        NOTE(review): the session is only closed by the ``finally`` at the
        very end; an exception raised inside the loops leaves it open.
        """
        s = self.session()

        # Store a list of all existing duids
        # NOTE(review): ``all_duids`` is not referenced again in this method.
        all_duids = list(
            set([
                i[0] for i in s.query(Facility.network_code).filter(
                    Facility.network_code != None).all()
            ]))

        for _, facility_records in records.items():
            # Running unit number within this station; advanced by the
            # record's unit count after every processed facility.
            facility_index = 1
            facility_station = None
            created_station = False

            station_network_name = record_get_station_name(facility_records)
            station_name = station_name_cleaner(station_network_name)

            duid_unique = has_unique_duid(facility_records)
            facility_count = len(facility_records)

            # Step 1. Find the station
            # First by duid if it's unique
            duid = get_unique_duid(facility_records)

            # all GI records should have a region
            station_network_region = get_unique_reqion(facility_records)

            # This is the most suitable unit record to use for the station
            # see helper above
            facility_station_record = get_station_record_from_facilities(
                facility_records)

            # Single record with a unique duid: expect at most one match.
            if duid and duid_unique and facility_count == 1:

                facility_lookup = None

                try:
                    facility_lookup = (s.query(Facility).filter(
                        Facility.network_code == duid).filter(
                            Facility.network_region ==
                            station_network_region).one_or_none())
                except MultipleResultsFound:
                    logger.error(
                        "Found multiple duid for station with code {}".format(
                            duid))
                    continue

                if facility_lookup and facility_lookup.station:
                    facility_station = facility_lookup.station

            # NOTE(review): ``and`` binds tighter than ``or``, so this reads
            # as ``(duid and duid_unique and facility_count > 1) or
            # (not duid_unique)`` - the second arm runs even when ``duid`` is
            # falsy. Confirm that is intended.
            if (duid and (duid_unique and facility_count > 1)
                    or not duid_unique):

                facility_lookup = (s.query(Facility).filter(
                    Facility.network_code == duid).filter(
                        Facility.network_region ==
                        station_network_region).first())

                if facility_lookup and facility_lookup.station:
                    facility_station = facility_lookup.station

            # Fall back to an exact station-name match when joining by name
            # is allowed for this name.
            if not facility_station and facility_station_join_by_name(
                    station_name):
                try:
                    facility_station = (s.query(Station).filter(
                        Station.name == station_name).one_or_none())
                except MultipleResultsFound:
                    logger.warning(
                        "Multiple results found for station name : {}".format(
                            station_name))
                    facility_station = None

            # If we have a station name, and no duid, and it's ok to join by name
            # then find the station (make sure to region lock)
            if (station_name and not duid and not facility_station
                    and facility_station_join_by_name(station_name)):
                facility = (s.query(Facility).join(Facility.station).filter(
                    Facility.network_region == station_network_region).filter(
                        Station.name == station_name).first())

                if facility:
                    facility_station = facility.station

            # Create one as it doesn't exist
            if not facility_station:
                facility_station = Station(
                    name=station_name,
                    network_name=name_normalizer(
                        facility_station_record["station_name"]),
                    network_id="NEM",
                    created_by="pipeline.aemo.general_information",
                )

                s.add(facility_station)
                s.commit()

                created_station = True
            else:
                facility_station.updated_by = (
                    "pipeline.aemo.general_information")

            # Step 2. Find or create each facility of this station.
            for facility_record in facility_records:
                if facility_record["FuelType"] in ["Natural Gas Pipeline"]:
                    continue

                # skip these statuses too
                if facility_record["UnitStatus"] in FACILITY_INVALID_STATUS:
                    continue

                facility = None
                created_facility = False

                facility_network_name = name_normalizer(
                    facility_record["station_name"])
                # NOTE(review): ``facility_name`` is not used further below.
                facility_name = station_name_cleaner(
                    facility_record["station_name"])
                duid = normalize_duid(facility_record["duid"])
                reg_cap = clean_capacity(facility_record["NameCapacity"])

                # A record may describe several units; default to one.
                units_num = facility_record["Units"] or 1
                unit_id = facility_index + (units_num - 1)

                unit = parse_unit_duid(unit_id, duid)
                unit_size = clean_capacity(facility_record["unit_capacity"])
                unit_code = get_unit_code(unit, duid,
                                          facility_record["station_name"])

                facility_comissioned = facility_record["SurveyEffective"]
                facility_comissioned_dt = None

                # The value is accepted either as a datetime or as a
                # "%d/%m/%y" string; anything else leaves the date unset.
                if type(facility_comissioned) is datetime:
                    facility_comissioned_dt = facility_comissioned

                try:
                    if type(facility_comissioned) is str:
                        facility_comissioned_dt = datetime.strptime(
                            facility_comissioned, "%d/%m/%y")
                except ValueError:
                    logger.error(
                        "Error parsing date: {}".format(facility_comissioned))

                facility_status = map_aemo_facility_status(
                    facility_record["UnitStatus"])
                facility_network_region = normalize_aemo_region(
                    facility_record["Region"])
                # Only look up a fueltech when the record has a FuelType.
                facility_fueltech = (lookup_fueltech(
                    facility_record["FuelType"],
                    techtype=facility_record["TechType"],
                ) if ("FuelType" in facility_record
                      and facility_record["FuelType"]) else None)

                if not facility_fueltech:
                    logger.error("Error looking up fueltech: {} {} ".format(
                        facility_record["FuelType"],
                        facility_record["TechType"],
                    ))

                # check if we have it by code first
                # NOTE(review): unlike the duid lookup below, this
                # ``one_or_none`` is not guarded against MultipleResultsFound.
                facility = (s.query(Facility).filter(
                    Facility.code == unit_code).one_or_none())

                if not facility and duid:
                    try:
                        facility = (
                            s.query(Facility).filter(
                                Facility.network_code == duid).filter(
                                    Facility.network_region ==
                                    facility_network_region)
                            # .filter(Facility.nameplate_capacity != None)
                            .one_or_none())
                    except MultipleResultsFound:
                        # NOTE(review): ``logger.warn`` is a deprecated alias
                        # of ``logger.warning``, which is used elsewhere in
                        # this method.
                        logger.warn(
                            "Multiple results found for duid : {}".format(
                                duid))

                    if facility:
                        if facility.station and not facility_station:
                            facility_station = facility.station

                        logger.info(
                            "GI: Found facility by DUID: code {} station {}".
                            format(
                                facility.code,
                                facility.station.name
                                if facility.station else None,
                            ))

                # Done trying to find existing
                if not facility:
                    facility = Facility(
                        code=unit_code,
                        network_code=duid,
                        created_by="pipeline.aemo.general_information",
                    )
                    facility.station = facility_station

                    created_facility = True

                # From here on, fields are mostly only filled when empty so
                # that values already stored on the facility are kept.
                if duid and not facility.network_code:
                    facility.network_code = duid
                    facility.updated_by = "pipeline.aemo.general_information"

                if not facility.network_region:
                    facility.network_region = facility_network_region
                    facility.updated_by = "pipeline.aemo.general_information"

                if not facility.network_name:
                    facility.network_name = facility_network_name
                    facility.updated_by = "pipeline.aemo.general_information"

                if not facility.fueltech_id and facility_fueltech:
                    facility.fueltech_id = facility_fueltech
                    facility.updated_by = "pipeline.aemo.general_information"

                # Capacity is also overwritten when the stored status is set
                # and is anything other than "operating".
                if not facility.capacity_registered or (
                        facility.status and facility.status != "operating"):
                    facility.capacity_registered = reg_cap
                    facility.updated_by = "pipeline.aemo.general_information"

                # @TODO work this out
                # facility.dispatch_type = facility_dispatch_type

                if not facility.unit_id:
                    facility.unit_id = unit.id
                    facility.unit_number = unit.number
                    facility.unit_size = unit_size
                    facility.unit_alias = unit.alias

                if not facility.unit_capacity or (
                        facility.status and facility.status != "operating"):
                    facility.unit_capacity = unit_size
                    facility.updated_by = "pipeline.aemo.general_information"

                # The status is always overwritten with the mapped GI status.
                # if not facility.status_id:
                facility.status_id = facility_status
                # facility.updated_by = "pipeline.aemo.general_information"

                if not facility.registered and facility_comissioned_dt:
                    facility.registered = facility_comissioned_dt
                    facility.updated_by = "pipeline.aemo.general_information"

                facility.station = facility_station

                if facility.fueltech_id is None:
                    logger.warning("Could not find fueltech for: {} {}".format(
                        facility.code, facility.network_code))

                # facility.status_id = facility_status

                # NOTE(review): the station was assigned unconditionally just
                # above, so this only matters if ``facility_station`` is
                # truthy while ``facility.station`` still reads as falsy.
                if facility_station and not facility.station:
                    facility.station = facility_station

                if facility.status_id is None:
                    raise Exception(
                        "GI: Failed to map status ({}) on row: {}".format(
                            facility.status_id, facility_record))

                s.add(facility)
                s.commit()

                facility_index += units_num

                # This sits inside the facility loop, so a created station is
                # logged once per processed facility.
                if created_station:
                    logger.info("GI: {} station with name {} ".format(
                        "Created" if created_station else "Updated",
                        station_name,
                        # facility_station.id,
                    ))

                if created_facility:
                    logger.info(
                        "GI: {} facility with duid {} to station {}".format(
                            "Created" if created_facility else "Updated",
                            duid,
                            station_name,
                        ))

        # Final commit for anything still pending, then release the session.
        try:
            s.commit()
        except Exception as e:
            logger.error(e)
            raise e
        finally:
            s.close()
Ejemplo n.º 30
0
    def test_swanbank_e_single(self):
        """"Swanbank E" on its own is expected to come back unchanged."""
        original = "Swanbank E"
        cleaned = station_name_cleaner(original)

        assert cleaned == "Swanbank E", "Swanbank E"