Example #1
0
def naptan_admin_area_stats(gdf_admin_area, area_name=None):
    """Write the distinct localities of an admin area, with centroids, to CSV.

    The distinct (LocalityName, NptgLocalityCode, AreaName) combinations in
    the nodes frame are joined to the NPTG gazette on NptgLocalityCode to
    pick up each locality's ``Gazette_geometry``. The joined table is written
    to ``<area_name>_stats.csv`` in the current working directory.

    Args:
        gdf_admin_area (pandas.DataFrame): nodes of the admin area; must hold
            the LocalityName, NptgLocalityCode and AreaName columns.
        area_name (str, optional): label used for the output file name and
            the confirmation message. When omitted it is built from the
            distinct AreaName values found in ``gdf_admin_area``.

    Raises:
        Exceptions raised by pandas or by the gazette loader propagate
        unchanged (for example ``KeyError`` when a required column is
        missing).
    """
    key_columns = ["LocalityName", "NptgLocalityCode", "AreaName"]
    # load the nptg gazette.
    localities = etl.naptan_gazette_localities()
    # value_counts yields one entry per distinct combination, most frequent
    # first. Taking its index as a frame discards the count column whatever
    # pandas names it (0 before pandas 2.0, "count" from 2.0 onwards), so no
    # version-specific drop() is needed.
    local_stats = gdf_admin_area.value_counts(
        subset=key_columns).index.to_frame(index=False)
    # from nptg get the centroid locations of each locality
    local_centroid_stats = local_stats.merge(
        localities[["NptgLocalityCode", "Gazette_geometry"]],
        on="NptgLocalityCode")
    if area_name is None:
        # The frame itself cannot be used as a file name: formatting it would
        # embed its whole repr. Label the output by the area(s) it contains.
        distinct_areas = local_stats["AreaName"].unique()
        area_name = "_".join(str(area) for area in distinct_areas)
        area_name = area_name or "unknown_area"
    # record the stats
    local_centroid_stats.to_csv(f"{area_name}_stats.csv",
                                encoding="utf-8",
                                sep=",",
                                header=True)
    print(f"{area_name} stats file has been generated.")
Example #2
0
    def find_unused_localities(cls, gdf):
        """Return the NPTG gazette localities that no node in ``gdf`` uses.

        A locality counts as unused when its NptgLocalityCode does not occur
        in the NptgLocalityCode column of ``gdf``. The unused localities are
        given a geometry, handed to the reporting function and returned.

        Args:
            gdf: nodes frame; only its NptgLocalityCode column is read here.

        Returns:
            The unused gazette localities, as returned by
            ``geo_pipe.calculate_naptan_geometry``.
        """
        localities = etl_pipe.naptan_gazette_localities()
        unused = localities[~localities['NptgLocalityCode'].
                            isin(gdf['NptgLocalityCode'])]
        # conversion for geometry: presumably calculate_naptan_geometry reads
        # the plain Longitude/Latitude names - TODO confirm.
        unused = unused.rename(columns={
            "Gazette_Longitude": "Longitude",
            "Gazette_Latitude": "Latitude"
        })
        unused = geo_pipe.calculate_naptan_geometry(unused)
        # reporting function. The unused localities are the failures of this
        # check; the previous argument was an undefined name.
        rep.report_failing_nodes(gdf,
                                 'unused localities near stops',
                                 failed_nodes=unused)
        # TODO find out if any stops are inside the boundaries of the unused
        #  areas
        # TODO the geometries are just points for the unused localities
        # TODO find out the closest stops to these points.
        return unused
Example #3
0
def main(named_area):
    """Run the complete NaPTAN checking pipeline for one named area.

    Fetches and unpacks the NPTG and NaPTAN node archives, assembles the
    stops frame, takes the sub-frame selected by ``named_area`` on the
    AreaName level, and runs the cleansing, internal consistency and
    geospatial checks on it before drawing the base map.

    Args:
        named_area: value passed to ``etl.create_naptan_subframe`` to select
            the sub-frame that is checked.

    Returns:
        The sub-frame the checks were run on.
    """
    # -- extract: fetch both datasets, then unpack the downloaded zips -----
    for dataset in ("nptg", "naptan_nodes"):
        etl.naptan_data_source(dataset, "csv")
    nodes_archive = Path(f"{dl_home}/{timestr}_naptan_nodes.zip")
    nptg_archive = Path(f"{dl_home}/{timestr}_nptg.zip")
    for archive in (nodes_archive, nptg_archive):
        etl.extract_naptan_files(archive)

    # -- build the stops frame ---------------------------------------------
    stops = etl.read_naptan_file("Stops")
    stops = etl.deactivated_nodes(stops)
    # Gazette locality and admin area data are joined onto the nodes so that
    # locality and admin area names come from the gazette.
    stops = etl.map_gazette_to_nodes(
        stops, etl.naptan_gazette_localities(), "NptgLocalityCode")
    stops = etl.map_gazette_to_nodes(
        stops, etl.naptan_gazette_admin_area_codes(), "AdminCode")
    # Stop area data and codes are merged on before geometry is calculated.
    stops = etl.merge_stop_areas(stops)
    stops = geopipe.calculate_naptan_geometry(stops)
    # Structural check on the whole download before narrowing it down.
    NaptanStructureChecks.check_naptan_stop_number_limits(stops)

    # -- narrow to the requested administrative area -----------------------
    # TODO or locality.
    # TODO make the named area geojson polygon with feature data.
    area_stops = etl.create_naptan_subframe(stops, "AreaName", named_area)

    # -- data cleansing: capitals, characters, spaces ----------------------
    IllegalCaptials.check_illegal_caps(area_stops, "StopPoint")
    IllegalCharacters.check_illegal_characters(area_stops, "StopPoint")
    IllegalSpaces.check_illegal_spaces(area_stops)

    # -- internal data consistency checks ----------------------------------
    LocalitiesIDStops.localities_with_identical_stops(area_stops)
    NameContainsLocality.stop_name_contains_locality_name(area_stops)
    BearingMissing.stop_with_bearing_missing(area_stops)
    StopNameHighRisks.stop_names_with_high_risk_words(area_stops)
    StopsDifferentNamedAdminArea.stops_in_different_admin_area(area_stops)
    # TODO new checks - add to release notes
    CheckDateTime.check_stop_dates_not_after_today(area_stops)
    CheckName.check_name_length(area_stops)
    MultiRoadName.stop_with_multiple_road_names(area_stops, "CommonName")
    AtcocodeCheck.check_atcocode_length(area_stops)
    print("All internal consistency checks have been completed.")

    # -- geospatial data checks --------------------------------------------
    CoastlineStops.naptan_coastal_nodes(area_stops)
    # Checks that only apply at locality level are delegated to this
    # collection function.
    etl.locality_level_checks(area_stops)
    print("All geospatial functions have been completed.")

    # -- visualise: base map populated with the node cluster ---------------
    generate_base_map(area_stops)
    return area_stops
    def check_nodes_match_nptg_data(cls, gdf, named_area):
        """Report NPTG localities of the nodes' admin area(s) that no node uses.

        The admin area codes present in ``gdf`` select the gazette localities
        belonging to the same area(s); of those, the localities whose
        LocalityName does not occur in ``gdf`` are reported and returned.

        Args:
            gdf: the master or named area naptan data file; read for its
                AdminCode and LocalityName columns.
            named_area: the named area of the naptan subframe. Not used yet.

        Returns:
            The gazette locality rows that are in the nodes' admin area(s)
            but are not referenced by any node. (Earlier versions returned
            None; callers that ignore the result are unaffected.)
        """
        check_name = "check_nodes_match_nptg_data"
        # list of all geographic admin areas
        # TODO get the admin areas from the nodes file, compare against this
        #  list of area names (the list is not used by the check yet).
        admin_areas = [
            "Aberdeen",
            "Aberdeenshire",
            "Angus",
            "Argyll & Bute",
            "Bath & North East Somerset",
            "Bedford",
            "Blackburn with Darwen",
            "Blackpool",
            "Blaenau Gwent",
            "Bournemouth",
            "Bracknell Forest",
            "Bridgend",
            "Brighton and Hove",
            "Bristol",
            "Buckinghamshire",
            "Caerphilly",
            "Cambridgeshire",
            "Cardiff",
            "Carmarthenshire",
            "Central Bedfordshire",
            "Ceredigion",
            "Cheshire East",
            "Cheshire West & Chester",
            "Clackmannanshire",
            "Conwy",
            "Cornwall",
            "Cumbria",
            "Darlington",
            "Denbighshire",
            "Derby",
            "Derbyshire",
            "Devon",
            "Dorset",
            "Dumfries & Galloway",
            "Dundee",
            "Durham",
            "East Ayrshire",
            "East Dunbartonshire",
            "East Lothian",
            "East Renfrewshire",
            "East Riding of Yorkshire",
            "East Sussex",
            "Edinburgh",
            "Essex",
            "Falkirk",
            "Fife",
            "Flintshire",
            "Glasgow",
            "Gloucestershire",
            "Greater London",
            "Greater Manchester",
            "Gwynedd",
            "Halton",
            "Hampshire",
            "Hartlepool",
            "Herefordshire",
            "Hertfordshire",
            "Highland",
            "Inverclyde",
            "Isle of Anglesey",
            "Isle of Wight",
            "Kent",
            "Kingston upon Hull",
            "Lancashire",
            "Leicester",
            "Leicestershire",
            "Lincolnshire",
            "Luton",
            "Medway",
            "Merseyside",
            "Merthyr Tydfil",
            "Middlesbrough",
            "Midlothian",
            "Milton Keynes",
            "Monmouthshire",
            "Moray",
            "Neath Port Talbot",
            "Newport",
            "Norfolk",
            "North Ayrshire",
            "North East Lincolnshire",
            "North Lanarkshire",
            "North Lincolnshire",
            "North Somerset",
            "North Yorkshire",
            "Northamptonshire",
            "Northumberland",
            "Nottingham",
            "Nottinghamshire",
            "Orkney Islands",
            "Oxfordshire",
            "Pembrokeshire",
            "Perth & Kinross",
            "Peterborough",
            "Plymouth",
            "Poole",
            "Portsmouth",
            "Powys",
            "Reading",
            "Redcar & Cleveland",
            "Renfrewshire",
            "Rhondda Cynon Taff",
            "Rutland",
            "Scottish Borders",
            "Shetland Islands",
            "Shropshire",
            "Slough",
            "Somerset",
            "South Ayrshire",
            "South Gloucestershire",
            "South Lanarkshire",
            "South Yorkshire",
            "Southampton",
            "Southend-on-Sea",
            "Staffordshire",
            "Stirling",
            "Stockton-on-Tees",
            "Stoke-on-Trent",
            "Suffolk",
            "Surrey",
            "Swansea",
            "Swindon",
            "Telford & Wrekin",
            "Thurrock",
            "Torbay",
            "Torfaen",
            "Tyne & Wear",
            "Vale of Glamorgan",
            "Warrington",
            "Warwickshire",
            "West Berkshire",
            "West Dunbartonshire",
            "West Lothian",
            "West Midlands",
            "West Sussex",
            "West Yorkshire",
            "Western Isles",
            "Wiltshire",
            "Windsor & Maidenhead",
            "Wokingham",
            "Worcestershire",
            "Wrexham",
            "York",
        ]

        # nptg values. Only the localities table is read by this check; the
        # other gazette tables that used to be loaded here were never used.
        localities = etl.naptan_gazette_localities()

        # get the unique admin area code(s) of the nodes. These are taken
        # from the frame itself: unique() returns an array, which cannot be
        # indexed by column name.
        area_admin_codes = gdf["AdminCode"].unique()
        # get all the localities in nptg for this area.
        # NOTE(review): assumes the gazette localities frame has an AdminCode
        # column, as the earlier code here did - TODO confirm.
        area_localities = localities[
            localities["AdminCode"].isin(area_admin_codes)]
        # list the localities in nptg but not nodes.
        # NOTE(review): the earlier code negated the admin-code filter, which
        # selects localities outside the area; the TODOs in this check ask
        # for in-area localities that the nodes do not use - confirm intent.
        missing_localities = area_localities[
            ~area_localities["LocalityName"].isin(gdf["LocalityName"])]

        # TODO plot sample on map
        # TODO write unused localities in given area to file.
        report_failing_nodes(
            gdf,
            check_name,
            failed_nodes=missing_localities,
        )
        return missing_localities