예제 #1
0
def remove_illegal_caps(gdf, column_name):
    """Flag names that contain capitalised acronyms outside the allowed list.

    NaPTAN guidance says CommonNames should not contain acronyms written in
    capitals, apart from a short list of accepted exceptions. This check
    looks for runs of three or more consecutive capital letters in
    ``column_name`` and fails every row holding at least one run that is not
    in the accepted list. The lowercase-followed-by-uppercase rule mentioned
    in the guidance ('McX' / 'MacX') is not covered by this function.

    Args:
        gdf (pandas.DataFrame): the master naptan nodes frame. It is left
            untouched; the check works on a copy.
        column_name (str): name of the text column to inspect.

    Returns:
        pandas.DataFrame: the failing rows, carrying an extra ``capitals``
        column with the number of capital-letter runs found in the name.
    """
    check_name = remove_illegal_caps.__name__
    # Two-letter entries are harmless here: only runs of 3+ are extracted.
    allowed_caps = {
        'AFC', 'ASDA', 'BBC', 'BP', 'CE', 'DHSS', 'DLR', 'FC', 'GMEX', 'HMP',
        'HQ', 'HSBC', 'II', 'III', 'IKEA', 'IV', 'IX', 'MFI', 'MOD', 'NCP',
        'NE', 'NR', 'NW', 'PH', 'PO', 'RAF', 'RC', 'RSPCA', 'SE', 'SPT', 'SW',
        'VI', 'VII', 'VIII', 'WMC', 'XI', 'XII', 'YMCA', 'YWCA'
    }
    caps_run = '[A-Z]{3,}'
    # Copy first so the helper column never leaks into the caller's frame.
    checked = gdf.copy()
    checked['capitals'] = checked[column_name].str.count(caps_run)
    # Compare each capital run with the exception list, rather than the
    # whole name, so that e.g. "ASDA Superstore" is accepted.
    runs_per_name = checked[column_name].str.findall(caps_run)
    # Missing names give NaN instead of a list; they contain no capitals.
    has_illegal_run = runs_per_name.apply(
        lambda runs: isinstance(runs, list)
        and any(run not in allowed_caps for run in runs)).astype(bool)
    iic = checked[has_illegal_run]
    report.nodes_error_reporting(gdf, check_name, iic)
    return iic
예제 #2
0
def localities_with_identical_stops(gdf_locality):
    """Report duplicated ``StopPoint`` values inside a single locality.

    The check only runs when every row of the frame shares one
    ``NptgLocalityCode``. In that case the rows whose ``StopPoint`` value
    repeats an earlier row are reported and returned. For a frame covering
    any other number of localities nothing is checked.

    Any exception raised along the way (for example a missing column) is
    printed and swallowed, so the function never raises.

    Arguments:
        gdf_locality {[geopandas dataframe]} -- [nodes of one locality.]

    Returns:
        [dataframe or None] -- the duplicated rows, or ``None`` when the
        frame is not a single locality or the check could not be performed.
    """
    check_name = localities_with_identical_stops.__name__
    try:
        locality_codes = gdf_locality['NptgLocalityCode'].unique()
        if len(locality_codes) != 1:
            # Not exactly one locality: the test does not apply.
            return None
        repeated = gdf_locality['StopPoint'].duplicated()
        failed_nodes = gdf_locality[repeated]
        report.nodes_error_reporting(gdf_locality, check_name, failed_nodes)
        return failed_nodes
    except Exception as e:
        print(f'Not a locality, test can not be performed. {e}')
    return None
예제 #3
0
def stops_in_different_admin_authority_geo_position(gdf, stops, authorities):
    """Placeholder for the geo-position admin authority check.

    Intended check (not implemented yet): the AtcoCode prefix of a StopPoint
    represents an AdminArea other than the one associated with the stop's
    locality. Such stops are often legitimate because they sit close to an
    authority boundary, but they can also indicate a stop that is wrongly
    located or associated with the wrong locality.

    For now no nodes are examined: an empty result is handed to the report
    helper and returned.

    Args:
        gdf ([gdf]): [the naptan total dataframe]; only passed to the report.
        stops ([node_type_stops]): currently unused.
        authorities ([gdf]): currently unused.

    Returns:
        str: always the empty string, standing in for "no failed nodes".
    """
    check_name = stops_in_different_admin_authority_geo_position.__name__
    # TODO: build the list of stops not in the correct admin area by
    #  geo position; until then nothing is flagged.
    failed_nodes = ''
    report.nodes_error_reporting(gdf, check_name, failed_nodes)
    return failed_nodes
예제 #4
0
def stops_in_different_admin_area(gdf):
    """Find stops whose ATCOCode prefix disagrees with their AtcoAreaCode.

    The first three characters of ``ATCOCode`` are compared numerically with
    ``AtcoAreaCode``; they should match. Values that cannot be read as
    numbers become NaN and therefore count as a mismatch instead of aborting
    the whole check.

    Args:
        gdf ([pandas dataframe]): [The Master naptan node frame.] It is not
            modified; the helper columns are added to a copy.

    Returns:
        [pandas dataframe] -- the mismatching rows, with a numeric
        ``atcocodeprefix`` column, a numeric ``AtcoAreaCode`` column and the
        boolean ``not matching`` column (which, despite its name, holds
        ``True`` where the two codes match and is ``False`` on every
        returned row).
    """
    check_name = stops_in_different_admin_area.__name__
    # Copy so the caller's AtcoAreaCode column is not overwritten.
    gdf1 = gdf.copy()
    # Numeric comparison copes with 2-digit prefixes and int/str mixes.
    gdf1['atcocodeprefix'] = pd.to_numeric(gdf1['ATCOCode'].str[:3],
                                           errors='coerce')
    gdf1['AtcoAreaCode'] = pd.to_numeric(gdf1['AtcoAreaCode'].astype(str),
                                         errors='coerce')
    # NaN never equals anything, so unparsable codes end up as failures.
    gdf1['not matching'] = gdf1['atcocodeprefix'].eq(gdf1['AtcoAreaCode'])
    failed_nodes = gdf1[~gdf1['not matching']]
    report.nodes_error_reporting(gdf, check_name, failed_nodes)
    return failed_nodes
예제 #5
0
def stop_names_with_high_risk_words(gdf):
    """Find stops whose CommonName contains a high risk word.

    The high risk words are DELETE, DELETED, N/A, NOT IN USE, N/K, OBSOLETE
    and UNUSED. Matching is a case-insensitive substring test.

    Args:
        gdf ([geopandas ]): [a pandas dataframe of the current naptan file.]
            It is not modified; the check works on a copy.

    Returns:
        df_risks [dataframe]: the matching rows, with ``CommonName`` in
        upper case, a ``RiskWords`` column equal to 1 and a ``Warning Flag``
        column holding the name of this check.
    """
    check_name = stop_names_with_high_risk_words.__name__
    riskwords = [
        'DELETE', 'DELETED', 'N/A', 'NOT IN USE',
        'N/K', 'OBSOLETE', 'UNUSED',
    ]
    # Copy so the caller's CommonName column keeps its original casing.
    gdf1 = gdf.copy()
    gdf1['CommonName'] = gdf1['CommonName'].str.upper()
    # Missing names (NaN) are not strings and cannot contain a risk word.
    gdf1['RiskWords'] = gdf1['CommonName'].apply(
        lambda name: int(isinstance(name, str)
                         and any(word in name for word in riskwords)))
    df_risks = gdf1.loc[gdf1['RiskWords'] != 0].copy()
    df_risks['Warning Flag'] = check_name
    report.nodes_error_reporting(gdf, check_name, df_risks)
    return df_risks
예제 #6
0
def stop_with_multiple_road_names(gdf):
    """Find CommonNames that combine two different kinds of road word.

    CommonNames in NaPTAN should be simple and not composite. Most names
    that include two of the designated words (road, street, avenue, garden,
    lane, drive, way and their plurals) are composites of two road names,
    contrary to NaPTAN guidance.

    A name fails when whole-word, case-insensitive matching finds words from
    at least two different categories; the singular and plural of the same
    word count as one category.

    Arguments:
        gdf {[dataframe]} -- the naptan nodes frame. It is not modified.

    Returns:
        [dataframe] -- the failing rows, each listed once in their original
        order, with ``CommonName`` converted to title case.
    """
    check_names = stop_with_multiple_road_names.__name__
    # One (singular, plural) pair per road-type category.
    road_types = [
        ('road', 'roads'), ('street', 'streets'), ('avenue', 'avenues'),
        ('garden', 'gardens'), ('lane', 'lanes'), ('drive', 'drives'),
        ('way', 'ways'),
    ]
    names = gdf['CommonName']
    # Count, per name, how many categories are present. na=False keeps
    # missing names out of the result instead of breaking the mask, and the
    # non-capturing group avoids pandas' capture-group warning.
    categories_found = sum(
        names.str.contains(rf"\b(?:{singular}|{plural})\b",
                           case=False, regex=True, na=False).astype(int)
        for singular, plural in road_types)
    # Copy before editing so the caller's frame is never written to.
    failed_nodes = gdf[categories_found >= 2].copy()
    failed_nodes['CommonName'] = failed_nodes['CommonName'].str.title()
    report.nodes_error_reporting(gdf, check_names, failed_nodes)
    return failed_nodes
예제 #7
0
    def nodes_error_reporting_tests(self):
        """Call ``report.nodes_error_reporting`` without any arguments.

        Two local placeholder functions are declared as well. Neither is
        invoked here, so they have no effect.
        NOTE(review): they look like test cases that were meant to live on
        the class rather than inside this method -- confirm.
        """

        def error_folder_created(self):
            """Placeholder, presumably for an error-folder check."""

        def error_report_created(self):
            """Placeholder, presumably for an error-report check."""

        report.nodes_error_reporting()
예제 #8
0
def stop_area_members_with_different_localities(gdf):
    """Placeholder for the stop-area-members-with-different-localities check.

    The check is not implemented yet: no nodes are examined, an empty result
    is handed to the report helper and returned.

    Args:
        gdf: the naptan nodes frame; only passed on to the report helper.

    Returns:
        str: always the empty string, standing in for "no failed nodes".
    """
    check_name = stop_area_members_with_different_localities.__name__
    # TODO: implement the check; until then nothing is flagged.
    failed_nodes = ''
    report.nodes_error_reporting(gdf, check_name, failed_nodes)
    return failed_nodes
예제 #9
0
def unused_locality_near_stops(gdf):
    """Placeholder for the unused-locality-near-stops check.

    The check is not implemented yet: no nodes are examined, an empty result
    is handed to the report helper and returned.

    Args:
        gdf: the naptan nodes frame; only passed on to the report helper.

    Returns:
        str: always the empty string, standing in for "no failed nodes".
    """
    check_name = unused_locality_near_stops.__name__
    # TODO: implement the check; until then nothing is flagged.
    failed_nodes = ''
    report.nodes_error_reporting(gdf, check_name, failed_nodes)
    return failed_nodes
예제 #10
0
def check_name_too_long(gdf, max_length=80):
    """Find stops whose combined name is longer than ``max_length``.

    The combined name is built as ``"<CommonName>, <LocalityName>"``.
    NOTE(review): the NaPTAN rule describes the full name as
    [Locality, CommonName (Indicator)]; the indicator is not part of the
    length measured here -- confirm whether it should be.

    Arguments:
        gdf {[geopandas dataframe]} -- [The naptan master dataframe.] It is
            not modified; the helper column is added to a copy.
        max_length {[int]} -- longest accepted combined name (default 80).

    Returns:
        [pandas Series] -- the ``ATCOCode`` values of the failing nodes.
    """
    check_name = check_name_too_long.__name__
    # Copy so the 'newName' helper column does not leak into the caller.
    gdf1 = gdf.copy()
    gdf1['newName'] = (gdf1['CommonName'].astype(str) + ', '
                       + gdf1['LocalityName'].astype(str))
    too_long = gdf1['newName'].str.len() > max_length
    df_str = gdf1.loc[too_long]
    report.nodes_error_reporting(gdf, check_name, df_str)
    return df_str.ATCOCode
예제 #11
0
def stop_with_bearing_missing(gdf):
    """Find BCT stops, other than FLX ones, that lack a valid bearing.

    A node fails when its ``StopType`` is ``BCT``, its ``BusStopType`` is
    not ``FLX`` (flexible zone) and its ``Bearing`` is not one of the eight
    compass points N, NE, E, SE, S, SW, W, NW.

    Args:
        gdf {[geopandas dataframe]} -- [The naptan master dataframe.]

    Returns:
        [dataframe]: the failing nodes.
    """
    check_name = stop_with_bearing_missing.__name__
    compass_points = ['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']
    is_bct = gdf['StopType'] == 'BCT'
    not_flexible = gdf['BusStopType'] != 'FLX'
    bearing_known = gdf['Bearing'].isin(compass_points)
    failed_nodes = gdf[is_bct & not_flexible & ~bearing_known]
    report.nodes_error_reporting(gdf, check_name, failed_nodes)
    return failed_nodes
예제 #12
0
def locality_not_unique(gdf):
    """Placeholder for the locality-not-unique check.

    The check is not implemented yet: no nodes are examined, an empty result
    is handed to the report helper and returned.

    Args:
        gdf: the naptan nodes frame; only passed on to the report helper.

    Returns:
        str: always the empty string, standing in for "no failed nodes".
    """
    check_name = locality_not_unique.__name__
    # TODO: implement the check; until then nothing is flagged.
    failed_nodes = ''
    report.nodes_error_reporting(gdf, check_name, failed_nodes)
    return failed_nodes
예제 #13
0
def stops_in_alternate_localities(gdf):
    """Placeholder for the stops-in-alternate-localities check.

    The check is not implemented yet: no nodes are examined, an empty result
    is handed to the report helper and returned.

    Args:
        gdf: the naptan nodes frame; only passed on to the report helper.

    Returns:
        str: always the empty string, standing in for "no failed nodes".
    """
    check_name = stops_in_alternate_localities.__name__
    # TODO: implement the check; until then nothing is flagged.
    failed_nodes = ''
    report.nodes_error_reporting(gdf, check_name, failed_nodes)
    return failed_nodes
예제 #14
0
def stops_area_members_without_identical_names(gdf):
    """Placeholder for the stop-area-members-without-identical-names check.

    The check is not implemented yet: no nodes are examined, an empty result
    is handed to the report helper and returned.

    Args:
        gdf: the naptan nodes frame; only passed on to the report helper.

    Returns:
        str: always the empty string, standing in for "no failed nodes".
    """
    check_name = stops_area_members_without_identical_names.__name__
    # TODO: implement the check; until then nothing is flagged.
    failed_nodes = ''
    report.nodes_error_reporting(gdf, check_name, failed_nodes)
    return failed_nodes
예제 #15
0
def stops_in_parent_locality(gdf):
    """Placeholder for the stops-in-parent-locality check.

    The check is not implemented yet: no nodes are examined, an empty result
    is handed to the report helper and returned.

    Args:
        gdf: the naptan nodes frame; only passed on to the report helper.

    Returns:
        str: always the empty string, standing in for "no failed nodes".
    """
    check_name = stops_in_parent_locality.__name__
    # TODO: implement the check; until then nothing is flagged.
    failed_nodes = ''
    report.nodes_error_reporting(gdf, check_name, failed_nodes)
    return failed_nodes
예제 #16
0
def locality_with_unusually_elongated_shape(gdf):
    """Placeholder for the unusually-elongated-locality check.

    The check is not implemented yet: no nodes are examined, an empty result
    is handed to the report helper and returned.

    Args:
        gdf: the naptan nodes frame; only passed on to the report helper.

    Returns:
        str: always the empty string, standing in for "no failed nodes".
    """
    # Report under this check's own name, not that of another check.
    check_name = locality_with_unusually_elongated_shape.__name__
    # TODO: implement the check; until then nothing is flagged.
    failed_nodes = ''
    report.nodes_error_reporting(gdf, check_name, failed_nodes)
    return failed_nodes
예제 #17
0
def localities_contained_by_non_parent(gdf):
    """Placeholder for the localities-contained-by-non-parent check.

    The check is not implemented yet: no nodes are examined, an empty result
    is handed to the report helper and returned.

    Args:
        gdf: the naptan nodes frame; only passed on to the report helper.

    Returns:
        str: always the empty string, standing in for "no failed nodes".
    """
    # TODO use for both 90% overlap rule and 40-89% rule.
    check_name = localities_contained_by_non_parent.__name__
    # TODO: implement the check; until then nothing is flagged.
    failed_nodes = ''
    report.nodes_error_reporting(gdf, check_name, failed_nodes)
    return failed_nodes
예제 #18
0
def naptan_coastal_nodes(gdf):
    """Report the nodes that the land mask places at sea.

    Each node's ``Latitude`` / ``Longitude`` is passed to ``globe.is_land``
    and the answer is stored in a ``Land_State`` column on ``gdf`` itself
    (the caller's frame gains that column). Nodes that are not on land are
    reported, and a short summary of how many there are is printed.

    Arguments:
        gdf {[geospatial dataframe]} -- [the naptan master dataframe.]

    Raises:
        ValueError: propagated unchanged from the steps above.

    Returns:
        [pandas Series or None] -- number of at-sea nodes per
        ``LocalityName``, most frequent first. ``None`` when any exception
        other than ``ValueError`` occurred; that exception is only printed.
    """
    # TODO - add a column to the master naptan dataframe, and then count up
    #  false values, to get the percent of stops that fail, and then compare
    #  those stops, to find out which ones are near the coast and how near
    #  the coast they are.
    check_name = naptan_coastal_nodes.__name__
    try:
        gdf['Land_State'] = globe.is_land(gdf['Latitude'], gdf['Longitude'])
        coastal_nodes = gdf.loc[~gdf.Land_State]
        high_node_areas = coastal_nodes['LocalityName'].value_counts()
        total_nodes = len(gdf)
        at_sea = len(coastal_nodes)
        # An empty frame has nothing at sea; do not divide by zero.
        percentage = (at_sea / total_nodes) * 100.0 if total_nodes else 0.0
        if percentage >= 1.1:
            # Print the count, not the whole frame of offending nodes.
            print(f"The area has a total of {at_sea} nodes which are at sea, "
                  f"error ratio is {percentage:0.2f}% too high.")
        elif percentage <= 0:
            print('No Nodes were found along the coastline')
        else:
            print(f"The area has a total of {at_sea} nodes at sea in the "
                  f"area. {percentage:0.2f}%")
        report.nodes_error_reporting(gdf, check_name, coastal_nodes)
        return high_node_areas

    except ValueError:
        # Let value errors reach the caller with their original traceback.
        raise

    except Exception as e:
        # Best effort: anything else is only printed and None is returned.
        print(e)
예제 #19
0
def road_name_matches_coordinates(gdf, ATCOCode):
    """Check one stop's ``Street`` against the nearest road name found.

    The stop is looked up by ``ATCOCode``, ``get_nearest_road_name`` is asked
    for the road closest to it, and the second element of that result is
    compared with the ``Street`` value recorded for the stop. A mismatch is
    reported.

    Arguments:
        gdf {[geopandas dataframe]} -- [pass in the chosen dataframe]
        ATCOCode {[str]} -- [Pass in the given naptan unique stop id.]

    Raises:
        KeyError: when no row of ``gdf`` carries the given ``ATCOCode``.

    Returns:
        [pandas Series] -- the stop's ``ATCOCode`` when the names differ, or
        an empty Series of the same kind when they match.
    """
    check_name = road_name_matches_coordinates.__name__
    node = gdf.loc[gdf['ATCOCode'] == ATCOCode]
    found_name = get_nearest_road_name(gdf, ATCOCode)
    if node.empty:
        raise KeyError(f'ATCOCode {ATCOCode!r} not found in the dataframe.')
    # Positional access: the filtered row keeps its original index label,
    # which is almost never 0.
    recorded_street = node['Street'].iloc[0]
    if found_name[1] == recorded_street:
        print('Road Name Matches')
        # Nothing failed: return an empty selection instead of falling
        # through to an unbound result variable.
        return node['ATCOCode'].iloc[0:0]
    res = node['ATCOCode']
    report.nodes_error_reporting(gdf, check_name, res)
    return res
예제 #20
0
    # for reporting
    check_name = remove_illegal_chars.__name__
    gdf1 = gdf
    # our regex pattern of allowed special characters.
    pattern = r"\bO/S|NO\.|P\.H\.|P\.O\.|ST\.|'s\b"
    excluded_nodes = gdf1[gdf1[col_name].str.contains(pattern,
                                                      case=False,
                                                      regex=True)]
    mask = gdf1[col_name].isin(excluded_nodes[col_name])
    df_filter = gdf1[~mask]  # removing excluded nodes from stops frame.
    regex = re.compile(r"\[^a-zA-Z !@#$%&\*_\+=\|:;<>,./[\]\{\}\']",
                       flags=re.IGNORECASE)
    df_filter[col_name] = df_filter[col_name].str.replace(regex,
                                                          '',
                                                          regex=True)
    report.nodes_error_reporting(gdf, check_name, df_filter)
    result = df_filter.append(excluded_nodes)
    return result


# %%
def stop_with_bearing_missing(gdf):
    """[summary] The data does not include a value for “bearing” for all BCT
     stops except those in the FLX (flexible zone) sub-type.

    Args:
        gdf {[geopandas dataframe]} -- [The naptan master dataframe.]

    Returns:
        [type]: [description]
    """