コード例 #1
0
def clean_stl_add(df):
    """Turn Saint Louis street-segment address ranges into cleaned address rows.

    The input is expected to be a GeoDataFrame (``to_crs`` and ``bounds`` are
    used) with the columns ``STREETNAME``, ``STREETTYPE``, ``PREDIR``,
    ``ZIP_CODE``, ``FROMLEFT``, ``TOLEFT``, ``FROMRIGHT`` and ``TORIGHT``.

    Steps:
      1. Rename the street columns to the ``address_*`` names.
      2. Reproject to ``default_crs`` and record each segment's bounding box.
      3. Derive a ``direction`` label from how the left/right ranges run.
      4. Stack the right-side and left-side ranges as separate rows with
         ``address_n1`` (low end) and ``address_n2`` (high end).
      5. Drop rows whose range has no positive house number.
      6. Expand each range with ``make_panel`` and interpolate coordinates
         with ``interpolate_polygon``.
      7. Run the shared address-column and parsing helpers.

    :param df: GeoDataFrame of street segments.
    :return: the frame returned by ``parallelize_dataframe``.
    """
    df = df.rename(
        columns={
            "STREETNAME": "address_sn",
            "STREETTYPE": "address_ss",
            "PREDIR": "address_sd",
            "ZIP_CODE": "address_zip",
        })
    # Positional segment id; used below as the key for interpolate_polygon.
    df['index'] = np.arange(df.shape[0])
    df = df.to_crs(default_crs)
    df.crs = default_crs
    bounds = df.bounds
    df['address_city'] = 'saint louis'
    df['latitude_min'] = bounds["miny"]
    df['latitude_max'] = bounds["maxy"]
    df['longitude_min'] = bounds["minx"]
    df['longitude_max'] = bounds["maxx"]

    left_up = df['FROMLEFT'] < df['TOLEFT']
    left_down = df['FROMLEFT'] > df['TOLEFT']
    right_up = df['FROMRIGHT'] < df['TORIGHT']
    right_down = df['FROMRIGHT'] > df['TORIGHT']
    # First matching condition wins; anything else (including ties) is "SW",
    # which is what the original nested np.where chain produced.
    df['direction'] = np.select(
        [left_up & right_up, left_up & right_down, left_down & right_up],
        ["NE", "NW", "SE"],
        default="SW")

    # Right side of the street: keep every column that is not a LEFT range.
    # .copy() so the assignments below write to an independent frame instead
    # of a slice of df.
    df_r = df[[col for col in df.columns if "LEFT" not in col]].copy()
    df_r['address_n1'] = np.where(df_r['FROMRIGHT'] > df_r['TORIGHT'],
                                  df_r['TORIGHT'], df_r['FROMRIGHT'])
    df_r['address_n2'] = np.where(df_r['TORIGHT'] > df_r['FROMRIGHT'],
                                  df_r['TORIGHT'], df_r['FROMRIGHT'])

    # Left side of the street: same treatment with the RIGHT range dropped.
    df_l = df[[col for col in df.columns if "RIGHT" not in col]].copy()
    df_l['address_n1'] = np.where(df_l['FROMLEFT'] > df_l['TOLEFT'],
                                  df_l['TOLEFT'], df_l['FROMLEFT'])
    df_l['address_n2'] = np.where(df_l['TOLEFT'] > df_l['FROMLEFT'],
                                  df_l['TOLEFT'], df_l['FROMLEFT'])

    df = pd.concat([df_r, df_l])
    # Drop ranges where BOTH ends are non-positive. The original tested
    # address_n1 twice, so address_n2 was never checked.
    df = df[~((df['address_n1'] <= 0) & (df['address_n2'] <= 0))]
    # NOTE(review): make_panel presumably emits one row per house number in
    # [address_n1, address_n2] under a 'year' column — confirm against its
    # definition.
    df = make_panel(df,
                    start_year="address_n1",
                    end_year="address_n2",
                    current_year=df['address_n2'],
                    evens_and_odds=True).rename(columns={'year': 'address_n1'})

    # Interpolate lat/long for each expanded address along its segment.
    df = interpolate_polygon(df, "index", "direction")
    df['lat'] = df['lat_interpolated']
    df['long'] = df["long_interpolated"]

    df = add_subset_address_cols(df)
    df = parallelize_dataframe(df=df, func=clean_parse_parallel, n_cores=2)
    return df
コード例 #2
0
def clean_chi_add_parcels(df):
    """Standardize Chicago parcel address columns and parse the addresses.

    The column rename is applied in place, so the caller's frame is modified.

    :param df: DataFrame of Chicago parcel records.
    :return: the frame returned by ``parallelize_dataframe``.
    """
    df.rename(
        columns={
            'property_address': 'address_fa',
            'property_city': 'address_city',
            'property_zip': 'address_zip',
            'pin': 'parcelID',
            'latitude': 'lat',
            'longitude': 'long',
        },
        inplace=True,
    )
    with_address_cols = add_subset_address_cols(df)
    return parallelize_dataframe(df=with_address_cols,
                                 func=clean_parse_parallel,
                                 n_cores=4)
コード例 #3
0
def clean_sac_add(df):
    """Standardize Sacramento address columns and parse the addresses.

    The column rename is applied in place, so the caller's frame is modified.

    :param df: DataFrame of Sacramento address records.
    :return: the frame returned by ``parallelize_dataframe``.
    """
    sac_rename_dict = {
        'APN': 'parcelID',
        "Address_Number": 'address_n1',
        "Street_Name": "address_sn",
        "Street_Suffix": "address_ss",
        # Was "Pre_Directiona;" (typo), which matched no column, so the
        # pre-directional was never renamed to address_sd.
        "Pre_Directional": "address_sd",
        "Postal_City": 'address_city',
        "Zip_Code": "address_zip",
        "Latitude_Y": "lat",
        "Longitude_X": "long",
    }
    df.rename(columns=sac_rename_dict, inplace=True)
    df = add_subset_address_cols(df)
    df = parallelize_dataframe(df=df, func=clean_parse_parallel, n_cores=2)
    return df
コード例 #4
0
def clean_chi_add_points(df):
    """Standardize Chicago address-point columns and parse the addresses.

    The column rename is applied in place, so the caller's frame is modified.

    :param df: DataFrame of Chicago address points.
    :return: the frame returned by ``parallelize_dataframe``.
    """
    df.rename(
        columns={
            'ADDRNOCOM': 'address_n1',
            'STNAMEPRD': 'address_sd',
            'STNAME': 'address_sn',
            'STNAMEPOT': 'address_ss',
            'PLACENAME': 'address_city',
            'ZIP5': 'address_zip',
            'CMPADDABRV': 'address_fa',
            'PIN': 'parcelID',
            'XPOSITION': 'long',
            'YPOSITION': 'lat',
        },
        inplace=True,
    )
    with_address_cols = add_subset_address_cols(df)
    return parallelize_dataframe(df=with_address_cols,
                                 func=clean_parse_parallel,
                                 n_cores=2)
コード例 #5
0
def clean_baton_rouge_add(df):
    """Standardize Baton Rouge address columns and parse the addresses.

    Latitude and longitude are extracted from the ``GEOLOCATION`` text column
    as the two comma-separated numbers it contains; they are stored as the
    extracted strings (no numeric conversion happens here). The column rename
    and the ``lat``/``long`` assignments modify the caller's frame.

    :param df: DataFrame of Baton Rouge address records with a
        ``GEOLOCATION`` string column.
    :return: the frame returned by ``parallelize_dataframe``.
    """
    baton_rouge_rename_dict = {
        # NOTE(review): 'ADDRNOCOM' is the same key used for the Chicago
        # address points — confirm it is this dataset's house-number column.
        'ADDRNOCOM': 'address_n1',
        # Was 'ASTREET PREFIX DIRECTION'; the stray leading 'A' is
        # inconsistent with the sibling 'STREET ...' keys.
        'STREET PREFIX DIRECTION': 'address_sd',
        'STREET NAME': 'address_sn',
        'STREET SUFFIX TYPE': 'address_ss',
        'CITY': 'address_city',
        'ZIP': 'address_zip',
        'FULL ADDRESS': 'address_fa'
    }
    df.rename(columns=baton_rouge_rename_dict, inplace=True)
    # Raw string avoids the invalid "\." escape. An optional sign and optional
    # whitespace after the comma are accepted so values such as
    # "30.45, -91.15" are captured instead of yielding NaN or losing the
    # minus sign.
    lat_long = df['GEOLOCATION'].str.extract(r'(-?[0-9.]+),\s*(-?[0-9.]+)')
    df['lat'] = lat_long.iloc[:, 0]
    df['long'] = lat_long.iloc[:, 1]
    df = add_subset_address_cols(df)
    df = parallelize_dataframe(df=df, func=clean_parse_parallel, n_cores=4)
    return df
コード例 #6
0
def clean_seattle_add(df):
    """Standardize Seattle address columns and parse the addresses.

    The column rename is applied in place, so the caller's frame is modified.

    :param df: DataFrame of Seattle address records.
    :return: the frame returned by ``parallelize_dataframe``.
    """
    df.rename(
        columns={
            'PIN': 'parcelID',
            'ADDR_NUM': 'address_n1',
            'ADDR_SN': 'address_sn',
            'ADDR_ST': 'address_ss',
            'ADDR_SD': 'address_sd',
            'ZIP5': 'address_zip',
            'CTYNAME': 'address_city',
            'ADDR_FULL': 'address_fa',
            'LON': 'long',
            'LAT': 'lat',
        },
        inplace=True,
    )
    with_address_cols = add_subset_address_cols(df)
    return parallelize_dataframe(df=with_address_cols,
                                 func=clean_parse_parallel,
                                 n_cores=2)
コード例 #7
0
def clean_orlando_add(df):
    """Standardize Orlando address columns and parse the addresses.

    The column rename is applied in place, so the caller's frame is modified.

    :param df: DataFrame of Orlando address records.
    :return: the frame returned by ``parallelize_dataframe``.
    """
    df.rename(
        columns={
            'OFFICIAL_P': 'parcelID',
            "COMPLETE_A": 'address_fa',
            "ADDRESS__1": 'address_n1',
            "ADDRESS__2": "address_n2",
            "BASENAME": "address_sn",
            "POST_TYPE": "address_ss",
            "POST_DIREC": "address_sd",
            "MUNICIPAL_": 'address_city',
            "ZIPCODE": "address_zip",
            "LATITUDE": "lat",
            "LONGITUDE": "long",
        },
        inplace=True,
    )
    with_address_cols = add_subset_address_cols(df)
    return parallelize_dataframe(df=with_address_cols,
                                 func=clean_parse_parallel,
                                 n_cores=2)
コード例 #8
0
def clean_sf_add(df):
    """Standardize San Francisco address columns and parse the addresses.

    The source has no city column, so ``address_city`` is set to the constant
    "San Francisco". Both the rename and that assignment modify the caller's
    frame.

    :param df: DataFrame of San Francisco address records.
    :return: the frame returned by ``parallelize_dataframe``.
    """
    df.rename(
        columns={
            "Parcel Number": 'parcelID',
            'Unit Number': 'address_u',
            'Address Number': 'address_n1',
            'Street Name': 'address_sn',
            'Street Type': 'address_ss',
            'ZIP Code': 'address_zip',
            'Address': 'address_fa',
            'Longitude': 'long',
            'Latitude': 'lat',
        },
        inplace=True,
    )
    df['address_city'] = "San Francisco"
    with_address_cols = add_subset_address_cols(df)
    return parallelize_dataframe(df=with_address_cols,
                                 func=clean_parse_parallel,
                                 n_cores=2)
コード例 #9
0
def clean_sd_add(df):
    """Standardize San Diego address columns and parse the addresses.

    The frame is reprojected to ``default_crs`` and ``long``/``lat`` are taken
    from each geometry's centroid. The column rename is applied in place, so
    the caller's frame is modified.

    :param df: GeoDataFrame of San Diego address records.
    :return: the frame returned by ``parallelize_dataframe``.
    """
    df.rename(
        columns={
            'addrunit': 'address_u',
            'addrnmbr': 'address_n1',
            'addrpdir': 'address_sd',
            'addrname': 'address_sn',
            'addrsfx': 'address_ss',
            'addrzip': 'address_zip',
            'community': 'address_city',
            'PIN': 'parcelID',
        },
        inplace=True,
    )
    projected = df.to_crs(default_crs)
    projected.crs = default_crs
    # Centroid is evaluated separately for each coordinate, as before.
    projected['long'] = projected.geometry.centroid.x
    projected['lat'] = projected.geometry.centroid.y
    with_address_cols = add_subset_address_cols(projected)
    return parallelize_dataframe(df=with_address_cols,
                                 func=clean_parse_parallel,
                                 n_cores=2)
コード例 #10
0
def clean_la_add(df):
    """Standardize Los Angeles address columns and parse the addresses.

    ``combine_names`` is called on ``PreType``, ``StArticle`` and
    ``StreetName`` with ``newCol="address_sn"``; its return value is not used,
    so it presumably writes that column onto ``df`` in place (confirm against
    its definition). The frame is then reprojected to ``default_crs`` and
    ``long``/``lat`` are taken from each geometry's centroid. The column
    rename is applied in place, so the caller's frame is modified.

    :param df: GeoDataFrame of Los Angeles address records.
    :return: the frame returned by ``parallelize_dataframe``.
    """
    df.rename(
        columns={
            'AIN': 'parcelID',
            'UnitName': 'address_u',
            'Number': 'address_n1',
            'PostType': 'address_ss',
            'PreDirAbbr': 'address_sd',
            'ZipCode': 'address_zip',
            'LegalComm': 'address_city',
        },
        inplace=True,
    )
    street_name_parts = ['PreType', 'StArticle', 'StreetName']
    combine_names(df, name_cols=street_name_parts, newCol="address_sn")
    projected = df.to_crs(default_crs)
    projected.crs = default_crs
    projected['long'] = projected.geometry.centroid.x
    projected['lat'] = projected.geometry.centroid.y
    with_address_cols = add_subset_address_cols(projected)
    return parallelize_dataframe(df=with_address_cols,
                                 func=clean_parse_parallel,
                                 n_cores=2)
コード例 #11
0
def clean_int_addresses(df):
    """Run the shared address-column and parsing helpers on ``df``.

    No renaming is done here; ``df`` is passed to ``add_subset_address_cols``
    as-is.

    :param df: DataFrame of address records.
    :return: the frame returned by ``parallelize_dataframe``.
    """
    with_address_cols = add_subset_address_cols(df)
    return parallelize_dataframe(df=with_address_cols,
                                 func=clean_parse_parallel,
                                 n_cores=2)