Example #1
def test_random_state():
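    """Without a seed, kmeans partitions should differ across runs; with a fixed seed they should match.

    `columns` is assumed to be the module-level variable list shown in Example #14.
    """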

    # no seeds
    reno = Community.from_census(msa_fips="39900", datastore=DataStore())
    r1 = reno.cluster(columns=columns, method="kmeans", n_clusters=5)
    r2 = reno.cluster(columns=columns, method="kmeans", n_clusters=5)
    card1 = r1.gdf.groupby("kmeans").count()["geoid"].values
    card1.sort()
    card2 = r2.gdf.groupby("kmeans").count()["geoid"].values
    card2.sort()
    # test that the cardinalities are different
    np.testing.assert_raises(
        AssertionError, np.testing.assert_array_equal, card1, card2
    )

    # seeds
    reno = Community.from_census(msa_fips="39900", datastore=DataStore())
    seed = 10
    r1 = reno.cluster(columns=columns, method="kmeans", n_clusters=5, random_state=seed)
    r2 = reno.cluster(columns=columns, method="kmeans", n_clusters=5, random_state=seed)
    card1 = r1.gdf.groupby("kmeans").count()["geoid"].values
    card1.sort()
    card2 = r2.gdf.groupby("kmeans").count()["geoid"].values
    card2.sort()
    # test that the cardinalities are identical
    np.testing.assert_array_equal(card1, card2)
Example #2
def test_Community_from_gdfs_crs():
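    """from_geodataframes should raise an AssertionError when the input frames carry different CRS."""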

    t90 = datasets.tracts_1990()
    t00 = datasets.tracts_2000()
    t90 = t90.to_crs(4326)
    t00 = t00.to_crs(3857)
    try:
        Community.from_geodataframes([t90, t00])
    except AssertionError:
        print("From_gdfs constructor successfully detects inconsistent crs.")
        pass
Example #3
def Adaptive_Choropleth_Mapper_viz(param):
    write_LOG(param)

    # convert year, variable to years, variables in the param
    if ('years' not in param and 'year' in param):
        param['years'] = [param['year']]
    if ('variables' not in param and 'variable' in param):
        param['variables'] = [param['variable']]
    #print(param)

    # select community by state_fips, msa_fips, county_fips
    if ('msa_fips' in param and param['msa_fips']):
        community = Community.from_ltdb(years=param['years'],
                                        msa_fips=param['msa_fips'])
        #community = Community.from_ltdb(msa_fips=param['msa_fips'])
    elif ('county_fips' in param and param['county_fips']):
        community = Community.from_ltdb(years=param['years'],
                                        county_fips=param['county_fips'])
    elif ('state_fips' in param and param['state_fips']):
        community = Community.from_ltdb(years=param['years'],
                                        state_fips=param['state_fips'])
    #print(community.gdf)

    codebook = pd.read_csv('template/conversion_table_codebook.csv')
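    # the codebook maps each variable code to its short or full display name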
    codebook.set_index(keys='variable', inplace=True)
    labels = copy.deepcopy(param['variables'])
    label = 'short_name'  # default
    if param['label'] in ('variable', 'full_name', 'short_name'):
        label = param['label']
    if (label != 'variable'):
        for idx, variable in enumerate(param['variables']):
            try:
                codeRec = codebook.loc[variable]
                labels[idx] = codeRec[label]
            except KeyError:
                print("variable not found in codebook. variable:", variable)
    param['labels'] = labels

    write_INDEX_html(param)
    write_GEO_CONFIG_js(param)
    write_GEO_VARIABLES_js(community, param)
    write_GEO_JSON_js(community, param)

    local_dir = os.path.dirname(os.path.realpath(__file__))
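    # build a file:// URL for the generated index.html and open it in the default browser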
    fname = urllib.parse.quote('index.html')
    template_dir = os.path.join(local_dir, 'ACM_' + param['filename_suffix'])
    url = 'file:' + os.path.join(template_dir, fname)
    webbrowser.open(url)

    print('Please open ' + '"ACM_' + param['filename_suffix'] + '/index.html"' +
          ' in your web browser.')
    print('Advanced options are available in ' + '"ACM_' +
          param['filename_suffix'] + '/data/GEO_CONFIG.js"')
Example #4
def test_harmonize_area():
    la = Community.from_census(county_fips="06037")
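    # `local_raster` is assumed to be a module-level path to a local NLCD raster used by harmonize()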

    harmonized = la.harmonize(2000,
                              extensive_variables=["n_total_housing_units"],
                              intensive_variables=["p_vacant_housing_units"],
                              raster=local_raster)

    assert_allclose(
        harmonized.gdf[harmonized.gdf.year ==
                       2000].n_total_housing_units.sum(),
        3271578.974605,
        atol=600,
    )
    assert_allclose(
        harmonized.gdf[harmonized.gdf.year ==
                       1990].n_total_housing_units.sum(),
        3163560.996240,
    )
    assert_allclose(
        harmonized.gdf[harmonized.gdf.year ==
                       2010].n_total_housing_units.sum(),
        3441415.997327,
    )
    assert_allclose(harmonized.gdf.p_vacant_housing_units.sum(),
                    33011.58879,
                    rtol=1e-03)
Example #5
def test_Community_from_gdfs():

    t90 = datasets.tracts_1990()
    t90 = t90[t90.geoid.str.startswith("11")]
    t00 = datasets.tracts_2000()
    t00 = t00[t00.geoid.str.startswith("11")]

    assert Community.from_geodataframes([t90, t00]).gdf.shape == (380, 192)
Example #6
def test_harmonize_area_weighted():

    balt = Community.from_census(county_fips="24510")
    harmonized_nlcd_weighted = balt.harmonize(
        2000,
        extensive_variables=["n_total_housing_units"],
        intensive_variables=["p_vacant_housing_units"],
        weights_method="dasymetric",
        raster=local_raster)
    assert harmonized_nlcd_weighted.gdf.n_total_housing_units.sum() == 900620.0
    assert_allclose(harmonized_nlcd_weighted.gdf.p_vacant_housing_units.sum(),
                    8832.8796,
                    rtol=1e-03)
Example #7
def test_transition():
    """
    Testing transition modeling.
    """
    columbus = Community.from_ltdb(msa_fips="18140")
    columns = [
        "median_household_income",
        "p_poverty_rate",
        "p_edu_college_greater",
        "p_unemployment_rate",
    ]
    columbus1 = columbus.cluster(
        columns=[
            "median_household_income",
            "p_poverty_rate",
            "p_edu_college_greater",
            "p_unemployment_rate",
        ],
        method="ward",
    )

    # 1. Markov modeling
    m = transition(columbus1.gdf, cluster_col="ward")
    mp = np.array([
        [
            0.79189189, 0.00540541, 0.0027027, 0.13243243, 0.06216216,
            0.00540541
        ],
        [0.0203252, 0.75609756, 0.10569106, 0.11382114, 0.0, 0.00406504],
        [0.00917431, 0.20183486, 0.75229358, 0.01834862, 0.0, 0.01834862],
        [0.1959799, 0.18341709, 0.00251256, 0.61809045, 0.0, 0.0],
        [0.32307692, 0.0, 0.0, 0.0, 0.66153846, 0.01538462],
        [0.09375, 0.0625, 0.0, 0.0, 0.0, 0.84375],
    ])
    np.testing.assert_allclose(m.p, mp, RTOL)

    # 2. Spatial Markov modeling
    np.random.seed(5)
    sm = transition(columbus1.gdf, cluster_col="ward", w_type="queen")
    smp = np.array([
        [0.82413793, 0.0, 0.0, 0.10689655, 0.06896552, 0.0],
        [0.25, 0.5, 0.125, 0.125, 0.0, 0.0],
        [0.5, 0.0, 0.5, 0.0, 0.0, 0.0],
        [0.23809524, 0.0952381, 0.0, 0.66666667, 0.0, 0.0],
        [0.21621622, 0.0, 0.0, 0.0, 0.75675676, 0.02702703],
        [0.16666667, 0.0, 0.0, 0.0, 0.0, 0.83333333],
    ])
    np.testing.assert_allclose(sm.P[0], smp, RTOL)
Example #8
def test_sequence():
    """
    Testing sequence modeling.
    """

    columbus = Community.from_ltdb(msa_fips="18140")
    columns = [
        "median_household_income",
        "p_poverty_rate",
        "p_edu_college_greater",
        "p_unemployment_rate",
    ]
    columbus1 = columbus.cluster(
        columns=[
            "median_household_income",
            "p_poverty_rate",
            "p_edu_college_greater",
            "p_unemployment_rate",
        ],
        method="ward",
    )

    # 1. Transition-oriented optimal matching
    output = sequence(columbus1.gdf,
                      seq_clusters=5,
                      dist_type="tran",
                      cluster_col="ward")

    values = np.array([3, 3, 0, 2, 3, 1])
    np.testing.assert_allclose(output[1].values[0], values, RTOL)

    # 2. Hamming distance

    output = sequence(columbus1.gdf,
                      seq_clusters=5,
                      dist_type="hamming",
                      cluster_col="ward")
    values = np.array([3, 3, 0, 2, 3, 2])
    np.testing.assert_allclose(output[1].values[0], values, RTOL)
Example #9
def test_Community_from_census():
    assert Community.from_census(state_fips="24").gdf.shape == (3759, 195)
Example #10
def test_Community_from_boundary():
    msas = datasets.msas()

    reno = msas[msas["geoid"] == "39900"]
    rn = Community.from_census(boundary=reno)
    assert rn.gdf.shape == (234, 195)
Example #11
def test_Community_from_indices():

    chi = Community.from_ncdb(fips=["17031", "17019"])
    assert chi.gdf.shape == (6797, 78)
Example #12
def test_Community_from_stcofips():

    mn = Community.from_census(state_fips="27", county_fips=["26001", "26002"])
    assert mn.gdf.shape == (3881, 195)
Example #13
def test_Community_from_cbsa():

    la = Community.from_census(msa_fips="31080")
    assert la.gdf.shape == (7683, 195)
Example #14
from geosnap import Community
import numpy as np
from geosnap import DataStore
from numpy.testing import assert_array_equal, assert_array_almost_equal

reno = Community.from_census(msa_fips="39900", datastore=DataStore())
columns = [
    "median_household_income",
    "p_poverty_rate",
    "p_unemployment_rate",
]

# Aspatial Clusters


def test_gm():

    r = reno.cluster(columns=columns, method="gaussian_mixture", best_model=True)
    assert len(r.gdf.gaussian_mixture.unique()) >= 5


def test_ward():
    r = reno.cluster(columns=columns, method="ward")
    assert len(r.gdf.ward.unique()) == 7


def test_spectral():

    r = reno.cluster(columns=columns, method="spectral")
    assert len(r.gdf.spectral.unique()) == 7
Example #15
import numpy as np

from geosnap import Community
from geosnap.analyze import sequence, transition

RTOL = 0.00001

columbus = Community.from_ltdb(msa_fips="18140")
columns = [
    "median_household_income",
    "p_poverty_rate",
    "p_edu_college_greater",
    "p_unemployment_rate",
]
columbus1 = columbus.cluster(
    columns=[
        "median_household_income",
        "p_poverty_rate",
        "p_edu_college_greater",
        "p_unemployment_rate",
    ],
    method="ward",
)


def test_transition():
    """
    Testing transition modeling.
    """

    # 1. Markov modeling
Example #16
def Adaptive_Choropleth_Mapper_viz(param):

    # convert year, variable to years, variables in the param
    if ('years' not in param and 'year' in param):
        param['years'] = [param['year']]
    if ('variables' not in param and 'variable' in param):
        param['variables'] = [param['variable']]
    #print(param)

    # select community by state_fips, msa_fips, county_fips
    community = None
    if ('msa_fips' in param and param['msa_fips']):
        community = Community.from_ltdb(years=param['years'],
                                        msa_fips=param['msa_fips'])
        #community = Community.from_ltdb(msa_fips=param['msa_fips'])
    elif ('county_fips' in param and param['county_fips']):
        community = Community.from_ltdb(years=param['years'],
                                        county_fips=param['county_fips'])
    elif ('state_fips' in param and param['state_fips']):
        community = Community.from_ltdb(years=param['years'],
                                        state_fips=param['state_fips'])
    #print(community.gdf)

# if the user enters CSV and shapefile, use the files from the user

#### This is executed when the user enters attributes in a csv file and geometry in a shapefile ######################
    if (community is None and 'inputCSV' in param):
        community = Community()
        #community.gdf = pd.read_csv(param['inputCSV'], dtype={'geoid':str})
        community.gdf = param["inputCSV"]
        #print(community.gdf)
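        # the first CSV column is taken as the geoid; cast it to str so it
        # matches the shapefile index when geometries are joined below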
        geoid = community.gdf.columns[0]
        #community.gdf = community.gdf.astype(str)
        #print("inputCSV:  " + community.gdf.geoid)
        community.gdf[community.gdf.columns[0]] = community.gdf[geoid].astype(
            str)
        #print("community.gdf.columns[0]:", community.gdf.columns[0])

        # read shape file to df_shape
        #df_shape = gpd.read_file(param['shapefile'])
        df_shape = param['shapefile']
        df_shape = df_shape.astype(str)
        #print("shapefile:  " + df_shape.GEOID10)
        geokey = df_shape.columns[0]
        #print(geokey)
        df_shape = df_shape.set_index(geokey)

        # insert geometry to community.gdf
        geometry = []
        for index, row in community.gdf.iterrows():
            tractid = row[geoid]
            try:
                tract = df_shape.loc[tractid]
                geometry.append(shapely.wkt.loads(tract.geometry))
            except KeyError:
                #print("Tract ID [{}] is not found in the shape file {}".format(tractid, param['shapefile']))
                geometry.append(None)
    # print( "geometry" in community.gdf )
    #f hasattr(community.gdf, "geoemtry"):
    #if (community.gdf["geoemtry"] is None):
    #   pass
    #else:
        if (("geometry" in community.gdf) == False):
            community.gdf.insert(len(community.gdf.columns), "geometry",
                                 geometry)


################################################################################################################

    community.gdf = community.gdf.replace([np.inf, -np.inf], np.nan)
    # check if geometry is not null for Spatial Clustering
    community.gdf = community.gdf[pd.notnull(community.gdf['geometry'])]
    #print(community.gdf)

    codebook = pd.read_csv('template/conversion_table_codebook.csv')
    codebook.set_index(keys='variable', inplace=True)
    labels = copy.deepcopy(param['variables'])
    label = 'short_name'  # default
    if param['label'] in ('variable', 'full_name', 'short_name'):
        label = param['label']
    if (label != 'variable'):
        for idx, variable in enumerate(param['variables']):
            try:
                codeRec = codebook.loc[variable]
                labels[idx] = codeRec[label]
            except KeyError:
                print("variable not found in codebook. variable:", variable)
    param['labels'] = labels

    write_INDEX_html(param)
    write_CONFIG_js(param)
    write_VARIABLES_js(community, param)
    write_GEO_JSON_js(community, param)
    '''
    #Create directory for local machine
    local_dir = os.path.dirname(os.path.realpath(__file__))
    fname =urllib.parse.quote('index.html')
    template_dir = os.path.join(local_dir, 'ACM_' + param['filename_suffix'])
    url = 'file:' + os.path.join(template_dir, fname)
    webbrowser.open(url)
    
    print('Please run ' + '"ACM_' + param['filename_suffix']+'/index.html"'+' to your web browser.')
    print('Advanced options are available in ' + '"ACM_' + param['filename_suffix']+'/data/CONFIG.js"')
    '''

    #Build the URLs used to open the visualization
    servers = list(notebookapp.list_running_servers())
    servers1 = 'https://cybergisx.cigi.illinois.edu' + servers[0][
        "base_url"] + 'view'
    servers2 = 'https://cybergisx.cigi.illinois.edu' + servers[0][
        "base_url"] + 'edit'
    cwd = os.getcwd()
    prefix_cwd = "/home/jovyan/work"
    cwd = cwd.replace(prefix_cwd, "")

    # This is for a Jupyter notebook installed on your PC
    local_dir1 = cwd
    local_dir2 = cwd

    #This is for CyberGISX. Uncomment the two lines below when you run in the CyberGISX environment
    #local_dir1 = servers1 + cwd
    #local_dir2 = servers2 + cwd

    #print(local_dir)
    fname = urllib.parse.quote('index.html')
    template_dir = os.path.join(local_dir1, 'ACM_' + param['filename_suffix'])
    #url = 'file:' + os.path.join(template_dir, fname)
    url = os.path.join(template_dir, fname)
    webbrowser.open(url)
    print(
        'To see your visualization, click the URL below (or locate the files):'
    )
    print(url)
    print('Advanced options are available in ')
    print(local_dir2 + '/' + 'ACM_' + param['filename_suffix'] +
          '/data/CONFIG_' + param['filename_suffix'] + '.js')
Example #17
def test_Community_from_lodes():
    dc = Community.from_lodes(state_fips="11", years=[2010, 2015])
    assert dc.gdf.shape == (6046, 57)
Example #18
def test_Community_from_lodes():
    dc = Community.from_lodes(state_fips="10", years=[2008, 2015])
    assert dc.gdf.shape == (8674, 57)
Example #19
from geosnap import Community, io
import numpy as np

reno = Community.from_census(msa_fips="39900")
columns = ["median_household_income", "p_poverty_rate", "p_unemployment_rate"]

# Aspatial Clusters


def test_gm():

    r = reno.cluster(columns=columns,
                     method="gaussian_mixture",
                     best_model=True)
    assert len(r.gdf.gaussian_mixture.unique()) >= 5


def test_ward():
    r = reno.cluster(columns=columns, method="ward")
    assert len(r.gdf.ward.unique()) == 7


def test_spectral():

    r = reno.cluster(columns=columns, method="spectral")
    assert len(r.gdf.spectral.unique()) == 7


def test_kmeans():

    r = reno.cluster(columns=columns, method="kmeans")
Example #20
def test_Community_from_lodes():
    de = Community.from_lodes(state_fips="10", years=[2008, 2015])
    assert de.gdf.shape == (41598, 58)
Example #21
#%%
from geosnap import Community

#%%
fip = '48'
out1 = '/tmp/TX_2000.wkt'
out2 = '/tmp/TX_2010.wkt'

#%%
data = Community.from_census(state_fips=fip)
dataset = data.gdf[['geometry', 'geoid', 'year']]

#%%
D_2000 = dataset[(dataset.year == 2000)]
D_2000.to_csv(out1, sep='\t', index=False, header=False)

#%%
D_2010 = dataset[(dataset.year == 2010)]
D_2010.to_csv(out2, sep='\t', index=False, header=False)
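
A minimal read-back sketch, assuming the cells above have run: the dumps are written header-less and tab-separated with columns in (geometry, geoid, year) order, so the column names below are assumptions rather than part of the file.

#%%
import geopandas as gpd
import pandas as pd
from shapely import wkt

# parse the tab-separated WKT dump back into a GeoDataFrame
df = pd.read_csv(out1, sep='\t', header=None,
                 names=['geometry', 'geoid', 'year'], dtype={'geoid': str})
df['geometry'] = df['geometry'].apply(wkt.loads)  # WKT strings -> shapely geometries
gdf = gpd.GeoDataFrame(df, geometry='geometry')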