def test_random_state():
    """Check that ``random_state`` governs reproducibility of k-means clustering.

    Two unseeded k-means runs on the same community are expected to produce
    different cluster cardinalities, while two runs sharing the same seed are
    expected to produce identical cardinalities.
    """

    def _sorted_cluster_sizes(result):
        # Number of rows per k-means label, in ascending order.  ``np.sort``
        # returns a sorted copy, so the array handed out by pandas is never
        # mutated in place (such arrays may be read-only under Copy-on-Write).
        sizes = result.gdf.groupby("kmeans").count()["geoid"].values
        return np.sort(sizes)

    kmeans_kwargs = {"columns": columns, "method": "kmeans", "n_clusters": 5}

    # no seeds
    reno = Community.from_census(msa_fips="39900", datastore=DataStore())
    unseeded_a = reno.cluster(**kmeans_kwargs)
    unseeded_b = reno.cluster(**kmeans_kwargs)
    sizes_a = _sorted_cluster_sizes(unseeded_a)
    sizes_b = _sorted_cluster_sizes(unseeded_b)
    # test that the cardinalities are different
    np.testing.assert_raises(
        AssertionError, np.testing.assert_array_equal, sizes_a, sizes_b
    )

    # seeds
    reno = Community.from_census(msa_fips="39900", datastore=DataStore())
    seed = 10
    seeded_a = reno.cluster(random_state=seed, **kmeans_kwargs)
    seeded_b = reno.cluster(random_state=seed, **kmeans_kwargs)
    sizes_a = _sorted_cluster_sizes(seeded_a)
    sizes_b = _sorted_cluster_sizes(seeded_b)
    # test that the cardinalities are identical
    np.testing.assert_array_equal(sizes_a, sizes_b)
import numpy as np
from numpy.testing import assert_array_almost_equal, assert_array_equal

from geosnap import Community, DataStore

# Shared fixture: one community built from census data for MSA FIPS "39900",
# reused by every aspatial clustering test in this module.
reno = Community.from_census(msa_fips="39900", datastore=DataStore())

# Variables fed to each clustering method.
columns = [
    "median_household_income",
    "p_poverty_rate",
    "p_unemployment_rate",
]


# Aspatial Clusters


def test_gm():
    """Gaussian mixture with ``best_model=True`` yields at least 5 labels."""
    clustered = reno.cluster(
        columns=columns,
        method="gaussian_mixture",
        best_model=True,
    )
    labels = clustered.gdf["gaussian_mixture"].unique()
    assert len(labels) >= 5


def test_ward():
    """Ward clustering with default settings yields exactly 7 labels."""
    clustered = reno.cluster(columns=columns, method="ward")
    labels = clustered.gdf["ward"].unique()
    assert len(labels) == 7


def test_spectral():
    """Spectral clustering with default settings yields exactly 7 labels."""
    clustered = reno.cluster(columns=columns, method="spectral")
    labels = clustered.gdf["spectral"].unique()
    assert len(labels) == 7
import os import pytest from geosnap import DataStore, io try: LTDB = os.environ["LTDB_SAMPLE"] NCDB = os.environ["NCDB"] except: LTDB = None NCDB = None store = DataStore() def test_nces_schools(): schools = io.get_nces(store, dataset="schools") assert schools.shape == (102209, 26) def test_nces_school_dists(): dists = io.get_nces(store, dataset="school_districts") assert dists.shape == (13352, 18) def test_ejscreen(): ej = io.get_ejscreen(store, years=[2018], fips=["11"]) assert ej.shape == (450, 369) def test_nces_sabs():
def test_linc_method():
    """LINC values exposed by a fitted ward model match the reference values.

    Census data for MSA FIPS "39900" is harmonized to 2010, clustered with the
    ward method (requesting the model back), and ``model.lincs.linc`` is
    compared with the stored values to three decimals.
    """
    columns = [
        "median_household_income",
        "p_poverty_rate",
        "p_unemployment_rate",
        "n_total_pop",
    ]
    expected_lincs = np.array([
        0.9047619, 0.94594595, 0.82608696, 0.875, 0.97142857, 0.9047619,
        1.0, 0.96428571, 0.97560976, 1.0, 0.82608696, 1.0,
        0.92682927, 0.94285714, 1.0, 0.94285714, 0.92682927, 1.0,
        0.90909091, 0.94285714, 1.0, 1.0, 1.0, 0.975,
        0.9047619, 0.97560976, 1.0, 0.82608696, 0.82608696, 0.94594595,
        0.875, 0.875, 0.96428571, 0.875, 0.90625, 1.0,
        0.9137931, 0.98360656, 1.0, 0.875, 1.0, 0.98181818,
        0.97619048, 0.90909091, 0.98181818, 0.90909091, 0.94594595, 0.82608696,
        0.97619048, 0.90909091, 0.90625, 0.9137931, 0.93333333, 0.93333333,
        1.0, 1.0, 0.93333333, 0.93333333, 0.975, 0.90625,
        0.96666667, 0.96666667, 0.98507463, 0.9137931, 0.94339623, 0.93939394,
        0.93939394, 0.94339623, 0.94339623, 0.9137931, 0.97142857, 0.875,
        0.93939394, 0.93939394, 0.93939394, 0.98507463, 1.0, 1.0,
        0.9047619, 0.96666667, 0.9047619, 0.90909091, 0.94339623, 0.90625,
        0.90625, 0.9137931, 0.9137931, 0.98214286, 0.984375, 0.95918367,
        0.95918367, 0.95918367, 0.92682927, 0.92682927, 0.98360656, 0.96551724,
        0.98214286, 0.96551724, 0.984375, 1.0, 1.0, 0.98214286,
        0.96551724, 0.90625, 0.90625, 0.98214286, 1.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0,
    ])

    reno = get_census(DataStore(), msa_fips="39900")
    harmonized = harmonize(reno, target_year=2010, intensive_variables=columns)
    # Only the fitted model is needed; the clustered frame is discarded.
    _clustered, model = analyze.cluster(
        harmonized,
        columns=columns,
        method="ward",
        return_model=True,
    )
    observed_lincs = model.lincs.linc.values

    assert_array_almost_equal(observed_lincs, expected_lincs, decimal=3)
def test_linc_from_gdf():
    """LINC values computed by ``lincs_from_gdf`` match the reference values.

    The clustered frame is indexed by ``geoid`` and ``year`` and labelled by
    the ``ward`` column; results are compared to three decimals.
    """
    columns = [
        "median_household_income",
        "p_poverty_rate",
        "p_unemployment_rate",
    ]
    expected_lincs = np.array([
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.85714286, 0.5, 1.0, 0.8,
        0.0, 0.0, 0.0, 1.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.8,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.85714286, 0.5,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 1.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 1.0, 1.0, 1.0, 0.0, 0.5, 0.5,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
    ])

    reno = get_census(DataStore(), msa_fips="39900")
    # NOTE(review): the harmonized frame is never used -- clustering below runs
    # on the raw ``reno`` frame.  The expected values presumably reflect that;
    # confirm before switching the clustering input to the harmonized frame.
    _harmonized = harmonize(reno, target_year=1990, intensive_variables=columns)
    clustered = analyze.cluster(reno, columns=columns, method="ward")

    lincs = lincs_from_gdf(
        clustered,
        unit_index="geoid",
        temporal_index="year",
        cluster_col="ward",
    )

    assert_array_almost_equal(lincs.linc.values, expected_lincs, decimal=3)
def test_linc_from_gdf_subset():
    """LINC values restricted to the 2000 and 2010 periods match the reference.

    Census data for MSA FIPS "39900" is harmonized to 1990, clustered with the
    ward method, and ``lincs_from_gdf`` is called with ``periods=[2000, 2010]``;
    results are compared to three decimals.
    """
    columns = [
        "median_household_income",
        "p_poverty_rate",
        "p_unemployment_rate",
        "n_total_pop",
    ]
    expected_lincs = np.array([
        0.96969697, 0.78571429, 0.8, 0.75, 0.66666667, 0.8125,
        0.78571429, 0.80952381, 1.0, 0.8, 0.75, 0.74074074,
        0.80952381, 0.80952381, 0.92307692, 1.0, 0.8, 0.78571429,
        0.78571429, 0.75, 0.8125, 0.75, 0.74074074, 0.74074074,
        0.8, 0.75, 0.66666667, 0.90909091, 0.66666667, 0.92307692,
        1.0, 1.0, 0.74074074, 0.80952381, 1.0, 1.0,
        1.0, 0.74074074, 0.96969697, 1.0, 0.8125, 0.74074074,
        0.74074074, 1.0, 0.80952381, 0.8125, 0.96153846, 0.90909091,
        0.74074074, 0.66666667, 0.66666667, 0.66666667, 0.66666667, 0.66666667,
        0.66666667, 0.96153846, 0.66666667, 0.66666667,
    ])

    reno = get_census(DataStore(), msa_fips="39900")
    harmonized = harmonize(reno, target_year=1990, intensive_variables=columns)
    clustered = analyze.cluster(harmonized, columns=columns, method="ward")

    lincs = lincs_from_gdf(
        clustered,
        unit_index="geoid",
        temporal_index="year",
        cluster_col="ward",
        periods=[2000, 2010],
    )

    assert_array_almost_equal(lincs.linc.values, expected_lincs, decimal=3)
from geosnap import DataStore

# DataStore instance shared by every test in this module.
datasets = DataStore()


def test_data_dir():
    """``show_data_dir`` returns a value longer than five characters."""
    data_dir = datasets.show_data_dir()
    assert len(data_dir) > 5


def test_acs():
    """ACS data for 2012, state "11", has the expected shape."""
    acs = datasets.acs(year=2012, states=["11"])
    assert acs.shape == (179, 104)


def test_tracts90():
    """1990 tracts for state "11" have the expected shape."""
    tracts = datasets.tracts_1990(states=["11"])
    assert tracts.shape == (192, 164)


def test_tracts00():
    """2000 tracts for state "11" have the expected shape."""
    tracts = datasets.tracts_2000(states=["11"])
    assert tracts.shape == (188, 192)


def test_tracts10():
    """2010 tracts for state "11" have the expected shape."""
    tracts = datasets.tracts_2010(states=["11"])
    assert tracts.shape == (179, 194)


def test_counties():
    """The counties table has the expected shape."""
    counties = datasets.counties()
    assert counties.shape == (3233, 2)