Example #1
0
def test_random_state():
    """Unseeded k-means runs should differ in cluster sizes; seeded runs must agree."""

    def sorted_cluster_sizes(result):
        # Number of "geoid" entries per k-means label, sorted in place so the
        # comparison does not depend on how the labels happen to be numbered.
        sizes = result.gdf.groupby("kmeans").count()["geoid"].values
        sizes.sort()
        return sizes

    # Without a random_state the two runs are expected to disagree.
    community = Community.from_census(msa_fips="39900", datastore=DataStore())
    kmeans_kwargs = dict(columns=columns, method="kmeans", n_clusters=5)
    first = community.cluster(**kmeans_kwargs)
    second = community.cluster(**kmeans_kwargs)
    first_sizes = sorted_cluster_sizes(first)
    second_sizes = sorted_cluster_sizes(second)
    with np.testing.assert_raises(AssertionError):
        np.testing.assert_array_equal(first_sizes, second_sizes)

    # With the same random_state both runs must yield identical cardinalities.
    community = Community.from_census(msa_fips="39900", datastore=DataStore())
    seed = 10
    first = community.cluster(random_state=seed, **kmeans_kwargs)
    second = community.cluster(random_state=seed, **kmeans_kwargs)
    first_sizes = sorted_cluster_sizes(first)
    second_sizes = sorted_cluster_sizes(second)
    np.testing.assert_array_equal(first_sizes, second_sizes)
Example #2
0
from geosnap import Community
import numpy as np
from geosnap import DataStore
from numpy.testing import assert_array_equal, assert_array_almost_equal

# Community built once at import time from census data for MSA FIPS "39900";
# the clustering tests in this module all call `reno.cluster` on it.
reno = Community.from_census(msa_fips="39900", datastore=DataStore())
# Column names passed as `columns=` to every cluster call in this module.
columns = [
    "median_household_income",
    "p_poverty_rate",
    "p_unemployment_rate",
]

# Aspatial Clusters


def test_gm():
    """Gaussian-mixture clustering with ``best_model=True`` yields at least five labels."""
    clustered = reno.cluster(
        columns=columns,
        method="gaussian_mixture",
        best_model=True,
    )
    labels = clustered.gdf.gaussian_mixture.unique()
    assert len(labels) >= 5


def test_ward():
    """Ward clustering with default settings produces exactly seven labels."""
    expected_label_count = 7
    clustered = reno.cluster(columns=columns, method="ward")
    labels = clustered.gdf.ward.unique()
    assert len(labels) == expected_label_count


def test_spectral():
    """Spectral clustering with default settings produces exactly seven labels."""
    expected_label_count = 7
    clustered = reno.cluster(columns=columns, method="spectral")
    labels = clustered.gdf.spectral.unique()
    assert len(labels) == expected_label_count
Example #3
0
import os

import pytest
from geosnap import DataStore, io

# LTDB / NCDB settings are read from the environment. They are treated as a
# pair: if either variable is missing, both names fall back to None.
try:
    LTDB = os.environ["LTDB_SAMPLE"]
    NCDB = os.environ["NCDB"]
except KeyError:
    # Only a missing variable is an expected condition here; a bare `except`
    # would also swallow KeyboardInterrupt/SystemExit and unrelated errors.
    LTDB = None
    NCDB = None

# Single DataStore instance created at import time and passed to the
# `io.get_*` calls in the tests below.
store = DataStore()


def test_nces_schools():
    """``io.get_nces`` with ``dataset="schools"`` returns a table of the expected shape."""
    expected_shape = (102209, 26)
    frame = io.get_nces(store, dataset="schools")
    assert frame.shape == expected_shape


def test_nces_school_dists():
    """``io.get_nces`` with ``dataset="school_districts"`` returns a table of the expected shape."""
    expected_shape = (13352, 18)
    frame = io.get_nces(store, dataset="school_districts")
    assert frame.shape == expected_shape


def test_ejscreen():
    """``io.get_ejscreen`` for year 2018 and FIPS "11" returns a table of the expected shape."""
    expected_shape = (450, 369)
    frame = io.get_ejscreen(store, years=[2018], fips=["11"])
    assert frame.shape == expected_shape


def test_nces_sabs():
Example #4
0
def test_linc_method():
    """The model returned by ward clustering exposes the expected LINC values.

    Census data for MSA FIPS "39900" is harmonized to 2010, clustered with
    ``return_model=True``, and ``model.lincs.linc`` is compared against
    pinned reference values to three decimals.
    """
    variables = [
        "median_household_income",
        "p_poverty_rate",
        "p_unemployment_rate",
        "n_total_pop",
    ]
    census = get_census(DataStore(), msa_fips="39900")
    harmonized = harmonize(census, target_year=2010, intensive_variables=variables)

    # Only the fitted model is needed; the clustered frame is discarded.
    _, model = analyze.cluster(
        harmonized,
        columns=variables,
        method="ward",
        return_model=True,
    )

    observed = model.lincs.linc.values

    # Pinned reference values, one per row of `model.lincs`.
    expected = np.array([
        0.9047619,
        0.94594595,
        0.82608696,
        0.875,
        0.97142857,
        0.9047619,
        1.0,
        0.96428571,
        0.97560976,
        1.0,
        0.82608696,
        1.0,
        0.92682927,
        0.94285714,
        1.0,
        0.94285714,
        0.92682927,
        1.0,
        0.90909091,
        0.94285714,
        1.0,
        1.0,
        1.0,
        0.975,
        0.9047619,
        0.97560976,
        1.0,
        0.82608696,
        0.82608696,
        0.94594595,
        0.875,
        0.875,
        0.96428571,
        0.875,
        0.90625,
        1.0,
        0.9137931,
        0.98360656,
        1.0,
        0.875,
        1.0,
        0.98181818,
        0.97619048,
        0.90909091,
        0.98181818,
        0.90909091,
        0.94594595,
        0.82608696,
        0.97619048,
        0.90909091,
        0.90625,
        0.9137931,
        0.93333333,
        0.93333333,
        1.0,
        1.0,
        0.93333333,
        0.93333333,
        0.975,
        0.90625,
        0.96666667,
        0.96666667,
        0.98507463,
        0.9137931,
        0.94339623,
        0.93939394,
        0.93939394,
        0.94339623,
        0.94339623,
        0.9137931,
        0.97142857,
        0.875,
        0.93939394,
        0.93939394,
        0.93939394,
        0.98507463,
        1.0,
        1.0,
        0.9047619,
        0.96666667,
        0.9047619,
        0.90909091,
        0.94339623,
        0.90625,
        0.90625,
        0.9137931,
        0.9137931,
        0.98214286,
        0.984375,
        0.95918367,
        0.95918367,
        0.95918367,
        0.92682927,
        0.92682927,
        0.98360656,
        0.96551724,
        0.98214286,
        0.96551724,
        0.984375,
        1.0,
        1.0,
        0.98214286,
        0.96551724,
        0.90625,
        0.90625,
        0.98214286,
        1.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
    ])

    assert_array_almost_equal(observed, expected, decimal=3)
Example #5
0
def test_linc_from_gdf():
    columns = [
        "median_household_income",
        "p_poverty_rate",
        "p_unemployment_rate",
    ]
    reno = get_census(DataStore(), msa_fips="39900")
    rdf = harmonize(reno, target_year=1990, intensive_variables=columns)

    rdf = analyze.cluster(reno, columns=columns, method="ward")
    l = lincs_from_gdf(rdf,
                       unit_index="geoid",
                       temporal_index="year",
                       cluster_col="ward")
    assert_array_almost_equal(
        l.linc.values,
        np.array([
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.85714286,
            0.5,
            1.0,
            0.8,
            0.0,
            0.0,
            0.0,
            1.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.5,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            1.0,
            0.8,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.85714286,
            0.5,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            1.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.5,
            0.0,
            1.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.5,
            1.0,
            1.0,
            1.0,
            0.0,
            0.5,
            0.5,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
            0.0,
        ]),
        decimal=3,
    )
Example #6
0
def test_linc_from_gdf_subset():
    """``lincs_from_gdf`` restricted to periods 2000 and 2010 matches pinned values.

    Census data for MSA FIPS "39900" is harmonized to 1990, clustered with
    the ward method, and the resulting ``linc`` column is compared against
    reference values to three decimals.
    """
    variables = [
        "median_household_income",
        "p_poverty_rate",
        "p_unemployment_rate",
        "n_total_pop",
    ]
    census = get_census(DataStore(), msa_fips="39900")
    harmonized = harmonize(census, target_year=1990, intensive_variables=variables)

    clustered = analyze.cluster(harmonized, columns=variables, method="ward")
    lincs = lincs_from_gdf(
        clustered,
        unit_index="geoid",
        temporal_index="year",
        cluster_col="ward",
        periods=[2000, 2010],
    )
    observed = lincs.linc.values

    # Pinned reference values, one per row of the returned frame.
    expected = np.array([
        0.96969697,
        0.78571429,
        0.8,
        0.75,
        0.66666667,
        0.8125,
        0.78571429,
        0.80952381,
        1.0,
        0.8,
        0.75,
        0.74074074,
        0.80952381,
        0.80952381,
        0.92307692,
        1.0,
        0.8,
        0.78571429,
        0.78571429,
        0.75,
        0.8125,
        0.75,
        0.74074074,
        0.74074074,
        0.8,
        0.75,
        0.66666667,
        0.90909091,
        0.66666667,
        0.92307692,
        1.0,
        1.0,
        0.74074074,
        0.80952381,
        1.0,
        1.0,
        1.0,
        0.74074074,
        0.96969697,
        1.0,
        0.8125,
        0.74074074,
        0.74074074,
        1.0,
        0.80952381,
        0.8125,
        0.96153846,
        0.90909091,
        0.74074074,
        0.66666667,
        0.66666667,
        0.66666667,
        0.66666667,
        0.66666667,
        0.66666667,
        0.96153846,
        0.66666667,
        0.66666667,
    ])

    assert_array_almost_equal(observed, expected, decimal=3)
Example #7
0
from geosnap import DataStore
# Single DataStore instance created at import time; every test below reads
# from it.
datasets = DataStore()


def test_data_dir():
    """``show_data_dir`` returns a value longer than five items/characters."""
    data_dir = datasets.show_data_dir()
    minimum_length = 5
    assert len(data_dir) > minimum_length


def test_acs():
    """``acs`` for year 2012 and state "11" returns a table of the expected shape."""
    expected_shape = (179, 104)
    table = datasets.acs(year=2012, states=["11"])
    assert table.shape == expected_shape


def test_tracts90():
    """``tracts_1990`` for state "11" returns a table of the expected shape."""
    expected_shape = (192, 164)
    table = datasets.tracts_1990(states=["11"])
    assert table.shape == expected_shape


def test_tracts00():
    """``tracts_2000`` for state "11" returns a table of the expected shape."""
    expected_shape = (188, 192)
    table = datasets.tracts_2000(states=["11"])
    assert table.shape == expected_shape


def test_tracts10():
    """``tracts_2010`` for state "11" returns a table of the expected shape."""
    expected_shape = (179, 194)
    table = datasets.tracts_2010(states=["11"])
    assert table.shape == expected_shape


def test_counties():
    """``counties`` returns a two-column table with 3233 rows."""
    expected_shape = (3233, 2)
    table = datasets.counties()
    assert table.shape == expected_shape