コード例 #1
0
ファイル: test_extract.py プロジェクト: mggg/gdutils
def test_infile():
    """Exercise the ``infile`` setter of ``ExtractTable``.

    After ``good_inf1`` has been assigned, a fixed set of further
    assignments is expected to raise; the same set is checked again once
    a second table has been created from ``zip_inf``.
    """
    # (attribute, candidate) pairs whose assignment must raise.
    rejected = (
        ('value', good_val1a),
        ('column', bad_col),
        ('infile', bad_inf),
        ('infile', good_inf2),
    )

    def assert_rejected(table):
        # setattr goes through the same setter as a plain assignment.
        for attr, candidate in rejected:
            with pytest.raises(Exception):
                setattr(table, attr, candidate)

    first = et.ExtractTable()
    first.infile = good_inf1
    assert first.infile == good_inf1
    assert_rejected(first)

    # Extraction must succeed with only the infile configured.
    first.extract()

    with pytest.raises(Exception):
        first.infile = zip_inf

    second = et.ExtractTable()
    second.infile = zip_inf

    # NOTE(review): this round re-checks the first table rather than
    # `second` -- confirm whether that is intentional.
    assert_rejected(first)

    second.extract()
コード例 #2
0
ファイル: test_extract.py プロジェクト: mggg/gdutils
def test_constructor_errors():
    """Check that ``ExtractTable`` raises for each invalid argument set."""
    # Positional argument tuples the constructor is expected to reject.
    invalid_args = (
        (bad_inf,),
        (good_inf1, None, bad_col),
        (good_inf1, None, bad_col, bad_val),
        (good_inf1, None, good_col1a, bad_val),
    )
    for args in invalid_args:
        with pytest.raises(Exception):
            et.ExtractTable(*args)
コード例 #3
0
ファイル: test_dataqa.py プロジェクト: mggg/gdutils
def test_compare_column_sums():
    """Exercise ``dq.compare_column_sums`` on small frames and real data.

    First checks the argument combinations that must raise, then the
    exact results for two small numeric frames, and finally compares the
    result on the extracted MGGG/MEDSL tables against differences built
    from ``dq.sum_column_values``.
    """
    df1 = pd.DataFrame(data=[[1, 2, 3], [4, 5, 6]],
                       columns=['COL1', 'COL2', 'COL3'])
    df2 = pd.DataFrame(data=[[4, 5], [1, 2]], columns=['col2', 'col1'])
    df3 = pd.DataFrame(data=[['asdf', 'fdsa'], ['foo', 'bar']],
                       columns=['c1', 'c2'])

    # Argument tuples for which the call is expected to raise.
    invalid_calls = (
        (pd.DataFrame(), pd.DataFrame(), 'asdf', 'asdf'),
        (pd.DataFrame(), df1, 'asdf', 'asdf'),
        (df1, df2, 'col2', 'COL1'),
        (df1, df2, ['col2'], ['COL1']),
        (df1, df2, df1.columns, df2.columns),
        (df1, df2, ['COL1', 'COL2'], ['col1', 'col3']),
        (df1, df2, 'COL1', ['col1']),
        (df1, df3, 'COL1', 'c1'),
    )
    for args in invalid_calls:
        with pytest.raises(Exception):
            dq.compare_column_sums(*args)

    single = dq.compare_column_sums(df1, df2, ['COL1'], ['col1'])
    assert single == [('COL1 [vs] col1', -2)]

    double = dq.compare_column_sums(df1, df2, ['COL1', 'COL3'],
                                    ['col1', 'col2'])
    assert double == [('COL1 [vs] col1', -2), ('COL3 [vs] col2', 4)]

    mggg_gdf1 = et.ExtractTable(mggg_gdf, column='PRECINCT').extract()
    medsl_df1 = et.ExtractTable(medsl_df, column='precinct').extract()
    mggg_cols = ['AG18D', 'AG18R', 'COMP18D']
    medsl_cols = [
        'Attorney General democrat',
        'Attorney General republican',
        'Comptroller democrat',
    ]
    results = dq.compare_column_sums(mggg_gdf1, medsl_df1, mggg_cols,
                                     medsl_cols)

    mggg_sums = dq.sum_column_values(mggg_gdf1, mggg_cols)
    # NOTE(review): sums are taken from `medsl_df`, not the extracted
    # `medsl_df1` used above -- confirm this is intended.
    medsl_sums = dq.sum_column_values(medsl_df, medsl_cols)

    # Pair the per-column sums positionally and rebuild the expected
    # (label, difference) tuples.
    expected = [
        ('{} [vs] {}'.format(mggg_pair[0], medsl_pair[0]),
         (mggg_pair[1] - medsl_pair[1]))
        for mggg_pair, medsl_pair in zip(mggg_sums, medsl_sums)
    ]
    assert set(results) == set(expected)
コード例 #4
0
ファイル: test_extract.py プロジェクト: mggg/gdutils
def test_list_values():
    """Exercise ``ExtractTable.list_values`` with and without ``unique``.

    The call must raise before an infile is set. Afterwards the values of
    ``good_col1a`` are checked when passed explicitly (with no column set
    and with ``good_col1b`` set) and when taken from the table's column.
    """
    table = et.ExtractTable()

    with pytest.raises(Exception):
        table.list_values()

    def check_all(found):
        # Full listing: an ndarray equal element-wise to full_vals1.
        assert type(found) is np.ndarray
        assert (found == np.array(full_vals1)).all()

    def check_unique(found):
        # Unique listing: same set of values as np.unique of full_vals1.
        assert type(found) is np.ndarray
        assert set(found) == set(
            np.unique(np.array(full_vals1, dtype=object)))

    table.infile = good_inf1
    check_all(table.list_values(good_col1a))
    check_unique(table.list_values(good_col1a, unique=True))

    # An explicit column argument is used while a different column is set.
    table.column = good_col1b
    check_all(table.list_values(good_col1a))
    check_unique(table.list_values(good_col1a, unique=True))

    # With no argument, the table's own column is listed.
    table.column = good_col1a
    check_all(table.list_values())
    check_unique(table.list_values(unique=True))
コード例 #5
0
ファイル: test_extract.py プロジェクト: mggg/gdutils
def test_outfile():
    """Check that assigning ``outfile`` stores it as a ``PosixPath``."""
    table = et.ExtractTable(good_inf1)

    for target in (good_out, dne_out):
        table.outfile = target
        assert table.outfile == PosixPath(target)
コード例 #6
0
ファイル: test_extract.py プロジェクト: mggg/gdutils
def test_empty_constructor():
    """Check the state of an ``ExtractTable`` built with no arguments."""
    blank = et.ExtractTable()

    # Every configurable attribute starts out as None.
    for attr in ('infile', 'outfile', 'column', 'value'):
        assert getattr(blank, attr) is None

    # Extraction is expected to raise on the unconfigured table.
    with pytest.raises(Exception):
        blank.extract()
コード例 #7
0
ファイル: test_extract.py プロジェクト: mggg/gdutils
def test_value():
    """Check that ``value`` can be set and reset, then extract."""
    table = et.ExtractTable()
    table.infile = good_inf1
    table.column = good_col1a

    for candidate in (good_val1a, 'b'):
        table.value = candidate
        assert table.value == candidate

    table.extract()
コード例 #8
0
ファイル: test_extract.py プロジェクト: mggg/gdutils
def test_column():
    """Check the ``column`` setter and the assignments that must raise."""
    table = et.ExtractTable()
    table.infile = good_inf1

    table.column = good_col1a
    assert table.column == good_col1a

    # setattr goes through the same setter as a plain assignment.
    for attr, rejected in (('value', bad_val), ('column', bad_col)):
        with pytest.raises(Exception):
            setattr(table, attr, rejected)

    table.extract()
コード例 #9
0
ファイル: test_extract.py プロジェクト: mggg/gdutils
def test_constructor():
    """Check the attributes set by each ``ExtractTable`` constructor form.

    Covers positional and keyword combinations of infile, outfile, column
    and value. Attributes that were not supplied must be ``None``; these
    checks use ``is None`` throughout so that an object which merely
    compares equal to ``None`` cannot satisfy them.
    """
    # infile only
    test_et = et.ExtractTable(good_inf1)
    assert test_et.infile == good_inf1
    assert test_et.outfile is None
    assert test_et.column is None
    assert test_et.value is None

    # infile + outfile
    test_et = et.ExtractTable(good_inf1, good_out)
    assert test_et.infile == good_inf1
    assert test_et.outfile == PosixPath(good_out)
    assert test_et.column is None
    assert test_et.value is None

    # infile + outfile + column
    test_et = et.ExtractTable(good_inf1, good_out, good_col1a)
    assert test_et.infile == good_inf1
    assert test_et.outfile == PosixPath(good_out)
    assert test_et.column == good_col1a
    assert test_et.value is None

    # all four positional arguments
    test_et = et.ExtractTable(good_inf1, good_out, good_col1a, good_val1a)
    assert test_et.infile == good_inf1
    assert test_et.outfile == PosixPath(good_out)
    assert test_et.column == good_col1a
    assert test_et.value == good_val1a

    # outfile explicitly passed as None
    test_et = et.ExtractTable(good_inf1, None, good_col1a)
    assert test_et.infile == good_inf1
    assert test_et.outfile is None
    assert test_et.column == good_col1a
    assert test_et.value is None

    # column passed by keyword
    test_et = et.ExtractTable(good_inf1, column=good_col1a)
    assert test_et.infile == good_inf1
    assert test_et.outfile is None
    assert test_et.column == good_col1a
    assert test_et.value is None

    # column and value passed by keyword
    test_et = et.ExtractTable(good_inf1, column=good_col1a, value=good_val1a)
    assert test_et.infile == good_inf1
    assert test_et.outfile is None
    assert test_et.column == good_col1a
    assert test_et.value == good_val1a

    # value given as good_vals1a instead of the single good_val1a
    test_et = et.ExtractTable(good_inf1, None, good_col1a, good_vals1a)
    assert test_et.infile == good_inf1
    assert test_et.outfile is None
    assert test_et.column == good_col1a
    assert test_et.value == good_vals1a
コード例 #10
0
ファイル: test_extract.py プロジェクト: mggg/gdutils
def test_extract_to_file():
    """Check that ``extract_to_file`` writes the configured outfile.

    Also covers an outfile (``dne_out``) that does not exist beforehand
    and whose directory ``dne_dir`` is asserted not to exist either.
    Generated outputs are removed before the test and, via ``finally``,
    afterwards even when an assertion fails, so a failing run does not
    leave files behind.
    """
    del_outs()
    try:
        test_et = et.ExtractTable(good_inf1, good_out)

        test_et.extract_to_file()
        assert os.path.isfile(good_out)

        test_et.outfile = dne_out
        # Neither the target file nor its directory exists yet.
        assert not os.path.isfile(dne_out)
        assert not os.path.isdir(dne_dir)
        test_et.extract_to_file()
        assert os.path.isfile(dne_out)
    finally:
        del_outs()
コード例 #11
0
ファイル: test_extract.py プロジェクト: mggg/gdutils
def test_list_columns():
    """Check ``list_columns`` on an empty table and on two read files."""
    unloaded = et.ExtractTable()

    # Listing columns is expected to raise before any infile is set.
    with pytest.raises(Exception):
        unloaded.list_columns()

    # (input file, expected column array) pairs.
    cases = (
        (good_inf1, np.array(full_cols1)),
        (good_inf2, np.array(full_cols2, dtype=object)),
    )
    for source, expected in cases:
        found = et.read_file(source).list_columns()
        assert type(found) is np.ndarray
        assert (found == expected).all()
コード例 #12
0
ファイル: test_extract.py プロジェクト: mggg/gdutils
def test_setters_2():
    """Check how the ``infile``, ``column`` and ``value`` setters interact."""

    def configure(infile, column, value):
        # Build a table through the setters, in infile/column/value order.
        table = et.ExtractTable()
        table.infile = infile
        table.column = column
        table.value = value
        return table

    first = configure(good_inf1, good_col1a, good_val1a)
    assert first.column == good_col1a
    assert first.value == good_val1a

    # After switching column, the previously set value must read as None.
    first.column = good_col1b
    assert first.column == good_col1b
    assert first.value is None

    first.value = good_val1b
    assert first.value == good_val1b

    second = configure(good_inf2, good_col2, good_val2)
    assert second.infile == good_inf2
    assert second.column == good_col2
    assert second.value == good_val2
コード例 #13
0
ファイル: test_extract.py プロジェクト: mggg/gdutils
def test_extract():
    """Compare ``ExtractTable.extract`` output with a geopandas reference.

    The reference frame is read directly with geopandas and then indexed
    and sliced in step with the column and value set on the table.
    """
    table = et.ExtractTable(good_inf1)
    # Note: gpd has inconsistent naming compared with pd
    expected = gpd.read_file(good_inf1).rename(
        columns={'field_1': 'Unnamed: 0'})

    def check(reference):
        # Each extraction must be exactly a GeoDataFrame equal to reference.
        result = table.extract()
        assert type(result) is gpd.GeoDataFrame
        assert result.equals(reference)

    check(expected)

    table.column = good_col1a
    expected = expected.set_index(good_col1a)
    check(expected)

    table.value = good_val1a
    expected = gpd.GeoDataFrame(expected.loc[good_val1a])
    check(expected)
コード例 #14
0
ファイル: test_dataqa.py プロジェクト: mggg/gdutils
def test_compare_column_values():
    """Exercise ``dq.compare_column_values`` on small frames and real data.

    Checks the argument combinations that must raise, then the exact
    result dictionaries for several column/row selections on two small
    numeric frames, and finally a single-precinct comparison between the
    extracted MGGG and MEDSL tables.
    """
    df1 = pd.DataFrame(data=[[1, 2, 3], [4, 5, 6]],
                       columns=['COL1', 'COL2', 'COL3'])
    df2 = pd.DataFrame(data=[[4, 5], [1, 2]], columns=['col2', 'col1'])
    df3 = pd.DataFrame(data=[['asdf', 'fdsa'], ['foo', 'bar']],
                       columns=['c1', 'c2'])

    # Argument tuples for which the call is expected to raise.
    invalid_calls = (
        (pd.DataFrame(), pd.DataFrame(), 'asdf', 'asdf'),
        (pd.DataFrame(), df1, 'asdf', 'asdf'),
        (df1, df2, 'col2', 'COL1'),
        (df1, df2, ['col2'], ['COL1']),
        (df1, df2, 'COL1', 'col2'),
        (df1, df2, df1.columns, df2.columns),
        (df1, df2, ['COL1', 'COL2'], ['col1', 'col2'], [1], ['adsf']),
        (df1, df2, ['COL1'], ['col1'], [1], [-1]),
        (df1, df3, ['COL1'], ['c1']),
        (df1, df2, ['COL1'], ['col2'], [0, 1], [1]),
        (df1, df2, ['COL1'], ['col2'], [0, 1], [0, 5]),
    )
    for args in invalid_calls:
        with pytest.raises(Exception):
            dq.compare_column_values(*args)

    # (arguments, expected result) pairs, evaluated in order.
    valid_calls = (
        ((df1, df2, ['COL1'], ['col1']),
         {'COL1 [vs] col1': [('0 [vs] 0', -4), ('1 [vs] 1', 2)]}),
        ((df1, df2, ['COL3'], ['col2']),
         {'COL3 [vs] col2': [('0 [vs] 0', -1), ('1 [vs] 1', 5)]}),
        ((df1, df2, ['COL1', 'COL2'], ['col1', 'col2']),
         {
             'COL1 [vs] col1': [('0 [vs] 0', -4), ('1 [vs] 1', 2)],
             'COL2 [vs] col2': [('0 [vs] 0', -2), ('1 [vs] 1', 4)]
         }),
        ((df1, df2, ['COL1'], ['col1'], [0], [1]),
         {'COL1 [vs] col1': [('0 [vs] 1', -1)]}),
        ((df1, df2, ['COL1'], ['col1'], [0, 1], [1, 0]),
         {'COL1 [vs] col1': [('0 [vs] 1', -1), ('1 [vs] 0', -1)]}),
        ((df1, df1, ['COL1'], ['COL2'], [0], [0]),
         {'COL1 [vs] COL2': [('0 [vs] 0', -1)]}),
    )
    for args, expected in valid_calls:
        assert dq.compare_column_values(*args) == expected

    mggg_precinct = 'Plainfield - DISTRICT 1-1-1a Town Hall'
    medsl_precinct = '1a Town Hall'

    mggg_gdf1 = et.ExtractTable(mggg_gdf, column='PRECINCT').extract()
    medsl_df1 = et.ExtractTable(medsl_df, column='precinct').extract()
    results = dq.compare_column_values(
        mggg_gdf1, medsl_df1, ['AG18D'], ['Attorney General democrat'],
        [mggg_precinct], [medsl_precinct])
    # Only the difference of the first reported pair is checked.
    _, diff = results['AG18D [vs] Attorney General democrat'][0]

    ct_et = et.ExtractTable(mggg_gdf,
                            column='PRECINCT',
                            value=mggg_precinct)
    medsl_et = et.ExtractTable(medsl_df,
                               column='precinct',
                               value=medsl_precinct)
    mggg_votes = ct_et.extract()['AG18D'][0]
    medsl_votes = medsl_et.extract()['Attorney General democrat'][0]
    assert diff == abs(mggg_votes - medsl_votes)
コード例 #15
0
ファイル: test_extract.py プロジェクト: mggg/gdutils
def notest_large():
    """Extract a table keyed on ``NAME10`` into ``tests/dumps/large.zip``.

    The input path is an empty string here. Presumably the ``notest_``
    prefix keeps pytest from collecting this function -- confirm against
    the project's pytest configuration.
    """
    source_path = ''
    archive_path = 'tests/dumps/large.zip'
    job = et.ExtractTable(source_path, archive_path, column='NAME10')
    job.extract_to_file()