Example #1
0
def test_query_transform_mean():
    """Check query_transform with a 'distance < mean[distance]' query.

    The fixture's distances average 25, and the test expects only the
    rows holding 20 and 10 (labels 'a' and 'b') to remain.
    """
    source_frame = pd.DataFrame(
        {'distance': [20, 10, 30, 40], 'token': ['t1', 't1', 't2', 't2']},
        index=['a', 'b', 'c', 'd'],
    )
    params = {'query': 'distance < mean[distance]'}

    result = query_transform(PandasTable(source_frame), params)

    wanted = pd.DataFrame(
        {'distance': [20, 10], 'token': ['t1', 't1']},
        index=['a', 'b'],
    )
    assert wanted.equals(result.to_pandas())
Example #2
0
def test_histogram_transform_numeric():
    """Check histogram_transform on a numeric column with two bins.

    The table itself must come back unchanged; the histogram is read
    from the result's ``side_result`` mapping ('x', 'y' and 'width').
    """
    source_frame = pd.DataFrame(
        {'distance': [20, 10, 30, 40]},
        index=['a', 'b', 'c', 'd'],
    )
    params = {'column': 'distance', 'bins': 2}

    result = histogram_transform(PandasTable(source_frame), params)

    wanted = {
        'x': [17.5, 32.5],
        'y': [2, 2],
        'width': [15., 15.],
    }
    assert source_frame.equals(result.to_pandas())
    # Compare with a tolerance: the expected values are floats.
    for key in ('x', 'y', 'width'):
        assert np.allclose(wanted[key], result.side_result[key])
Example #3
0
def test_search_transform_string():
    """Check search_transform for the term 't2' in the 'token' column.

    The test expects exactly the two rows whose token is 't2'
    (labels 'c' and 'd') to be returned.
    """
    source_frame = pd.DataFrame(
        {'distance': [20, 10, 30, 40], 'token': ['t1', 't1', 't2', 't2']},
        index=['a', 'b', 'c', 'd'],
    )
    params = {'column': 'token', 'searchterm': 't2'}

    result = search_transform(PandasTable(source_frame), params)

    wanted = pd.DataFrame(
        {'distance': [30, 40], 'token': ['t2', 't2']},
        index=['c', 'd'],
    )
    assert wanted.equals(result.to_pandas())
Example #4
0
def test_histogram_transform_categorical():
    """Check histogram_transform on a string column.

    The table must come back unchanged. The expected ``side_result``
    has the categories 't1', 't2' and 'Other' in 'x', the counts
    [2, 2, 0] in 'y', and None for 'width'.
    """
    source_frame = pd.DataFrame(
        {'token': ['t1', 't1', 't2', 't2']},
        index=['a', 'b', 'c', 'd'],
    )
    params = {'column': 'token', 'bins': 2}

    result = histogram_transform(PandasTable(source_frame), params)

    wanted = {
        'x': ['t1', 't2', 'Other'],
        'y': [2, 2, 0],
        'width': None,
    }
    assert source_frame.equals(result.to_pandas())

    side = result.side_result
    assert wanted.keys() == side.keys()
    assert wanted['width'] == side['width']
    # Order-insensitive comparison: 't1' and 't2' may be flipped.
    assert set(wanted['x']) == set(side['x'])
    assert wanted['y'] == side['y']
Example #5
0
def test_sample_transform():
    """Check that sample_transform with fraction 1.0 returns every row.

    The test expects the rows back with their original order and labels.
    """
    labels = ['a', 'b', 'c']
    source_frame = pd.DataFrame({'distance': [20, 10, 30]}, index=labels)

    result = sample_transform(PandasTable(source_frame), {'fraction': 1.0})

    # Built separately from the input so the two frames share no data.
    wanted = pd.DataFrame({'distance': [20, 10, 30]}, index=['a', 'b', 'c'])
    assert wanted.equals(result.to_pandas())
Example #6
0
def test_pandas_table_apply_bounds():
    """Check PandasTable.apply_bounds(1, 2) on a four-row table.

    The test expects the rows at positions 1 and 2 (labels 'b', 'c').
    """
    source_frame = pd.DataFrame(
        {'distance': [20, 10, 30, 40]},
        index=['a', 'b', 'c', 'd'],
    )
    wanted = pd.DataFrame({'distance': [10, 30]}, index=['b', 'c'])

    bounded = PandasTable(source_frame).apply_bounds(1, 2)

    assert wanted.equals(bounded.to_pandas())
Example #7
0
def test_pandas_table_to_pandas():
    """Check that to_pandas() returns a frame equal to the wrapped one."""
    source_frame = pd.DataFrame(
        {'distance': [20, 10, 30, 40]},
        index=['a', 'b', 'c', 'd'],
    )

    round_tripped = PandasTable(source_frame).to_pandas()

    assert source_frame.equals(round_tripped)
Example #8
0
def test_pandas_table_len():
    """Check that len() of a PandasTable over four rows is 4."""
    source_frame = pd.DataFrame(
        {'distance': [20, 10, 30, 40]},
        index=['a', 'b', 'c', 'd'],
    )

    table = PandasTable(source_frame)

    assert len(table) == 4
def test_sort_transform():
    """Check sort_transform on 'distance' with ascending set to False.

    The test expects the rows ordered 30, 20, 10, each keeping its
    original index label.
    """
    source_frame = pd.DataFrame(
        {'distance': [20, 10, 30]},
        index=['a', 'b', 'c'],
    )
    params = {'column': 'distance', 'ascending': False}

    result = sort_transform(PandasTable(source_frame), params)

    wanted = pd.DataFrame(
        {'distance': [30, 20, 10]},
        index=['c', 'a', 'b'],
    )
    assert wanted.equals(result.to_pandas())