예제 #1
0
def test_gen_valmaps_default(smalldf):
    """With empty metadata, only the categorical columns get a value map.

    Columns 0 and 3 are declared continuous and must be absent from the
    result; columns 1 and 2 are declared categorical and must map every
    value to its index (``val2idx``) and back (``idx2val``).
    """
    col_types = ['continuous', 'categorical', 'categorical', 'continuous']
    valmaps = du.gen_valmaps(smalldf, col_types, dict())

    for continuous_col in (0, 3):
        assert continuous_col not in valmaps
    for categorical_col in (1, 2):
        assert categorical_col in valmaps

    # Expected values of each categorical column, listed in index order.
    expected_order = (
        (1, [0, 1]),
        (2, ['one', 'three', 'two']),
    )
    for col, ordered_values in expected_order:
        val2idx = valmaps[col]['val2idx']
        idx2val = valmaps[col]['idx2val']

        assert len(val2idx) == len(ordered_values)
        assert len(idx2val) == len(ordered_values)
        for idx, value in enumerate(ordered_values):
            assert val2idx[value] == idx
        for idx, value in enumerate(ordered_values):
            assert idx2val[idx] == value
예제 #2
0
def test_gen_valmaps_default_missing_vals(smalldf):
    """Missing values (NaN) must not appear in the generated value maps.

    One cell in each categorical column is overwritten with NaN before the
    value maps are generated; the maps are then expected to contain exactly
    the same entries as they would without any missing data.
    """
    dtypes = ['continuous', 'categorical', 'categorical', 'continuous']
    metadata = dict()

    # ``DataFrame.ix`` was deprecated in pandas 0.20 and removed in 1.0, so
    # use positional ``.iloc`` instead.
    # NOTE(review): assumes ``smalldf`` uses the default integer row/column
    # labels, in which case positional and label indexing coincide - confirm
    # against the fixture.
    smalldf.iloc[0, 1] = float('NaN')
    smalldf.iloc[1, 2] = float('NaN')
    valmaps = du.gen_valmaps(smalldf, dtypes, metadata)

    # Continuous columns have no value map.
    assert 0 not in valmaps
    assert 3 not in valmaps

    # Categorical columns do.
    assert 1 in valmaps
    assert 2 in valmaps

    valmap_1 = valmaps[1]
    valmap_2 = valmaps[2]

    # A length of 2 (not 3) shows that NaN was not added as a category.
    assert len(valmap_1['val2idx']) == 2
    assert len(valmap_1['idx2val']) == 2
    assert valmap_1['val2idx'][0] == 0
    assert valmap_1['val2idx'][1] == 1
    assert valmap_1['idx2val'][0] == 0
    assert valmap_1['idx2val'][1] == 1

    # Likewise a length of 3 (not 4) for the string-valued column.
    assert len(valmap_2['val2idx']) == 3
    assert len(valmap_2['idx2val']) == 3
    assert valmap_2['val2idx']['one'] == 0
    assert valmap_2['val2idx']['three'] == 1
    assert valmap_2['val2idx']['two'] == 2
    assert valmap_2['idx2val'][0] == 'one'
    assert valmap_2['idx2val'][1] == 'three'
    assert valmap_2['idx2val'][2] == 'two'
예제 #3
0
def test_dataframe_to_array_all_categorical_with_missing_vals():
    """Categorical columns convert to a float array of codes, keeping NaN.

    Builds a two-column frame (one numeric, one string column, each ending
    in a missing value), converts it with the generated value maps, and
    checks the resulting code for every cell.
    """
    numeric_col = pd.Series([-1, 0, 2, 1, float('NaN')])
    string_col = pd.Series(['one', 'two', 'three', 'four', float('NaN')])
    df = pd.concat([numeric_col, string_col], axis=1)

    valmaps = du.gen_valmaps(df, ['categorical', 'categorical'], dict())
    data = du.dataframe_to_array(df, valmaps)

    assert data.shape == df.shape
    assert 'float' in str(data.dtype)

    # Expected integer code for rows 0-3 of each column; row 4 is missing.
    expected_codes = (
        (0, [0, 1, 3, 2]),
        (1, [1, 3, 2, 0]),
    )
    for col, codes in expected_codes:
        for row, code in enumerate(codes):
            assert data[row, col] == code
        # The missing entry must survive the conversion as NaN.
        assert np.isnan(data[4, col])
예제 #4
0
def test_gen_valmaps_default(smalldf):
    """Value maps are built for categorical columns only (no metadata).

    The continuous columns (0 and 3) must not be keys of the result, while
    the categorical columns (1 and 2) must each provide matching
    ``val2idx`` / ``idx2val`` lookups.
    """
    valmaps = du.gen_valmaps(
        smalldf,
        ['continuous', 'categorical', 'categorical', 'continuous'],
        dict(),
    )

    assert 0 not in valmaps and 3 not in valmaps
    assert 1 in valmaps and 2 in valmaps

    def check_valmap(valmap, values_by_index):
        # ``values_by_index[i]`` is the value expected to carry index ``i``.
        n_values = len(values_by_index)
        assert len(valmap['val2idx']) == n_values
        assert len(valmap['idx2val']) == n_values
        for index, value in enumerate(values_by_index):
            assert valmap['val2idx'][value] == index
        for index, value in enumerate(values_by_index):
            assert valmap['idx2val'][index] == value

    check_valmap(valmaps[1], (0, 1))
    check_valmap(valmaps[2], ('one', 'three', 'two'))
예제 #5
0
def test_dataframe_to_array_all_categorical_with_missing_vals():
    """Converting an all-categorical frame yields float codes and keeps NaN.

    Each of the two columns ends with a missing value; the converted array
    must have the frame's shape, a float dtype, the expected code in every
    populated cell, and NaN where the value was missing.
    """
    columns = [
        pd.Series([-1, 0, 2, 1, float('NaN')]),
        pd.Series(['one', 'two', 'three', 'four', float('NaN')]),
    ]
    df = pd.concat(columns, axis=1)

    dtypes = ['categorical' for _ in columns]
    valmaps = du.gen_valmaps(df, dtypes, dict())
    data = du.dataframe_to_array(df, valmaps)

    assert data.shape == df.shape
    assert 'float' in str(data.dtype)

    # One row of expected codes per data row: (column 0, column 1).
    expected_rows = [(0, 1), (1, 3), (3, 2), (2, 0)]

    for row, (code_0, _) in enumerate(expected_rows):
        assert data[row, 0] == code_0
    assert np.isnan(data[4, 0])

    for row, (_, code_1) in enumerate(expected_rows):
        assert data[row, 1] == code_1
    assert np.isnan(data[4, 1])
예제 #6
0
def test_gen_valmaps_default_missing_vals(smalldf):
    """NaN entries in categorical columns must be left out of the value maps.

    A NaN is written into one cell of each categorical column, then the
    value maps are generated and checked to hold only the real categories.
    """
    dtypes = ['continuous', 'categorical', 'categorical', 'continuous']
    metadata = dict()

    # ``.ix`` no longer exists (deprecated in pandas 0.20, removed in 1.0);
    # ``.iloc`` is the positional replacement.
    # NOTE(review): equivalent to the old ``.ix`` call provided ``smalldf``
    # has default integer row/column labels - verify against the fixture.
    for row, col in ((0, 1), (1, 2)):
        smalldf.iloc[row, col] = float('NaN')

    valmaps = du.gen_valmaps(smalldf, dtypes, metadata)

    # Only the categorical columns (1 and 2) are mapped.
    assert 0 not in valmaps
    assert 3 not in valmaps
    assert 1 in valmaps
    assert 2 in valmaps

    valmap_1 = valmaps[1]
    valmap_2 = valmaps[2]

    # Two categories only: NaN did not become a third one.
    assert len(valmap_1['val2idx']) == 2
    assert len(valmap_1['idx2val']) == 2
    assert valmap_1['val2idx'][0] == 0
    assert valmap_1['val2idx'][1] == 1
    assert valmap_1['idx2val'][0] == 0
    assert valmap_1['idx2val'][1] == 1

    # Three categories only: NaN did not become a fourth one.
    assert len(valmap_2['val2idx']) == 3
    assert len(valmap_2['idx2val']) == 3
    assert valmap_2['val2idx']['one'] == 0
    assert valmap_2['val2idx']['three'] == 1
    assert valmap_2['val2idx']['two'] == 2
    assert valmap_2['idx2val'][0] == 'one'
    assert valmap_2['idx2val'][1] == 'three'
    assert valmap_2['idx2val'][2] == 'two'
예제 #7
0
def test_gen_valmaps_metadata(smalldf):
    """Value maps follow the value lists supplied through ``metadata``.

    Metadata lists five values for column 1 and four for column 2; the
    resulting maps must cover every listed value, with the indices
    asserted below.
    """
    dtypes = ['continuous', 'categorical', 'categorical', 'continuous']
    metadata = {
        1: {
            'dtype': 'categorical',
            'values': [-1, 0, 1, 2, 99],
        },
        2: {
            'dtype': 'categorical',
            'values': ['zero', 'one', 'two', 'three'],
        },
    }

    valmaps = du.gen_valmaps(smalldf, dtypes, metadata)

    # Continuous columns are never mapped; categorical ones always are.
    for col in (0, 3):
        assert col not in valmaps
    for col in (1, 2):
        assert col in valmaps

    col_1_map = valmaps[1]
    col_2_map = valmaps[2]

    assert len(col_1_map['val2idx']) == 5
    assert len(col_1_map['idx2val']) == 5

    # col 1: every metadata value is mapped, indexed in ascending order.
    col_1_order = [-1, 0, 1, 2, 99]
    for idx, value in enumerate(col_1_order):
        assert col_1_map['val2idx'][value] == idx
    for idx, value in enumerate(col_1_order):
        assert col_1_map['idx2val'][idx] == value

    # col 2: the asserted index order is alphabetical, not the order in
    # which the values were listed in the metadata.
    col_2_order = ['one', 'three', 'two', 'zero']
    for idx, value in enumerate(col_2_order):
        assert col_2_map['val2idx'][value] == idx
    for idx, value in enumerate(col_2_order):
        assert col_2_map['idx2val'][idx] == value
예제 #8
0
def test_gen_valmaps_metadata(smalldf):
    """Check the value maps produced when metadata lists the valid values.

    Column 1 is given five integer values and column 2 four string values
    via ``metadata``; each value must be assigned the index asserted below.
    """
    metadata = dict()
    metadata[1] = dict(dtype='categorical', values=[-1, 0, 1, 2, 99])
    metadata[2] = dict(
        dtype='categorical',
        values=['zero', 'one', 'two', 'three'],
    )
    dtypes = ['continuous', 'categorical', 'categorical', 'continuous']

    valmaps = du.gen_valmaps(smalldf, dtypes, metadata)

    assert 0 not in valmaps and 3 not in valmaps
    assert 1 in valmaps and 2 in valmaps

    def check_lookups(valmap, values_by_index):
        # ``values_by_index[i]`` is the value expected to carry index ``i``.
        for index, value in enumerate(values_by_index):
            assert valmap['val2idx'][value] == index
        for index, value in enumerate(values_by_index):
            assert valmap['idx2val'][index] == value

    # col 1: sizes are checked in addition to the individual lookups.
    assert len(valmaps[1]['val2idx']) == 5
    assert len(valmaps[1]['idx2val']) == 5
    check_lookups(valmaps[1], (-1, 0, 1, 2, 99))

    # col 2: only the individual lookups are checked.
    check_lookups(valmaps[2], ('one', 'three', 'two', 'zero'))