def test_gen_valmaps_default(smalldf):
    """Check the value maps built from the data alone (empty metadata)."""
    column_types = ['continuous', 'categorical', 'categorical', 'continuous']
    valmaps = du.gen_valmaps(smalldf, column_types, {})

    # Only the two categorical columns are expected to receive a value map.
    for col in (0, 3):
        assert col not in valmaps
    for col in (1, 2):
        assert col in valmaps

    # Expected key -> value pairs, per column and per mapping direction.
    expected = {
        1: {
            'val2idx': {0: 0, 1: 1},
            'idx2val': {0: 0, 1: 1},
        },
        2: {
            'val2idx': {'one': 0, 'three': 1, 'two': 2},
            'idx2val': {0: 'one', 1: 'three', 2: 'two'},
        },
    }

    for col, directions in expected.items():
        for direction, pairs in directions.items():
            mapping = valmaps[col][direction]
            assert len(mapping) == len(pairs)
            # Index rather than compare whole objects so the check does not
            # depend on the concrete container type of the mapping.
            for key, want in pairs.items():
                assert mapping[key] == want
def test_gen_valmaps_default_missing_vals(smalldf):
    """Check that NaN entries are left out of the generated value maps.

    One cell in each categorical column (1 and 2) is overwritten with NaN
    before the maps are generated; the maps must still contain only the
    real category values.
    """
    dtypes = ['continuous', 'categorical', 'categorical', 'continuous']
    metadata = dict()

    # Work on a copy so the NaN writes cannot leak into other tests that
    # might receive the same fixture object.
    df = smalldf.copy()

    # `DataFrame.ix` was removed in pandas 1.0; `.iloc` addresses the cell
    # by (row, column) position.
    # NOTE(review): assumes the original `.ix[0, 1]` / `.ix[1, 2]` were meant
    # positionally (default integer row index) -- confirm against the fixture.
    df.iloc[0, 1] = float('NaN')
    df.iloc[1, 2] = float('NaN')

    valmaps = du.gen_valmaps(df, dtypes, metadata)

    # Only the categorical columns get a value map.
    assert 0 not in valmaps
    assert 3 not in valmaps
    assert 1 in valmaps
    assert 2 in valmaps

    valmap_1 = valmaps[1]
    valmap_2 = valmaps[2]

    # Column 1: two categories, NaN not counted.
    assert len(valmap_1['val2idx']) == 2
    assert len(valmap_1['idx2val']) == 2

    assert valmap_1['val2idx'][0] == 0
    assert valmap_1['val2idx'][1] == 1

    assert valmap_1['idx2val'][0] == 0
    assert valmap_1['idx2val'][1] == 1

    # Column 2: three categories, NaN not counted.
    assert len(valmap_2['val2idx']) == 3
    assert len(valmap_2['idx2val']) == 3

    assert valmap_2['val2idx']['one'] == 0
    assert valmap_2['val2idx']['three'] == 1
    assert valmap_2['val2idx']['two'] == 2

    assert valmap_2['idx2val'][0] == 'one'
    assert valmap_2['idx2val'][1] == 'three'
    assert valmap_2['idx2val'][2] == 'two'
def test_dataframe_to_array_all_categorical_with_missing_vals():
    """Check the array conversion of an all-categorical frame with NaNs.

    Builds a two-column frame (one numeric, one string column, each ending
    in NaN), generates value maps for it and converts it to an array. The
    result must keep the frame's shape, have a float dtype, hold the
    expected integer codes, and keep NaN where the input was missing.
    """
    s_1 = pd.Series([-1, 0, 2, 1, float('NaN')])
    s_2 = pd.Series(['one', 'two', 'three', 'four', float('NaN')])
    df = pd.concat([s_1, s_2], axis=1)

    dtypes = ['categorical'] * 2
    metadata = dict()

    valmaps = du.gen_valmaps(df, dtypes, metadata)
    data = du.dataframe_to_array(df, valmaps)

    assert data.shape == df.shape
    assert 'float' in str(data.dtype)

    # Expected codes for the four non-missing rows of each column. They
    # correspond to the rank of each value among the column's sorted
    # distinct values (-1 < 0 < 1 < 2 and 'four' < 'one' < 'three' < 'two').
    expected_codes = {
        0: [0, 1, 3, 2],
        1: [1, 3, 2, 0],
    }
    for col, codes in expected_codes.items():
        for row, code in enumerate(codes):
            assert data[row, col] == code
        # The last row was NaN in the input and must stay NaN.
        assert np.isnan(data[4, col])
def test_gen_valmaps_metadata(smalldf):
    """Check the value maps when metadata lists each column's values.

    The metadata for columns 1 and 2 declares more values than the default
    maps contain (e.g. -1, 2, 99 and 'zero'); the generated maps must cover
    every declared value.
    """
    dtypes = ['continuous', 'categorical', 'categorical', 'continuous']

    metadata = {
        1: {
            'dtype': 'categorical',
            'values': [-1, 0, 1, 2, 99],
        },
        2: {
            'dtype': 'categorical',
            'values': ['zero', 'one', 'two', 'three'],
        },
    }

    valmaps = du.gen_valmaps(smalldf, dtypes, metadata)

    # Only the categorical columns get a value map.
    assert 0 not in valmaps
    assert 3 not in valmaps
    assert 1 in valmaps
    assert 2 in valmaps

    valmap_1 = valmaps[1]
    valmap_2 = valmaps[2]

    assert len(valmap_1['val2idx']) == 5
    assert len(valmap_1['idx2val']) == 5

    # col 1
    assert valmap_1['val2idx'][-1] == 0
    assert valmap_1['val2idx'][0] == 1
    assert valmap_1['val2idx'][1] == 2
    assert valmap_1['val2idx'][2] == 3
    assert valmap_1['val2idx'][99] == 4

    assert valmap_1['idx2val'][0] == -1
    assert valmap_1['idx2val'][1] == 0
    assert valmap_1['idx2val'][2] == 1
    assert valmap_1['idx2val'][3] == 2
    assert valmap_1['idx2val'][4] == 99

    # col 2 -- the expected indices follow sorted order of the values, not
    # the order in which the metadata lists them.
    assert valmap_2['val2idx']['one'] == 0
    assert valmap_2['val2idx']['three'] == 1
    assert valmap_2['val2idx']['two'] == 2
    assert valmap_2['val2idx']['zero'] == 3

    assert valmap_2['idx2val'][0] == 'one'
    assert valmap_2['idx2val'][1] == 'three'
    assert valmap_2['idx2val'][2] == 'two'
    assert valmap_2['idx2val'][3] == 'zero'