コード例 #1
ファイル: test_compare.py プロジェクト: wavelets/copper
def test_get_set_train_test_dataset_property():
    # Datasets assigned to ModelComparison.train / .test must be exposed again
    # through the X_train / y_train / X_test / y_test properties.
    def as_dataset(features, labels):
        # Append the labels as an extra column (index 4) and flag it as target.
        stacked = np.hstack((features, labels[np.newaxis].T))
        dataset = copper.Dataset(pd.DataFrame(stacked))
        dataset.role[4] = dataset.TARGET
        return dataset

    X, Y = get_iris()
    splits = cross_validation.train_test_split(X, Y, test_size=0.6)
    X_train, X_test, y_train, y_test = splits

    train = as_dataset(X_train, y_train)
    test = as_dataset(X_test, y_test)
    # --
    mc = copper.ModelComparison()
    mc.train = train
    mc.test = test

    # The assertions expect 150 samples split 40/60 with 4 feature columns.
    n_train = 150 * 0.4
    n_test = 150 * 0.6
    eq_(mc.X_train.shape, (n_train, 4))
    eq_(mc.y_train.shape, (n_train, ))
    eq_(mc.X_test.shape, (n_test, 4))
    eq_(mc.y_test.shape, (n_test, ))

    # The contents must match the arrays the datasets were built from.
    eq_(mc.X_train, X_train)
    eq_(mc.y_train, y_train)
    eq_(mc.X_test, X_test)
    eq_(mc.y_test, y_test)
コード例 #2
def test_copy_metadata():
    # Copying metadata between two datasets that share the same columns must
    # leave the receiver's metadata equal to the donor's.
    cols = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
    df1 = pd.DataFrame(np.random.rand(5, 10), columns=cols)
    ds1 = copper.Dataset(df1)
    ds1.role[['c', 'd', 'h', 'i']] = ds1.TARGET
    ds1.type[['b', 'c', 'g', 'i']] = ds1.CATEGORY

    df2 = pd.DataFrame(np.random.rand(5, 10), columns=cols)
    ds2 = copper.Dataset(df2)
    # Without this call ds2 still carries its freshly created metadata, so the
    # comparison below would pit it against ds1's customised roles and types.
    # NOTE(review): relies on copy_metadata's default for `ignoreMissing`;
    # no column is missing here, so either setting should behave the same.
    ds2.copy_metadata(ds1.metadata)
    eq_(ds2.metadata, ds1.metadata)
コード例 #3
def test_set_column():
    # Assigning to a column through the Dataset replaces that column's values.
    frame = pd.DataFrame(np.random.rand(5, 10))
    dataset = copper.Dataset(frame)
    replacement = np.random.rand(5, 1)

    # Before the assignment the column mirrors the source frame.
    eq_(dataset[3].values, frame[3].values)

    dataset[3] = replacement
    # A list selection keeps the result 2-D so it lines up with the (5, 1) array.
    eq_(dataset[[3]].values, replacement)
コード例 #4
def test_set_metadata_fail_length():
    # Hands the Dataset a metadata frame that is one row short (label 0 dropped).
    # NOTE(review): the "fail" in the name suggests an expected-exception
    # decorator sits above this function, outside this view -- confirm.
    ds = copper.Dataset(pd.DataFrame(np.random.rand(5, 5)))

    truncated = ds.metadata.copy().drop(0)
    ds.metadata = truncated
コード例 #5
def test_set_metadata_fail_index():
    # Hands the Dataset a metadata frame of the right length whose index holds
    # a label (11) in place of 0.
    # NOTE(review): the "fail" in the name suggests an expected-exception
    # decorator sits above this function, outside this view -- confirm.
    ds = copper.Dataset(pd.DataFrame(np.random.rand(5, 5)))

    relabelled = ds.metadata.copy().reindex([11, 1, 2, 3, 4])
    ds.metadata = relabelled
コード例 #6
def test_set_metadata():
    # Edits made on a copy of the metadata must stay invisible to the Dataset
    # until the copy is assigned back through `ds.metadata`.
    df = pd.DataFrame(np.random.rand(5, 5))
    ds = copper.Dataset(df)

    def pick_column():
        # int() keeps the label an integer on Python 2, where math.floor
        # returns a float.
        return int(math.floor(random.random() * 5))

    # (metadata column, Dataset attribute, default value, value to assign)
    fields = (
        ('Role', 'role', ds.INPUT, ds.TARGET),
        ('Type', 'type', ds.NUMBER, ds.CATEGORY),
    )

    for meta_col, attr, default, changed in fields:
        rand_col = pick_column()
        meta = ds.metadata.copy()
        # A single .loc write lands in `meta` itself; the chained form
        # meta[meta_col][rand_col] = ... may write into a temporary and be lost.
        meta.loc[rand_col, meta_col] = changed
        eq_(getattr(ds, attr)[rand_col], default)  # No change until reassignment
        ds.metadata = meta
        eq_(getattr(ds, attr)[rand_col], changed)  # Change

        # Repeat on random columns; already-changed columns are fair game.
        for _ in range(5):
            rand_col = pick_column()
            meta = ds.metadata.copy()
            meta.loc[rand_col, meta_col] = changed
            ds.metadata = meta
            eq_(getattr(ds, attr)[rand_col], changed)
コード例 #7
def test_create_empty():
    # A Dataset built without arguments has empty role/type series and an
    # empty frame and metadata.
    empty_ds = copper.Dataset()

    eq_(empty_ds.role, pd.Series())
    eq_(empty_ds.type, pd.Series())

    eq_(empty_ds.frame.empty, True)
    eq_(empty_ds.metadata.empty, True)
コード例 #8
def get_iris_ds():
    # Wrap the arrays returned by get_iris() in a Dataset: the features become
    # the leading columns and the labels go into a 'Target' column that is
    # given the TARGET role.
    features, labels = get_iris()

    frame = pd.DataFrame(features)
    frame['Target'] = pd.Series(labels, name='Target')

    iris = copper.Dataset(frame)
    iris.role['Target'] = iris.TARGET
    return iris
コード例 #9
def test_update_cat_to_num_float():
    # Switching a column of strings such as 'a(0.250000)' to NUMBER must give
    # back the float embedded in each string.
    # Divide by a float so the expected values are 0.00 .. 0.99 even under
    # Python 2 integer division, where `/ 100` would make them all zero and
    # the test trivial.
    sol = np.arange(100) / 100.0
    strings = np.array(['a(%f)' % d for d in sol])
    df = pd.DataFrame(strings)
    ds = copper.Dataset(df)
    ds.type[0] = ds.NUMBER
    eq_(sol, ds[0].values)
コード例 #10
def get_train():
    # Build a 12-row Dataset: three all-ones feature columns plus a string
    # 'target' column flagged with the TARGET role.
    features = np.ones((12, 3))
    labels = ['b', 'z', 'b', 'g', 'g', 'z', 'b', 'z', 'g', 'b', 'g', 'z']
    # Codes for the labels above, assuming alphabetical order (b=0, g=1, z=2):
    #        [ 0,   2,   0,   1,   1,   2,   0,   2,   1,   0,   1,   2]
    frame = pd.DataFrame(features)
    frame['target'] = labels

    train = copper.Dataset(frame)
    train.role['target'] = train.TARGET
    return train
コード例 #11
def test_set_frame_different_cols_fail():
    # The closing comparison is meant to fail: swapping in a frame with
    # different columns should recreate the metadata, so it no longer equals
    # the snapshot taken beforehand.
    ds = copper.Dataset(pd.DataFrame(np.random.rand(5, 5)))
    snapshot = ds.metadata.copy()

    ds.frame = pd.DataFrame(np.random.rand(10, 10))
    eq_(ds.metadata, snapshot)
コード例 #12
ファイル: test_transforms.py プロジェクト: wavelets/copper
def test_ml_target_number():
    # For a numeric target column, ml_target must return the column's values
    # unchanged and None as the first element of the pair.
    ds = copper.Dataset(pd.DataFrame(np.random.rand(8, 6)))

    # Pick one of the six columns at random to act as the target.
    target_col = math.floor(random.random() * 6)
    ds.role[target_col] = ds.TARGET

    encoder, values = copper.t.ml_target(ds)
    eq_(values, ds[target_col].values)
    eq_(encoder, None)
コード例 #13
def test_save_load_metadata():
    # Metadata written to CSV and read back must restore the same roles and
    # types on a second Dataset built from the same frame.
    import shutil  # local import: only needed for the cleanup below

    # A private scratch directory avoids clashing with other runs that write
    # 'metadata.csv' into the shared temp dir, and can be removed afterwards.
    workdir = tempfile.mkdtemp()
    try:
        csv_path = os.path.join(workdir, 'metadata.csv')
        # Save
        df = pd.DataFrame(np.random.rand(5, 10))
        ds = copper.Dataset(df)
        ds.role[2] = ds.TARGET
        ds.role[7] = ds.IGNORE
        ds.type[1] = ds.CATEGORY
        ds.type[5] = ds.CATEGORY
        ds.metadata.to_csv(csv_path)
        # Load
        ds2 = copper.Dataset(df)
        loaded_meta = pd.read_csv(csv_path)
        # The column labels come back as an ordinary 'Columns' column; make
        # them the index again before handing the frame to the Dataset.
        loaded_meta = loaded_meta.set_index('Columns')
        ds2.metadata = loaded_meta
        eq_(ds2.role[2], ds.TARGET)
        eq_(ds2.role[7], ds.IGNORE)
        eq_(ds2.type[1], ds.CATEGORY)
        eq_(ds2.type[5], ds.CATEGORY)
    finally:
        # Always remove the scratch directory, even when an assertion fails.
        shutil.rmtree(workdir, ignore_errors=True)
コード例 #14
def test_set_frame_different_length_same_cols():
    # Replacing the frame with one that has more rows but the same columns
    # must keep the customised metadata.
    initial = pd.DataFrame(np.random.rand(5, 5))
    ds = copper.Dataset(initial.copy())
    ds.role[[2, 4]] = ds.TARGET
    ds.type[[1, 2]] = ds.CATEGORY
    expected_meta = ds.metadata.copy()

    ds.frame = pd.DataFrame(np.random.rand(10, 5))
    eq_(ds.metadata, expected_meta)
コード例 #15
def test_set_frame_different_length_same_cols_fail():
    # The closing comparison is meant to fail: after a same-column frame swap
    # the Dataset should still hold the customised metadata, not the defaults
    # captured before the roles and types were edited.
    initial = pd.DataFrame(np.random.rand(5, 5))
    ds = copper.Dataset(initial.copy())
    defaults = ds.metadata.copy()

    ds.role[[2, 4]] = ds.TARGET
    ds.type[[1, 2]] = ds.CATEGORY

    ds.frame = pd.DataFrame(np.random.rand(10, 5))
    eq_(ds.metadata, defaults)
コード例 #16
ファイル: test_transforms.py プロジェクト: wavelets/copper
def test_ml_target_string():
    # For a string target column, ml_target must return integer codes together
    # with an encoder whose classes_ are the sorted distinct labels.
    frame = pd.DataFrame(np.random.rand(6, 6))
    labels = ['z', 'h', 'z', 'c', 'h', 'c']
    expected_codes = [2, 1, 2, 0, 1, 0]  # c=0, h=1, z=2
    frame['T'] = labels

    ds = copper.Dataset(frame)
    ds.role['T'] = ds.TARGET

    encoder, codes = copper.t.ml_target(ds)
    eq_(codes, np.array(expected_codes))
    eq_(encoder.classes_.tolist(), ['c', 'h', 'z'])
コード例 #17
def test_default_type():
    # Columns holding strings must default to CATEGORY and every other column
    # to NUMBER.
    df = pd.DataFrame(np.random.rand(5, 20))
    rand_col = math.floor(random.random() * 20)
    rand_col2 = math.floor(random.random() * 20)
    # Stringify each chosen column from its own data. Previously the second
    # column was overwritten with a copy of the first. The two picks may
    # coincide; stringifying twice is harmless.
    for col in (rand_col, rand_col2):
        df[col] = df[col].apply(str)
    ds = copper.Dataset(df)

    # Check both string columns, not only the first one.
    for col in ds.columns:
        if col in (rand_col, rand_col2):
            eq_(ds.type[col], ds.CATEGORY)
        else:
            eq_(ds.type[col], ds.NUMBER)
コード例 #18
def test_filter_type():
    # filter(type=...) must return exactly the columns of the requested
    # type(s); with no arguments it returns the whole frame.
    frame = pd.DataFrame(np.random.rand(5, 10))
    ds = copper.Dataset(frame)

    # Half of the columns become categorical.
    ds.type[[0, 2, 4, 5, 9]] = ds.CATEGORY
    categorical = ds.filter(type=ds.CATEGORY)
    eq_(categorical, ds[[0, 2, 4, 5, 9]])

    # Reset everything to categorical, then switch a different half to number.
    ds.type[:] = ds.CATEGORY
    ds.type[[1, 3, 6, 7, 9]] = ds.NUMBER
    numeric = ds.filter(type=ds.NUMBER)
    eq_(numeric, ds[[1, 3, 6, 7, 9]])

    # Asking for both types covers every column.
    eq_(ds.filter(type=[ds.NUMBER, ds.CATEGORY]), frame)

    eq_(ds.filter(), frame)
コード例 #19
def test_set_frame_different_cols():
    # After swapping in a frame with different columns, the previously edited
    # columns must be back at the default role (INPUT) and type (NUMBER).
    ds = copper.Dataset(pd.DataFrame(np.random.rand(5, 5)))
    ds.role[[2, 4]] = ds.TARGET
    ds.type[[1, 2]] = ds.CATEGORY

    ds.frame = pd.DataFrame(np.random.rand(10, 10))

    for col in (2, 4):
        eq_(ds.role[col], ds.INPUT)
    for col in (1, 2):
        eq_(ds.type[col], ds.NUMBER)
コード例 #20
ファイル: test_transforms.py プロジェクト: wavelets/copper
def test_ml_target_more_than_one():
    # With two columns flagged as TARGET, ml_target must fall back to the
    # first one (column 3) and return None as the first element of the pair.
    import warnings

    df = pd.DataFrame(np.random.rand(8, 6))
    ds = copper.Dataset(df)

    ds.role[3] = ds.TARGET
    ds.role[5] = ds.TARGET

    with warnings.catch_warnings():
        # catch_warnings() alone only saves and restores the filter state; the
        # explicit filter is what actually silences warnings.
        # NOTE(review): presumably ml_target warns about the extra target.
        warnings.simplefilter('ignore')
        le, target = copper.t.ml_target(ds)

    eq_(le, None)
    eq_(target, ds[3].values)
コード例 #21
def test_create_noempty():
    # A Dataset built from a 10x5 frame must mirror the frame's shape, index
    # and columns, and carry one metadata entry per column.
    frame = pd.DataFrame(np.random.rand(10, 5))
    ds = copper.Dataset(frame)

    eq_(ds.frame, frame)

    # Length follows the row count.
    eq_(len(ds), 10)
    eq_(len(ds), len(frame))

    # One role / type / metadata row per column.
    eq_(len(ds.role), 5)
    eq_(len(ds.type), 5)
    eq_(len(ds.metadata), 5)

    # role and type are the corresponding metadata columns.
    eq_(ds.metadata['Role'], ds.role)
    eq_(ds.metadata['Type'], ds.type)

    eq_(ds.index, frame.index)
    eq_(ds.columns, frame.columns)

    # The string form of the Dataset is that of its metadata.
    eq_(str(ds), str(ds.metadata))
コード例 #22
def test_copy_metadata_ignore_false():
    # copy_metadata(..., ignoreMissing=False): columns shared with the donor
    # take the donor's role and type; columns the donor lacks ('z', 'y') are
    # expected to end up with role IGNORE and type NUMBER.
    donor_cols = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
    ds1 = copper.Dataset(pd.DataFrame(np.random.rand(5, 10), columns=donor_cols))
    ds1.role[['a', 'd', 'h', 'i']] = ds1.TARGET
    ds1.type[['b', 'd', 'g', 'i']] = ds1.CATEGORY

    receiver_cols = ['z', 'y', 'f', 'a', 'b', 'd', 'e']
    ds2 = copper.Dataset(pd.DataFrame(np.random.rand(5, 7), columns=receiver_cols))
    ds2.copy_metadata(ds1.metadata, ignoreMissing=False)

    expected_roles = [
        ('z', ds1.IGNORE),
        ('y', ds1.IGNORE),
        ('a', ds1.TARGET),
        ('b', ds1.INPUT),
        ('d', ds1.TARGET),
        ('e', ds1.INPUT),
    ]
    for col, role in expected_roles:
        eq_(ds2.role[col], role)

    expected_types = [
        ('z', ds1.NUMBER),
        ('y', ds1.NUMBER),
        ('a', ds1.NUMBER),
        ('b', ds1.CATEGORY),
        ('d', ds1.CATEGORY),
        ('e', ds1.NUMBER),
    ]
    for col, kind in expected_types:
        eq_(ds2.type[col], kind)
コード例 #23
ファイル: test_transforms.py プロジェクト: wavelets/copper
def test_ml_inputs_simple_with_target():
    # ml_inputs must leave out the target column (2), pass numeric columns
    # through unchanged and replace each categorical column with its
    # cat_encode output (4 columns wide for this data).
    frame = pd.DataFrame(np.random.rand(8, 6))
    labels = np.array(['1', '2', '1', '3', '5', '2', '1', '5'])
    frame[1] = labels
    frame[3] = np.array(labels)
    ds = copper.Dataset(frame)
    ds.type[[1, 3]] = ds.CATEGORY
    ds.role[[2]] = ds.TARGET

    ans = copper.t.ml_inputs(ds)
    # 5 input columns, minus the 2 categorical ones, plus 4 encoded columns
    # for each of them.
    expected_width = 5 - 2 + 4 * 2
    eq_(ans.shape, (8, expected_width))

    eq_(ans[:, 0], frame[0].values)
    eq_(ans[:, [1, 2, 3, 4]], copper.t.cat_encode(frame[1].values))
    eq_(ans[:, [5, 6, 7, 8]], copper.t.cat_encode(frame[3].values))
    eq_(ans[:, 9], frame[4].values)
    eq_(ans[:, 10], frame[5].values)
コード例 #24
def test_filter_role():
    # filter(role=...) must return exactly the columns holding the requested
    # role(s); with no arguments it returns the whole frame.
    frame = pd.DataFrame(np.random.rand(5, 10))
    ds = copper.Dataset(frame)

    # Ignore half of the columns; the rest keep the default INPUT role.
    ds.role[[0, 2, 4, 5, 9]] = ds.IGNORE
    inputs = ds.filter(role=ds.INPUT)
    eq_(inputs, ds[[1, 3, 6, 7, 8]])

    # Ignore everything, then re-enable a different set of inputs.
    ds.role[:] = ds.IGNORE
    ds.role[[1, 3, 4, 6, 8]] = ds.INPUT
    inputs = ds.filter(role=ds.INPUT)
    eq_(inputs, ds[[1, 3, 4, 6, 8]])

    ds.role[[2, 9]] = ds.TARGET
    targets = ds.filter(role=ds.TARGET)
    eq_(targets, ds[[2, 9]])

    # A list of roles selects the union, in column order.
    eq_(ds.filter(role=[ds.INPUT, ds.TARGET]), ds[[1, 2, 3, 4, 6, 8, 9]])

    eq_(ds.filter(), frame)
コード例 #25
ファイル: test_transforms.py プロジェクト: wavelets/copper
def test_ml_inputs_simple():
    # With no target set, ml_inputs must pass numeric columns through
    # unchanged and replace each categorical column with its cat_encode
    # output (4 columns wide for this data).
    frame = pd.DataFrame(np.random.rand(8, 6))
    labels = np.array(['1', '2', '1', '3', '5', '2', '1', '5'])
    frame[1] = labels
    frame[3] = np.array(labels)
    ds = copper.Dataset(frame)
    ds.type[[1, 3]] = ds.CATEGORY

    ans = copper.t.ml_inputs(ds)
    # 6 columns, minus the 2 categorical ones, plus 4 encoded columns for
    # each of them.
    expected_width = 6 - 2 + 4 * 2
    eq_(ans.shape, (8, expected_width))

    eq_(ans[:, 0], frame[0].values)
    eq_(ans[:, [1, 2, 3, 4]], copper.t.cat_encode(frame[1].values))
    eq_(ans[:, 5], frame[2].values)
    eq_(ans[:, [6, 7, 8, 9]], copper.t.cat_encode(frame[3].values))
    eq_(ans[:, 10], frame[4].values)
    eq_(ans[:, 11], frame[5].values)
コード例 #26
ファイル: test_transforms.py プロジェクト: wavelets/copper
def test_ml_inputs_big():
    # Larger variant: every tenth column of a 1000x100 frame is replaced by
    # the same random letter column and flagged CATEGORY; each must show up in
    # the ml_inputs output as a 26-column encoded block.
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    n_rows, n_cat = 1000, 10
    letter_idx = np.floor(np.random.rand(n_rows) * 26)
    letters = np.array([alphabet[int(k)] for k in letter_idx])
    frame = pd.DataFrame(np.random.rand(n_rows, 100))
    cat_cols = np.arange(n_cat) * 10
    for col in cat_cols:
        frame[col] = letters
    ds = copper.Dataset(frame)
    ds.type[cat_cols.tolist()] = ds.CATEGORY

    ans = copper.t.ml_inputs(ds)
    eq_(ans.shape, (n_rows, 100 - n_cat + 26 * n_cat))

    encoded = copper.t.cat_encode(letters)
    for position, col in enumerate(cat_cols):
        # Each earlier categorical column widened the output by 25 columns,
        # so this block starts 25 * position columns after its original slot.
        start = col + 25 * position
        stop = start + 26
        eq_(ans[:, start:stop], encoded)
コード例 #27
def test_filter_role_and_type():
    # filter(role=..., type=...) must return only the columns that match both
    # criteria.
    frame = pd.DataFrame(np.random.rand(5, 5))
    ds = copper.Dataset(frame)
    ds.role[:] = ds.IGNORE

    # One categorical input column.
    ds.role[2] = ds.INPUT
    ds.type[2] = ds.CATEGORY
    selected = ds.filter(role=ds.INPUT, type=ds.CATEGORY)
    eq_(selected, frame[[2]])

    # A second categorical input column.
    ds.role[4] = ds.INPUT
    ds.type[4] = ds.CATEGORY
    selected = ds.filter(role=ds.INPUT, type=ds.CATEGORY)
    eq_(selected, frame[[2, 4]])

    # The remaining columns are still ignored and numeric.
    eq_(ds.filter(role=ds.IGNORE, type=ds.NUMBER), frame[[0, 1, 3]])

    # Ignoring column 4 again removes it from the input/category selection.
    ds.role[4] = ds.IGNORE
    selected = ds.filter(role=ds.INPUT, type=ds.CATEGORY)
    eq_(selected, frame[[2]])

    eq_(ds.filter(), frame)
コード例 #28
def test_create_empty_and_set():
    # A Dataset created empty and then given a frame must end up in the same
    # state as one constructed from that frame directly.
    frame = pd.DataFrame(np.random.rand(10, 5))
    ds = copper.Dataset()

    # Freshly created: everything is empty.
    eq_(ds.role, pd.Series())
    eq_(ds.type, pd.Series())
    eq_(ds.metadata.empty, True)
    eq_(ds.frame.empty, True)

    ds.frame = frame.copy()
    eq_(ds.frame, frame)

    # Length follows the row count.
    eq_(len(ds), 10)
    eq_(len(ds), len(frame))

    # One role / type / metadata row per column.
    eq_(len(ds.role), 5)
    eq_(len(ds.type), 5)
    eq_(len(ds.metadata), 5)

    # role and type are the corresponding metadata columns.
    eq_(ds.metadata['Role'], ds.role)
    eq_(ds.metadata['Type'], ds.type)

    eq_(ds.index, frame.index)
    eq_(ds.columns, frame.columns)

    # Both text representations are those of the metadata.
    eq_(str(ds), str(ds.metadata))
    eq_(unicode(ds), unicode(ds.metadata))
コード例 #29
def test_tail():
    # Dataset.tail(n) must return the same rows as DataFrame.tail(n).
    df = pd.DataFrame(np.random.rand(5, 10))
    ds = copper.Dataset(df.copy())
    n_rows = math.floor(random.random() * 10)
    # The test is named for tail() but previously exercised head().
    # NOTE(review): assumes Dataset exposes tail() alongside head() -- confirm.
    eq_(ds.tail(n_rows), df.tail(n_rows))
コード例 #30
def test_get_column():
    # Indexing the Dataset by a column label returns the frame's column.
    frame = pd.DataFrame(np.random.rand(5, 10))
    ds = copper.Dataset(frame)
    # First, a middle and the last column.
    for col in (0, 5, 9):
        eq_(ds[col], frame[col])