Esempio n. 1
0
def test_iris_accuracy():
    # Fits DBN([25]) on iris with pretrain_epochs=0 and checks the accuracy
    # on the same data it was fitted on against the recorded value.
    settings = dict(pretrain_epochs=0,
                    finetune_epochs=10,
                    finetune_batch_size=10,
                    random_state=1)
    dbn = DBN([25], **settings)
    X, y = get_iris()
    dbn.fit(X, y)

    hits = dbn.predict(X) == y
    acc = hits.mean()
    eq_(acc, 0.95333, 5)
Esempio n. 2
0
def test_set_column():
    # Assigning to a Dataset column must replace the stored values.
    frame = pd.DataFrame(np.random.rand(5, 10))
    ds = copper.Dataset(frame)
    replacement = np.random.rand(5, 1)
    # Before the assignment, column 3 still matches the source frame.
    eq_(ds[3].values, frame[3].values)
    ds[3] = replacement
    # ds[[3]] is compared against the (5, 1) replacement array.
    eq_(ds[[3]].values, replacement)
Esempio n. 3
0
def test_set_column():
    # Assigning to a Dataset column must replace the stored values.
    frame = pd.DataFrame(np.random.rand(5, 10))
    ds = copper.Dataset(frame)
    replacement = np.random.rand(5, 1)
    # Before the assignment, column 3 still matches the source frame.
    eq_(ds[3].values, frame[3].values)
    ds[3] = replacement
    # ds[[3]] is compared against the (5, 1) replacement array.
    eq_(ds[[3]].values, replacement)
Esempio n. 4
0
def test_update_cat_to_num_float():
    # A text column of the form 'a(<float>)' is expected to hold the
    # embedded numbers after its type is set to NUMBER and update() runs.
    sol = np.arange(100) / 100
    labels = ['a(%f)' % value for value in sol]
    strings = np.array(labels)
    ds = copper.Dataset(pd.DataFrame(strings))
    ds.type[0] = ds.NUMBER
    ds.update()
    eq_(sol, ds[0].values)
Esempio n. 5
0
def test_update_cat_to_num_float():
    # A text column of the form 'a(<float>)' is expected to hold the
    # embedded numbers after its type is set to NUMBER and update() runs.
    sol = np.arange(100) / 100
    labels = ['a(%f)' % value for value in sol]
    strings = np.array(labels)
    ds = copper.Dataset(pd.DataFrame(strings))
    ds.type[0] = ds.NUMBER
    ds.update()
    eq_(sol, ds[0].values)
Esempio n. 6
0
def test_transform_float():
    # copper.t.to_float applied element-wise to "STRING(<float>)" entries
    # is expected to give back the original numbers.
    array = np.arange(10) / 10
    strings = ["STRING(%f)" % item for item in array]
    ser = pd.Series(strings)
    sol = pd.Series(array)
    converted = ser.apply(copper.t.to_float)
    eq_(converted, sol)
Esempio n. 7
0
def test_transform_float():
    # copper.t.to_float applied element-wise to "STRING(<float>)" entries
    # is expected to give back the original numbers.
    array = np.arange(10) / 10
    strings = ["STRING(%f)" % item for item in array]
    ser = pd.Series(strings)
    sol = pd.Series(array)
    converted = ser.apply(copper.t.to_float)
    eq_(converted, sol)
Esempio n. 8
0
def test_set_frame_different_cols_fail():
    # The final comparison is expected to fail: a mismatch shows that the
    # metadata was recreated when the frame was replaced.
    ds = copper.Dataset(pd.DataFrame(np.random.rand(5, 5)))
    snapshot = ds.metadata.copy()

    # Swap in a frame with a different number of columns.
    ds.frame = pd.DataFrame(np.random.rand(10, 10))
    eq_(ds.metadata, snapshot)
Esempio n. 9
0
def get_iris_ds_string():
    # Returns the iris Dataset with 'Target' marked as CATEGORY and its
    # values turned into the string labels 'Iris-A', 'Iris-B', 'Iris-C'.
    ds = get_iris_ds()
    ds.type['Target'] = ds.CATEGORY
    # Relabel with a single assignment rather than ds['Target'][mask] = ...:
    # chained indexing can write to a temporary copy and leave the Dataset
    # untouched. Values missing from the mapping are left as they are.
    labels = {'0': 'Iris-A', '1': 'Iris-B', '2': 'Iris-C'}
    ds['Target'] = ds['Target'].apply(str).replace(labels)
    eq_(ds.metadata['dtype']['Target'], object)
    return ds
Esempio n. 10
0
def test_set_frame_different_cols_fail():
    # The final comparison is expected to fail: a mismatch shows that the
    # metadata was recreated when the frame was replaced.
    ds = copper.Dataset(pd.DataFrame(np.random.rand(5, 5)))
    snapshot = ds.metadata.copy()

    # Swap in a frame with a different number of columns.
    ds.frame = pd.DataFrame(np.random.rand(10, 10))
    eq_(ds.metadata, snapshot)
Esempio n. 11
0
def get_iris_ds_string():
    # Returns the iris Dataset with 'Target' marked as CATEGORY and its
    # values turned into the string labels 'Iris-A', 'Iris-B', 'Iris-C'.
    ds = get_iris_ds()
    ds.type['Target'] = ds.CATEGORY
    # Relabel with a single assignment rather than ds['Target'][mask] = ...:
    # chained indexing can write to a temporary copy and leave the Dataset
    # untouched. Values missing from the mapping are left as they are.
    labels = {'0': 'Iris-A', '1': 'Iris-B', '2': 'Iris-C'}
    ds['Target'] = ds['Target'].apply(str).replace(labels)
    eq_(ds.metadata['dtype']['Target'], object)
    return ds
Esempio n. 12
0
def test_ml_target_number():
    # With one numeric TARGET column, ml_target is expected to return that
    # column's values together with None in place of an encoder.
    ds = copper.Dataset(pd.DataFrame(np.random.rand(8, 6)))

    # Pick one of the six columns at random to act as the target.
    target_col = math.floor(random.random() * 6)
    ds.role[target_col] = ds.TARGET

    encoder, values = copper.t.ml_target(ds)
    eq_(values, ds[target_col].values)
    eq_(encoder, None)
Esempio n. 13
0
def test_ml_target_number():
    # With one numeric TARGET column, ml_target is expected to return that
    # column's values together with None in place of an encoder.
    ds = copper.Dataset(pd.DataFrame(np.random.rand(8, 6)))

    # Pick one of the six columns at random to act as the target.
    target_col = math.floor(random.random() * 6)
    ds.role[target_col] = ds.TARGET

    encoder, values = copper.t.ml_target(ds)
    eq_(values, ds[target_col].values)
    eq_(encoder, None)
Esempio n. 14
0
def test_set_frame_different_length_same_cols_fail():
    # The final comparison is expected to fail: a mismatch shows that the
    # default metadata was not put back when the frame was replaced.
    original = pd.DataFrame(np.random.rand(5, 5))
    ds = copper.Dataset(original.copy())
    # Snapshot taken before any role/type is customised.
    default_meta = ds.metadata.copy()
    ds.role[[2, 4]] = ds.TARGET
    ds.type[[1, 2]] = ds.CATEGORY

    # New frame: more rows, same five columns.
    ds.frame = pd.DataFrame(np.random.rand(10, 5))
    eq_(ds.metadata, default_meta)
Esempio n. 15
0
def test_set_frame_different_length_same_cols_fail():
    # The final comparison is expected to fail: a mismatch shows that the
    # default metadata was not put back when the frame was replaced.
    original = pd.DataFrame(np.random.rand(5, 5))
    ds = copper.Dataset(original.copy())
    # Snapshot taken before any role/type is customised.
    default_meta = ds.metadata.copy()
    ds.role[[2, 4]] = ds.TARGET
    ds.type[[1, 2]] = ds.CATEGORY

    # New frame: more rows, same five columns.
    ds.frame = pd.DataFrame(np.random.rand(10, 5))
    eq_(ds.metadata, default_meta)
Esempio n. 16
0
def test_set_frame_different_length_same_cols():
    # Customised metadata must be kept when the replacement frame has the
    # same columns, even if its length differs.
    original = pd.DataFrame(np.random.rand(5, 5))
    ds = copper.Dataset(original.copy())
    ds.role[[2, 4]] = ds.TARGET
    ds.type[[1, 2]] = ds.CATEGORY
    # Snapshot taken after the customisation.
    customised = ds.metadata.copy()

    # New frame: more rows, same five columns.
    ds.frame = pd.DataFrame(np.random.rand(10, 5))
    eq_(ds.metadata, customised)
Esempio n. 17
0
def test_set_frame_different_length_same_cols():
    # Customised metadata must be kept when the replacement frame has the
    # same columns, even if its length differs.
    original = pd.DataFrame(np.random.rand(5, 5))
    ds = copper.Dataset(original.copy())
    ds.role[[2, 4]] = ds.TARGET
    ds.type[[1, 2]] = ds.CATEGORY
    # Snapshot taken after the customisation.
    customised = ds.metadata.copy()

    # New frame: more rows, same five columns.
    ds.frame = pd.DataFrame(np.random.rand(10, 5))
    eq_(ds.metadata, customised)
Esempio n. 18
0
def test_iris_accuracy():
    # Fits DBN([25]) on iris with pretrain_epochs=0 and checks the accuracy
    # on the same data it was fitted on against the recorded value.
    settings = dict(pretrain_epochs=0,
                    finetune_epochs=10,
                    finetune_batch_size=10,
                    random_state=1)
    dbn = DBN([25], **settings)
    X, y = get_iris()
    dbn.fit(X, y)

    hits = dbn.predict(X) == y
    acc = hits.mean()
    eq_(acc, 0.95333, 5)
Esempio n. 19
0
def test_ml_target_string():
    # A string TARGET column is expected to come back as integer codes,
    # with the encoder's classes_ in sorted order.
    df = pd.DataFrame(np.random.rand(6, 6))
    df['T'] = ['z', 'h', 'z', 'c', 'h', 'c']
    # Codes follow the sorted classes: c -> 0, h -> 1, z -> 2.
    expected_codes = [2, 1, 2, 0, 1, 0]

    ds = copper.Dataset(df)
    ds.role['T'] = ds.TARGET

    encoder, values = copper.t.ml_target(ds)
    eq_(values, np.array(expected_codes))
    eq_(encoder.classes_.tolist(), ['c', 'h', 'z'])
Esempio n. 20
0
def test_ml_target_string():
    # A string TARGET column is expected to come back as integer codes,
    # with the encoder's classes_ in sorted order.
    df = pd.DataFrame(np.random.rand(6, 6))
    df['T'] = ['z', 'h', 'z', 'c', 'h', 'c']
    # Codes follow the sorted classes: c -> 0, h -> 1, z -> 2.
    expected_codes = [2, 1, 2, 0, 1, 0]

    ds = copper.Dataset(df)
    ds.role['T'] = ds.TARGET

    encoder, values = copper.t.ml_target(ds)
    eq_(values, np.array(expected_codes))
    eq_(encoder.classes_.tolist(), ['c', 'h', 'z'])
Esempio n. 21
0
def test_default_type():
    # Columns holding strings are expected to default to CATEGORY and all
    # remaining columns to NUMBER.
    df = pd.DataFrame(np.random.rand(5, 20))
    # Two random picks among the 20 columns; they may coincide.
    rand_col = math.floor(random.random() * 20)
    rand_col2 = math.floor(random.random() * 20)
    # Each picked column is converted to strings from its own values.
    df[rand_col] = df[rand_col].apply(str)
    df[rand_col2] = df[rand_col2].apply(str)
    ds = copper.Dataset(df)

    # Both string columns are checked, not only the first one.
    eq_(ds.type[rand_col], ds.CATEGORY)
    eq_(ds.type[rand_col2], ds.CATEGORY)
    for col in ds.columns:
        if col not in (rand_col, rand_col2):
            eq_(ds.type[col], ds.NUMBER)
Esempio n. 22
0
def test_default_type():
    # Columns holding strings are expected to default to CATEGORY and all
    # remaining columns to NUMBER.
    df = pd.DataFrame(np.random.rand(5, 20))
    # Two random picks among the 20 columns; they may coincide.
    rand_col = math.floor(random.random() * 20)
    rand_col2 = math.floor(random.random() * 20)
    # Each picked column is converted to strings from its own values.
    df[rand_col] = df[rand_col].apply(str)
    df[rand_col2] = df[rand_col2].apply(str)
    ds = copper.Dataset(df)

    # Both string columns are checked, not only the first one.
    eq_(ds.type[rand_col], ds.CATEGORY)
    eq_(ds.type[rand_col2], ds.CATEGORY)
    for col in ds.columns:
        if col not in (rand_col, rand_col2):
            eq_(ds.type[col], ds.NUMBER)
Esempio n. 23
0
def test_copy_metadata():
    # copy_metadata must make the second Dataset's metadata equal to the
    # customised metadata of the first.
    cols = list('abcdefghij')
    source = copper.Dataset(pd.DataFrame(np.random.rand(5, 10), columns=cols))
    source.role[['c', 'd', 'h', 'i']] = source.TARGET
    source.type[['b', 'c', 'g', 'i']] = source.CATEGORY

    # Second Dataset: same column names, fresh default metadata.
    target = copper.Dataset(pd.DataFrame(np.random.rand(5, 10), columns=cols))
    target.copy_metadata(source.metadata)
    eq_(target.metadata, source.metadata)
Esempio n. 24
0
def test_copy_metadata():
    # copy_metadata must make the second Dataset's metadata equal to the
    # customised metadata of the first.
    cols = list('abcdefghij')
    source = copper.Dataset(pd.DataFrame(np.random.rand(5, 10), columns=cols))
    source.role[['c', 'd', 'h', 'i']] = source.TARGET
    source.type[['b', 'c', 'g', 'i']] = source.CATEGORY

    # Second Dataset: same column names, fresh default metadata.
    target = copper.Dataset(pd.DataFrame(np.random.rand(5, 10), columns=cols))
    target.copy_metadata(source.metadata)
    eq_(target.metadata, source.metadata)
Esempio n. 25
0
def test_ml_target_more_than_one():
    # With two TARGET columns (3 and 5), ml_target is expected to return
    # the values of column 3 and None in place of an encoder.
    ds = copper.Dataset(pd.DataFrame(np.random.rand(8, 6)))

    for col in (3, 5):
        ds.role[col] = ds.TARGET

    import warnings
    # Any warning raised inside this block is silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        encoder, values = copper.t.ml_target(ds)
        eq_(encoder, None)
        eq_(values, ds[3].values)
Esempio n. 26
0
def test_ml_target_more_than_one():
    # With two TARGET columns (3 and 5), ml_target is expected to return
    # the values of column 3 and None in place of an encoder.
    ds = copper.Dataset(pd.DataFrame(np.random.rand(8, 6)))

    for col in (3, 5):
        ds.role[col] = ds.TARGET

    import warnings
    # Any warning raised inside this block is silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        encoder, values = copper.t.ml_target(ds)
        eq_(encoder, None)
        eq_(values, ds[3].values)
Esempio n. 27
0
def test_create_empty():
    # A Dataset built without arguments must be empty everywhere: role,
    # type, frame and metadata.
    empty_ds = copper.Dataset()
    eq_(empty_ds.role, pd.Series())
    eq_(empty_ds.type, pd.Series())
    eq_(empty_ds.frame.empty, True)
    eq_(empty_ds.metadata.empty, True)
Esempio n. 28
0
def test_save_load_weights():
    # Weights saved by a fitted DBN and loaded into a fresh one must give
    # the same coefficients, layer weights, predictions and probabilities.
    import tempfile
    # The path gets its own name so the tempfile module is not shadowed.
    weights_path = os.path.join(tempfile.gettempdir(), 'w.json')

    dbn1 = DBN([5], random_state=1234)
    X, y = get_iris()
    dbn1.fit(X, y)
    pred1 = dbn1.predict(X)
    prob1 = dbn1.predict_proba(X)

    try:
        dbn1.save(weights_path)

        dbn2 = DBN([5])
        dbn2.load(weights_path)
        pred2 = dbn2.predict(X)
        prob2 = dbn2.predict_proba(X)

        eq_(dbn1.coef_, dbn2.coef_)
        for i, layer in enumerate(dbn1.layers):
            eq_(layer.W, dbn2.layers[i].W)

        eq_(pred1, pred2)
        eq_(prob1, prob2)
    finally:
        # Do not leave the weights file behind in the shared temp directory,
        # whether the checks passed or not.
        if os.path.exists(weights_path):
            os.remove(weights_path)
Esempio n. 29
0
def test_create_empty():
    # A Dataset built without arguments must be empty everywhere: role,
    # type, frame and metadata.
    empty_ds = copper.Dataset()
    eq_(empty_ds.role, pd.Series())
    eq_(empty_ds.type, pd.Series())
    eq_(empty_ds.frame.empty, True)
    eq_(empty_ds.metadata.empty, True)
Esempio n. 30
0
def test_save_load_weights():
    # Weights saved by a fitted DBN and loaded into a fresh one must give
    # the same coefficients, layer weights, predictions and probabilities.
    import tempfile
    # The path gets its own name so the tempfile module is not shadowed.
    weights_path = os.path.join(tempfile.gettempdir(), 'w.json')

    dbn1 = DBN([5], random_state=1234)
    X, y = get_iris()
    dbn1.fit(X, y)
    pred1 = dbn1.predict(X)
    prob1 = dbn1.predict_proba(X)

    try:
        dbn1.save(weights_path)

        dbn2 = DBN([5])
        dbn2.load(weights_path)
        pred2 = dbn2.predict(X)
        prob2 = dbn2.predict_proba(X)

        eq_(dbn1.coef_, dbn2.coef_)
        for i, layer in enumerate(dbn1.layers):
            eq_(layer.W, dbn2.layers[i].W)

        eq_(pred1, pred2)
        eq_(prob1, prob2)
    finally:
        # Do not leave the weights file behind in the shared temp directory,
        # whether the checks passed or not.
        if os.path.exists(weights_path):
            os.remove(weights_path)
def test_fbeta_score(mc=None):
    # Checks fbeta_score for the 'SVM' and 'LR' entries, first with
    # beta=0.1 and then with the method's default arguments.
    # mc: comparison object to test; get_mc() supplies one when omitted.
    if mc is None:
        mc = get_mc()
    low_beta = mc.fbeta_score(beta=0.1)
    eq_(low_beta['SVM'], 0.976249, 6)
    eq_(low_beta['LR'], 0.914067, 6)
    default_beta = mc.fbeta_score()
    eq_(default_beta['SVM'], 0.973952, 6)
    eq_(default_beta['LR'], 0.870540, 6)
Esempio n. 32
0
def test_fbeta_score(mc=None):
    # Checks fbeta_score for the 'SVM' and 'LR' entries, first with
    # beta=0.1 and then with the method's default arguments.
    # mc: comparison object to test; get_mc() supplies one when omitted.
    if mc is None:
        mc = get_mc()
    low_beta = mc.fbeta_score(beta=0.1)
    eq_(low_beta['SVM'], 0.976249, 6)
    eq_(low_beta['LR'], 0.914067, 6)
    default_beta = mc.fbeta_score()
    eq_(default_beta['SVM'], 0.973952, 6)
    eq_(default_beta['LR'], 0.870540, 6)
Esempio n. 33
0
def test_filter_role():
    # filter(role=...) must select exactly the columns carrying the
    # requested role(s); with no arguments it returns the whole frame.
    df = pd.DataFrame(np.random.rand(5, 10))
    ds = copper.Dataset(df)

    # Ignore five columns; the other five are expected to match INPUT.
    ds.role[[0, 2, 4, 5, 9]] = ds.IGNORE
    not_ignored = [1, 3, 6, 7, 8]
    eq_(ds.filter(role=ds.INPUT), ds[not_ignored])

    # Start over: ignore everything, then mark a different set as INPUT.
    ds.role[:] = ds.IGNORE
    inputs = [1, 3, 4, 6, 8]
    ds.role[inputs] = ds.INPUT
    eq_(ds.filter(role=ds.INPUT), ds[inputs])

    targets = [2, 9]
    ds.role[targets] = ds.TARGET
    eq_(ds.filter(role=ds.TARGET), ds[targets])

    # A list of roles selects the columns of both roles, in column order.
    both_roles = [ds.INPUT, ds.TARGET]
    eq_(ds.filter(role=both_roles), ds[[1, 2, 3, 4, 6, 8, 9]])

    eq_(ds.filter(), df)
Esempio n. 34
0
def test_filter_role():
    # filter(role=...) must select exactly the columns carrying the
    # requested role(s); with no arguments it returns the whole frame.
    df = pd.DataFrame(np.random.rand(5, 10))
    ds = copper.Dataset(df)

    # Ignore five columns; the other five are expected to match INPUT.
    ds.role[[0, 2, 4, 5, 9]] = ds.IGNORE
    not_ignored = [1, 3, 6, 7, 8]
    eq_(ds.filter(role=ds.INPUT), ds[not_ignored])

    # Start over: ignore everything, then mark a different set as INPUT.
    ds.role[:] = ds.IGNORE
    inputs = [1, 3, 4, 6, 8]
    ds.role[inputs] = ds.INPUT
    eq_(ds.filter(role=ds.INPUT), ds[inputs])

    targets = [2, 9]
    ds.role[targets] = ds.TARGET
    eq_(ds.filter(role=ds.TARGET), ds[targets])

    # A list of roles selects the columns of both roles, in column order.
    both_roles = [ds.INPUT, ds.TARGET]
    eq_(ds.filter(role=both_roles), ds[[1, 2, 3, 4, 6, 8, 9]])

    eq_(ds.filter(), df)
Esempio n. 35
0
def test_coef_eq_layers_1():
    # coef_ must be the concatenation of layer 0's b, layer 0's flattened W,
    # layer 1's b and layer 1's flattened W, in that order.
    dbn = DBN([5], pretrain_epochs=0, finetune_epochs=1, random_state=1234)
    X, y = get_iris()
    dbn.fit(X, y)

    coef = dbn.coef_
    # Slice boundaries: 5 values, then 20, then 3, then the remainder.
    eq_(coef[:5], dbn.layers[0].b)
    eq_(coef[5:25], dbn.layers[0].W.reshape(-1))
    eq_(coef[25:28], dbn.layers[1].b)
    eq_(coef[28:], dbn.layers[1].W.reshape(-1))
Esempio n. 36
0
def test_coef_eq_layers_1():
    # coef_ must be the concatenation of layer 0's b, layer 0's flattened W,
    # layer 1's b and layer 1's flattened W, in that order.
    dbn = DBN([5], pretrain_epochs=0, finetune_epochs=1, random_state=1234)
    X, y = get_iris()
    dbn.fit(X, y)

    coef = dbn.coef_
    # Slice boundaries: 5 values, then 20, then 3, then the remainder.
    eq_(coef[:5], dbn.layers[0].b)
    eq_(coef[5:25], dbn.layers[0].W.reshape(-1))
    eq_(coef[25:28], dbn.layers[1].b)
    eq_(coef[28:], dbn.layers[1].W.reshape(-1))
Esempio n. 37
0
def get_mc_string():
    # Returns a fitted ModelComparison holding 'LR' and 'SVM', built on the
    # iris Dataset with 'Target' turned into string class names.
    ds = get_iris_ds()
    ds.type['Target'] = ds.CATEGORY
    # Relabel with a single assignment rather than ds['Target'][mask] = ...:
    # chained indexing can write to a temporary copy and leave the Dataset
    # untouched. Values missing from the mapping are left as they are.
    names = {
        '0': 'Iris-setosa',
        '1': 'Iris-versicolor',
        '2': 'Iris-virginica',
    }
    ds['Target'] = ds['Target'].apply(str).replace(names)
    eq_(ds.metadata['dtype']['Target'], object)

    mc = copper.ModelComparison()
    mc.train_test_split(ds, random_state=0)

    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC
    mc['LR'] = LogisticRegression()
    mc['SVM'] = SVC(probability=True)
    mc.fit()
    return mc
def get_mc_string():
    # Returns a fitted ModelComparison holding 'LR' and 'SVM', built on the
    # iris Dataset with 'Target' turned into string class names.
    ds = get_iris_ds()
    ds.type['Target'] = ds.CATEGORY
    # Relabel with a single assignment rather than ds['Target'][mask] = ...:
    # chained indexing can write to a temporary copy and leave the Dataset
    # untouched. Values missing from the mapping are left as they are.
    names = {
        '0': 'Iris-setosa',
        '1': 'Iris-versicolor',
        '2': 'Iris-virginica',
    }
    ds['Target'] = ds['Target'].apply(str).replace(names)
    eq_(ds.metadata['dtype']['Target'], object)

    mc = copper.ModelComparison()
    mc.train_test_split(ds, random_state=0)

    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC
    mc['LR'] = LogisticRegression()
    mc['SVM'] = SVC(probability=True)
    mc.fit()
    return mc
Esempio n. 39
0
def test_ml_inputs_big():
    # ml_inputs on a 1000 x 100 Dataset in which ten columns hold random
    # letters: each of those is expected to expand into 26 encoded columns.
    abc = 'abcdefghijklmnopqrstuvwxyz'
    m, n = 1000, 10
    letter_idx = np.floor(np.random.rand(m) * 26)
    strings = np.array([abc[int(pos)] for pos in letter_idx])
    df = pd.DataFrame(np.random.rand(m, 100))
    # Columns 0, 10, ..., 90 are overwritten with the same letters.
    abc_cols = np.arange(n) * 10
    for col in abc_cols:
        df[col] = strings
    ds = copper.Dataset(df)
    ds.type[abc_cols.tolist()] = ds.CATEGORY

    ans = copper.t.ml_inputs(ds)
    expected_width = 100 - n + 26 * n
    eq_(ans.shape, (m, expected_width))
    encoded = copper.t.cat_encode(strings)
    for i, abc_col in enumerate(abc_cols):
        # Every earlier categorical column added 25 extra columns, which
        # shifts where this one's 26-wide slice starts.
        start = abc_col + 25 * i
        stop = start + 26
        eq_(ans[:, start:stop], encoded)
Esempio n. 40
0
def test_filter_role_and_type():
    # filter(role=..., type=...) must return only the columns matching
    # both conditions; with no arguments it returns the whole frame.
    df = pd.DataFrame(np.random.rand(5, 5))
    ds = copper.Dataset(df)
    ds.role[:] = ds.IGNORE

    def input_categories():
        # Columns that are both INPUT and CATEGORY right now.
        return ds.filter(role=ds.INPUT, type=ds.CATEGORY)

    ds.role[2] = ds.INPUT
    ds.type[2] = ds.CATEGORY
    eq_(input_categories(), df[[2]])

    ds.role[4] = ds.INPUT
    ds.type[4] = ds.CATEGORY
    eq_(input_categories(), df[[2, 4]])

    # The untouched columns are still IGNORE and NUMBER.
    eq_(ds.filter(role=ds.IGNORE, type=ds.NUMBER), df[[0, 1, 3]])

    # Column 4 keeps its CATEGORY type but no longer has the INPUT role.
    ds.role[4] = ds.IGNORE
    eq_(input_categories(), df[[2]])

    eq_(ds.filter(), df)
Esempio n. 41
0
def test_ml_inputs_big():
    # ml_inputs on a 1000 x 100 Dataset in which ten columns hold random
    # letters: each of those is expected to expand into 26 encoded columns.
    abc = 'abcdefghijklmnopqrstuvwxyz'
    m, n = 1000, 10
    letter_idx = np.floor(np.random.rand(m) * 26)
    strings = np.array([abc[int(pos)] for pos in letter_idx])
    df = pd.DataFrame(np.random.rand(m, 100))
    # Columns 0, 10, ..., 90 are overwritten with the same letters.
    abc_cols = np.arange(n) * 10
    for col in abc_cols:
        df[col] = strings
    ds = copper.Dataset(df)
    ds.type[abc_cols.tolist()] = ds.CATEGORY

    ans = copper.t.ml_inputs(ds)
    expected_width = 100 - n + 26 * n
    eq_(ans.shape, (m, expected_width))
    encoded = copper.t.cat_encode(strings)
    for i, abc_col in enumerate(abc_cols):
        # Every earlier categorical column added 25 extra columns, which
        # shifts where this one's 26-wide slice starts.
        start = abc_col + 25 * i
        stop = start + 26
        eq_(ans[:, start:stop], encoded)
Esempio n. 42
0
def test_filter_role_and_type():
    # filter(role=..., type=...) must return only the columns matching
    # both conditions; with no arguments it returns the whole frame.
    df = pd.DataFrame(np.random.rand(5, 5))
    ds = copper.Dataset(df)
    ds.role[:] = ds.IGNORE

    def input_categories():
        # Columns that are both INPUT and CATEGORY right now.
        return ds.filter(role=ds.INPUT, type=ds.CATEGORY)

    ds.role[2] = ds.INPUT
    ds.type[2] = ds.CATEGORY
    eq_(input_categories(), df[[2]])

    ds.role[4] = ds.INPUT
    ds.type[4] = ds.CATEGORY
    eq_(input_categories(), df[[2, 4]])

    # The untouched columns are still IGNORE and NUMBER.
    eq_(ds.filter(role=ds.IGNORE, type=ds.NUMBER), df[[0, 1, 3]])

    # Column 4 keeps its CATEGORY type but no longer has the INPUT role.
    ds.role[4] = ds.IGNORE
    eq_(input_categories(), df[[2]])

    eq_(ds.filter(), df)
Esempio n. 43
0
def test_filter_type():
    # filter(type=...) must select exactly the columns of the requested
    # type(s); with no arguments it returns the whole frame.
    df = pd.DataFrame(np.random.rand(5, 10))
    ds = copper.Dataset(df)

    categories = [0, 2, 4, 5, 9]
    ds.type[categories] = ds.CATEGORY
    eq_(ds.filter(type=ds.CATEGORY), ds[categories])

    # Start over: everything CATEGORY, then a different set back to NUMBER.
    ds.type[:] = ds.CATEGORY
    numbers = [1, 3, 6, 7, 9]
    ds.type[numbers] = ds.NUMBER
    eq_(ds.filter(type=ds.NUMBER), ds[numbers])

    # Asking for both types covers every column.
    both_types = [ds.NUMBER, ds.CATEGORY]
    eq_(ds.filter(type=both_types), df)

    eq_(ds.filter(), df)
Esempio n. 44
0
def test_set_frame_different_cols():
    # After replacing the frame with one that has different columns, the
    # customised roles and types must be back to INPUT and NUMBER.
    ds = copper.Dataset(pd.DataFrame(np.random.rand(5, 5)))
    ds.role[[2, 4]] = ds.TARGET
    ds.type[[1, 2]] = ds.CATEGORY

    ds.frame = pd.DataFrame(np.random.rand(10, 10))
    # The columns that were customised above are checked one by one.
    eq_(ds.role[2], ds.INPUT)
    eq_(ds.role[4], ds.INPUT)
    eq_(ds.type[1], ds.NUMBER)
    eq_(ds.type[2], ds.NUMBER)
Esempio n. 45
0
def test_filter_type():
    # filter(type=...) must select exactly the columns of the requested
    # type(s); with no arguments it returns the whole frame.
    df = pd.DataFrame(np.random.rand(5, 10))
    ds = copper.Dataset(df)

    categories = [0, 2, 4, 5, 9]
    ds.type[categories] = ds.CATEGORY
    eq_(ds.filter(type=ds.CATEGORY), ds[categories])

    # Start over: everything CATEGORY, then a different set back to NUMBER.
    ds.type[:] = ds.CATEGORY
    numbers = [1, 3, 6, 7, 9]
    ds.type[numbers] = ds.NUMBER
    eq_(ds.filter(type=ds.NUMBER), ds[numbers])

    # Asking for both types covers every column.
    both_types = [ds.NUMBER, ds.CATEGORY]
    eq_(ds.filter(type=both_types), df)

    eq_(ds.filter(), df)
Esempio n. 46
0
def test_set_frame_different_cols():
    # After replacing the frame with one that has different columns, the
    # customised roles and types must be back to INPUT and NUMBER.
    ds = copper.Dataset(pd.DataFrame(np.random.rand(5, 5)))
    ds.role[[2, 4]] = ds.TARGET
    ds.type[[1, 2]] = ds.CATEGORY

    ds.frame = pd.DataFrame(np.random.rand(10, 10))
    # The columns that were customised above are checked one by one.
    eq_(ds.role[2], ds.INPUT)
    eq_(ds.role[4], ds.INPUT)
    eq_(ds.type[1], ds.NUMBER)
    eq_(ds.type[2], ds.NUMBER)
Esempio n. 47
0
def test_cat_encode_big():
    # cat_encode on 100000 random letters: one output row per input, and
    # every row must sum to exactly one.
    abc = 'abcdefghijklmnopqrstuvwxyz'
    size = 100000
    letter_idx = np.floor(np.random.rand(size) * 26)
    strings = np.array([abc[int(pos)] for pos in letter_idx])
    ans = copper.t.cat_encode(strings)
    eq_(len(ans), size)
    row_totals = ans.sum(axis=1)
    eq_(row_totals, np.ones(size))
    eq_(ans.sum(), size)
Esempio n. 48
0
def test_cat_encode_big():
    # cat_encode on 100000 random letters: one output row per input, and
    # every row must sum to exactly one.
    abc = 'abcdefghijklmnopqrstuvwxyz'
    size = 100000
    letter_idx = np.floor(np.random.rand(size) * 26)
    strings = np.array([abc[int(pos)] for pos in letter_idx])
    ans = copper.t.cat_encode(strings)
    eq_(len(ans), size)
    row_totals = ans.sum(axis=1)
    eq_(row_totals, np.ones(size))
    eq_(ans.sum(), size)
Esempio n. 49
0
def test_get_set_algorithms():
    # Algorithms stored under a key must be returned by that key, and
    # len() must count the stored entries.
    mc = copper.ModelComparison()
    plain = LogisticRegression()
    mc['LR'] = plain
    eq_(mc['LR'], plain)

    l1_model = LogisticRegression(penalty='l1')
    mc['LR l1'] = l1_model
    eq_(mc['LR l1'], l1_model)
    eq_(len(mc), 2)
Esempio n. 50
0
def test_get_set_algorithms():
    # Algorithms stored under a key must be returned by that key, and
    # len() must count the stored entries.
    mc = copper.ModelComparison()
    plain = LogisticRegression()
    mc['LR'] = plain
    eq_(mc['LR'], plain)

    l1_model = LogisticRegression(penalty='l1')
    mc['LR l1'] = l1_model
    eq_(mc['LR l1'], l1_model)
    eq_(len(mc), 2)
Esempio n. 51
0
def test_save_load_metadata():
    # Metadata written to CSV and read back into a second Dataset must keep
    # the customised roles and types.
    meta_path = os.path.join(tempfile.gettempdir(), 'metadata.csv')
    # Save
    df = pd.DataFrame(np.random.rand(5, 10))
    ds = copper.Dataset(df)
    ds.role[2] = ds.TARGET
    ds.role[7] = ds.IGNORE
    ds.type[1] = ds.CATEGORY
    ds.type[5] = ds.CATEGORY
    try:
        ds.metadata.to_csv(meta_path)
        # Load
        ds2 = copper.Dataset(df)
        loaded_meta = pd.read_csv(meta_path)
    finally:
        # Do not leave the CSV behind in the shared temp directory.
        if os.path.exists(meta_path):
            os.remove(meta_path)
    # The saved index comes back as a regular 'Columns' column.
    loaded_meta = loaded_meta.set_index('Columns')
    ds2.metadata = loaded_meta
    eq_(ds2.role[2], ds.TARGET)
    eq_(ds2.role[7], ds.IGNORE)
    eq_(ds2.type[1], ds.CATEGORY)
    eq_(ds2.type[5], ds.CATEGORY)
Esempio n. 52
0
def test_save_load_metadata():
    # Metadata written to CSV and read back into a second Dataset must keep
    # the customised roles and types.
    meta_path = os.path.join(tempfile.gettempdir(), 'metadata.csv')
    # Save
    df = pd.DataFrame(np.random.rand(5, 10))
    ds = copper.Dataset(df)
    ds.role[2] = ds.TARGET
    ds.role[7] = ds.IGNORE
    ds.type[1] = ds.CATEGORY
    ds.type[5] = ds.CATEGORY
    try:
        ds.metadata.to_csv(meta_path)
        # Load
        ds2 = copper.Dataset(df)
        loaded_meta = pd.read_csv(meta_path)
    finally:
        # Do not leave the CSV behind in the shared temp directory.
        if os.path.exists(meta_path):
            os.remove(meta_path)
    # The saved index comes back as a regular 'Columns' column.
    loaded_meta = loaded_meta.set_index('Columns')
    ds2.metadata = loaded_meta
    eq_(ds2.role[2], ds.TARGET)
    eq_(ds2.role[7], ds.IGNORE)
    eq_(ds2.type[1], ds.CATEGORY)
    eq_(ds2.type[5], ds.CATEGORY)
Esempio n. 53
0
def test_deleted_algorithm():
    # Deleting one algorithm must leave the other one in place.
    mc = copper.ModelComparison()
    plain = LogisticRegression()
    mc['LR'] = plain
    eq_(mc['LR'], plain)

    l1_model = LogisticRegression(penalty='l1')
    mc['LR l1'] = l1_model
    eq_(mc['LR l1'], l1_model)

    del mc['LR']
    eq_(mc['LR l1'], l1_model)  # Not deleted
    # NOTE(review): this lookup of the removed key is presumably meant to
    # raise, with the error expected by a decorator not visible here --
    # TODO confirm.
    mc['LR']  # deleted
Esempio n. 54
0
def test_deleted_algorithm():
    # Deleting one algorithm must leave the other one in place.
    mc = copper.ModelComparison()
    plain = LogisticRegression()
    mc['LR'] = plain
    eq_(mc['LR'], plain)

    l1_model = LogisticRegression(penalty='l1')
    mc['LR l1'] = l1_model
    eq_(mc['LR l1'], l1_model)

    del mc['LR']
    eq_(mc['LR l1'], l1_model)  # Not deleted
    # NOTE(review): this lookup of the removed key is presumably meant to
    # raise, with the error expected by a decorator not visible here --
    # TODO confirm.
    mc['LR']  # deleted
Esempio n. 55
0
def test_reproducible():
    # Two DBNs built with the same random_state and fitted on the same data
    # must agree on coefficients, predictions and probabilities.
    X, y = get_iris()

    def fit_once():
        # Build, fit and query one model, in that order.
        model = DBN([5], random_state=123)
        model.fit(X, y)
        labels = model.predict(X)
        probabilities = model.predict_proba(X)
        return model, labels, probabilities

    dbn1, pred1, prob1 = fit_once()
    dbn2, pred2, prob2 = fit_once()

    eq_(dbn1.coef_, dbn2.coef_)
    eq_(pred1, pred2)
    eq_(prob1, prob2)
Esempio n. 56
0
def test_reproducible():
    # Two DBNs built with the same random_state and fitted on the same data
    # must agree on coefficients, predictions and probabilities.
    X, y = get_iris()

    def fit_once():
        # Build, fit and query one model, in that order.
        model = DBN([5], random_state=123)
        model.fit(X, y)
        labels = model.predict(X)
        probabilities = model.predict_proba(X)
        return model, labels, probabilities

    dbn1, pred1, prob1 = fit_once()
    dbn2, pred2, prob2 = fit_once()

    eq_(dbn1.coef_, dbn2.coef_)
    eq_(pred1, pred2)
    eq_(prob1, prob2)
Esempio n. 57
0
def test_hinge_loss(mc=None):
    # Checks hinge_loss for the 'SVM' and 'LR' entries against recorded
    # values.
    # mc: comparison object to test; get_mc() supplies one when omitted.
    if mc is None:
        mc = get_mc()
    losses = mc.hinge_loss()
    eq_(losses['SVM'], 1.921052, 4)
    eq_(losses['LR'], 2.026315, 4)
Esempio n. 58
0
def test_create_empty_and_set():
    # A Dataset starts out empty; once a frame is assigned, its data, sizes,
    # metadata views and text representations must follow that frame.
    df = pd.DataFrame(np.random.rand(10, 5))
    ds = copper.Dataset()
    eq_(ds.role, pd.Series())
    eq_(ds.type, pd.Series())
    eq_(ds.metadata.empty, True)
    eq_(ds.frame.empty, True)

    ds.frame = df.copy()
    eq_(ds.frame, df)
    # Length follows the rows of the frame.
    eq_(len(ds), 10)
    eq_(len(ds), len(df))
    # One role/type/metadata entry per column.
    for per_column in (ds.role, ds.type, ds.metadata):
        eq_(len(per_column), 5)
    # role and type are the 'Role' and 'Type' columns of the metadata.
    meta = ds.metadata
    eq_(meta['Role'], ds.role)
    eq_(meta['Type'], ds.type)
    eq_(ds.index, df.index)
    eq_(ds.columns, df.columns)
    # Both text representations are those of the metadata.
    eq_(str(ds), str(ds.metadata))
    eq_(unicode(ds), unicode(ds.metadata))
Esempio n. 59
0
def test_precision_score(mc=None):
    # Checks precision_score for the 'SVM' and 'LR' entries against recorded
    # values.
    # mc: comparison object to test; get_mc() supplies one when omitted.
    if mc is None:
        mc = get_mc()
    precisions = mc.precision_score()
    eq_(precisions['SVM'], 0.976316, 6)
    eq_(precisions['LR'], 0.915414, 6)
Esempio n. 60
0
def test_tail():
    # Compares the Dataset's head() with the frame's head() for a random
    # number of rows.
    # NOTE(review): the name says "tail" but head() is what gets called --
    # TODO confirm which one is intended.
    frame = pd.DataFrame(np.random.rand(5, 10))
    ds = copper.Dataset(frame.copy())
    n_rows = math.floor(random.random() * 10)
    eq_(ds.head(n_rows), frame.head(n_rows))