コード例 #1
0
def test_generate_sub_file(tmpdir):
    """Check that ``generate_sub_file`` writes a CSV with 10 rows and 2 columns.

    ``tmpdir`` is pytest's per-test temporary directory fixture.
    """
    preds = np.arange(10)
    id_sub = np.arange(10)
    # Join through the fixture so the file is created *inside* the temporary
    # directory. Plain ``+`` concatenation omits the path separator and ends
    # up creating a sibling file next to the directory instead.
    sub_file_path = str(tmpdir.join('file.csv'))
    ms.generate_sub_file(preds, id_sub, sub_file_path)
    result_df = DDF.from_csv(sub_file_path)
    assert result_df.shape[1] == 2
    assert result_df.shape[0] == 10
コード例 #2
0
def test_make_submission_file(tmpdir):
    """Check that the submission CSV contains exactly ``Id`` and ``SalePrice``.

    ``tmpdir`` is pytest's per-test temporary directory fixture.
    """
    # Join through the fixture so the file lands inside the temporary
    # directory; ``str(tmpdir) + 'sub.csv'`` has no path separator and would
    # write a sibling file next to the directory.
    sub_path = str(tmpdir.join('sub.csv'))
    # NOTE(review): these limits look chosen so the model can train on the
    # tiny fixture frame - confirm against the model library's parameters.
    model_params = {'min_data': 1, 'min_data_in_bin': 1}
    ms.make_submission_file(
        train_df, test_df, sub_path, extra_model_params=model_params)
    result_df = DDF.from_csv(sub_path)
    assert 'SalePrice' in result_df.columns
    assert 'Id' in result_df.columns
    assert len(result_df.columns) == 2
コード例 #3
0
ファイル: test_eval_model.py プロジェクト: chechir/warsaw
def test_eval_model_on_cv(tmpdir):
    """Check that ``eval_model_on_cv`` logs one result row with a positive rmse.

    ``tmpdir`` is pytest's per-test temporary directory fixture.
    """
    # Named ``cv_df`` so it cannot shadow a module-level ``df`` fixture.
    cv_df = load_df(TRAIN_PATH, nrows=10)
    # NOTE(review): these limits look chosen so the model can train on only
    # ten rows - confirm against the model library's parameters.
    model_params = {'min_data': 1, 'min_data_in_bin': 1}
    # Join through the fixture so the log is written inside the temporary
    # directory; ``str(tmpdir) + 'file.csv'`` has no path separator and would
    # write a sibling file next to the directory.
    log_path = str(tmpdir.join('file.csv'))
    ev.eval_model_on_cv(df=cv_df,
                        log_file=log_path,
                        extra_model_params=model_params)
    results_df = DDF.from_csv(log_path)
    assert len(results_df) == 1
    assert results_df['rmse'][0] > 0
コード例 #4
0
def test_get_lb_ixs():
    """Rows with a SalePrice are expected in 'train', NaN rows in 'val'."""
    frame = DDF({
        'col1': np.arange(5),
        'SalePrice': np.array([100., 100., 100., np.nan, np.nan]),
    })
    folds = ms.get_lb_ixs(frame)

    # Expected boolean masks for fold 0: first three rows train, last two val.
    want_train = np.array([True, True, True, False, False])
    want_val = np.array([False, False, False, True, True])

    assert np.all(want_train == folds[0]['train'])
    assert np.all(want_val == folds[0]['val'])
コード例 #5
0
import numpy as np

from wutils.ddf import DDF
from housePriceDone import make_submission as ms


# Small three-row fixture frame shared by the tests in this module.
# MiscFeature deliberately includes a missing value (np.nan).
df = DDF(dict(
    Id=np.arange(3),
    MiscFeature=[np.nan, 'Gar2', 'Othr'],
    MoSold=[2, 5, 9],
    SalePrice=[1000, 20000, 400000],
))

# Training frame: a slice over every column of the fixture
# (presumably an independent copy - confirm against DDF.colslice).
train_df = df.colslice(df.columns)
# Test frame: the fixture with the 'SalePrice' column dropped.
test_df = df.drop_columns(['SalePrice'])


def test_make_submission_file(tmpdir):
    """Check that the submission CSV contains exactly ``Id`` and ``SalePrice``.

    ``tmpdir`` is pytest's per-test temporary directory fixture.
    """
    # Join through the fixture so the file lands inside the temporary
    # directory; ``str(tmpdir) + 'sub.csv'`` has no path separator and would
    # write a sibling file next to the directory.
    sub_path = str(tmpdir.join('sub.csv'))
    # NOTE(review): these limits look chosen so the model can train on the
    # three-row fixture frame - confirm against the model library's parameters.
    model_params = {'min_data': 1, 'min_data_in_bin': 1}
    ms.make_submission_file(
        train_df, test_df, sub_path, extra_model_params=model_params)
    result_df = DDF.from_csv(sub_path)
    assert 'SalePrice' in result_df.columns
    assert 'Id' in result_df.columns
    assert len(result_df.columns) == 2


def test_append_dfs():
    """The appended frame has as many rows as train and test combined."""
    combined = ms.append_dfs(train_df, test_df)
    expected_rows = len(train_df) + len(test_df)
    assert len(combined) == expected_rows
コード例 #6
0
ファイル: test_eval_model.py プロジェクト: chechir/warsaw
import numpy as np

from wutils.ddf import DDF

from housePriceDone.data import TRAIN_PATH, load_df
from housePriceDone import eval_model as ev

# Small three-row fixture frame used by test_clean_data.
# MSZoning and MiscFeature hold strings; MiscFeature also includes np.nan.
df = DDF(dict(
    MSZoning=['FV', 'RH', 'RM'],
    MiscFeature=[np.nan, 'Gar2', 'Othr'],
    MoSold=[2, 5, 9],
    SalePrice=[1000, 20000, 400000],
))


def test_eval_model_on_cv(tmpdir):
    """Check that ``eval_model_on_cv`` logs one result row with a positive rmse.

    ``tmpdir`` is pytest's per-test temporary directory fixture.
    """
    # Named ``cv_df`` so it does not shadow the module-level ``df`` fixture.
    cv_df = load_df(TRAIN_PATH, nrows=10)
    # NOTE(review): these limits look chosen so the model can train on only
    # ten rows - confirm against the model library's parameters.
    model_params = {'min_data': 1, 'min_data_in_bin': 1}
    # Join through the fixture so the log is written inside the temporary
    # directory; ``str(tmpdir) + 'file.csv'`` has no path separator and would
    # write a sibling file next to the directory.
    log_path = str(tmpdir.join('file.csv'))
    ev.eval_model_on_cv(df=cv_df,
                        log_file=log_path,
                        extra_model_params=model_params)
    results_df = DDF.from_csv(log_path)
    assert len(results_df) == 1
    assert results_df['rmse'][0] > 0


def test_clean_data():
    """``clean_data`` should give MSZoning a float dtype and keep MoSold."""
    cleaned = ev.clean_data(df)

    zoning_dtype = cleaned['MSZoning'].dtype
    assert zoning_dtype == float

    months_match = df['MoSold'] == cleaned['MoSold']
    assert np.all(months_match)
コード例 #7
0
ファイル: data.py プロジェクト: chechir/warsaw
def load_df(data_path, nrows=None):
    """Read the CSV at ``data_path`` into a ``DDF``.

    ``nrows`` is forwarded unchanged to ``DDF.from_csv``; the default
    ``None`` is passed through as-is.
    """
    frame = DDF.from_csv(data_path, nrows=nrows)
    return frame
コード例 #8
0
def generate_sub_file(preds, sub_id, sub_file_path):
    """Write a submission CSV to ``sub_file_path``.

    The ``Id`` column holds ``sub_id`` and the column named by
    ``ev.target_name`` holds ``np.exp(preds)``.
    """
    # NOTE(review): the exp suggests preds are on a log scale - confirm
    # against the training code.
    target_values = np.exp(preds)
    columns = {'Id': sub_id, ev.target_name: target_values}
    submission = DDF(columns)
    submission.to_csv(sub_file_path)