def test_date_encoder_returns_only_day(self, dates: pd.DataFrame):
        """Check that enabling only ``day`` yields a single ``date_a_day`` column."""
        encoder = DateEncoder(day=True, month=False, week=False, year=False)
        encoded = encoder.fit_transform(dates)

        assert isinstance(encoded, pd.DataFrame)
        # Exactly one output column, and no rows gained or lost.
        assert 1 == len(encoded.columns)
        assert len(dates) == len(encoded)

        # Only the day part may be present; every disabled part must be absent.
        assert "date_a_day" in encoded.columns
        for disabled in ("date_a_year", "date_a_month", "date_a_week"):
            assert disabled not in encoded.columns
Example #2
0
def test_date_encoder_returns_only_week(dates):
    """Check that enabling only ``week`` yields a single ``date_a_week`` column."""
    encoder = DateEncoder(day=False, month=False, week=True, year=False)
    encoded = encoder.fit_transform(dates)

    assert isinstance(encoded, pd.DataFrame)
    # Exactly one output column, and no rows gained or lost.
    assert 1 == len(encoded.columns)
    assert len(dates) == len(encoded)

    # Only the week part may be present; every disabled part must be absent.
    for disabled in ('date_a_year', 'date_a_day', 'date_a_month'):
        assert disabled not in encoded.columns
    assert 'date_a_week' in encoded.columns
Example #3
0
def test_date_encoder_returns_correctly(dates):
    """Check the output of a default-constructed ``DateEncoder``.

    Expects four numeric columns (year, day, month, week for ``date_a``),
    the same number of rows as the input, and no remaining ``date_a`` column.
    """
    encoded = DateEncoder().fit_transform(dates)

    assert isinstance(encoded, pd.DataFrame)
    assert 4 == len(encoded.columns)
    assert len(dates) == len(encoded)

    # Every produced column must have a numeric dtype.
    assert all(
        pd.api.types.is_numeric_dtype(encoded[name]) for name in encoded.columns
    )

    # The source column is gone and each date part has its own column.
    assert 'date_a' not in encoded.columns
    for expected in ('date_a_year', 'date_a_day', 'date_a_month', 'date_a_week'):
        assert expected in encoded.columns
 def test_date_encoder_works_in_grid_search(self, dates: pd.DataFrame):
     """Check that a pipeline containing ``DateEncoder`` can be grid-searched."""
     # NOTE(review): the "clf" step name is presumably set by create_pipeline - confirm.
     search_space = {"clf__strategy": ["stratified", "most_frequent"]}
     labels = [0, 0, 1, 1]

     search = GridSearchCV(
         create_pipeline(DateEncoder()),
         param_grid=search_space,
         cv=2,
     )
     search.fit(dates, labels)

     # The test only checks that fitting produced a best score.
     assert hasattr(search, "best_score_")
 def test_date_encoder_works_in_cv(self, dates: pd.DataFrame):
     """Check that a pipeline containing ``DateEncoder`` can be cross-validated."""
     labels = [0, 0, 1, 1]
     fold_scores = cross_val_score(
         create_pipeline(DateEncoder()),
         dates,
         y=labels,
         n_jobs=2,
         cv=2,
     )
     # One score per fold is expected (cv=2).
     assert 2 == len(fold_scores)
 def test_works_without_args(self):
     """Check that ``DateEncoder`` can be constructed with no arguments."""
     encoder = DateEncoder()
     assert encoder
Example #7
0
"""
host_since
==========
The date the host started hosting. Hypothesis: being a host for longer affects the price - long-time hosts might be able to charge a different price.
For our solution, we can set it to 0 or ask.

This is a date. Note that date is not a dtype, but read_csv can be configured to parse the column automatically as a date.
dtype: datetime
"""
from ml_tooling.transformers import Select, DateEncoder
from sklearn.pipeline import Pipeline

# Two-step pipeline for the "host_since" feature:
# Select("host_since") followed by a default DateEncoder.
host_since = Pipeline(
    steps=[
        ("select", Select("host_since")),
        ("date_encoder", DateEncoder()),
    ]
)