def test_factorize():
    # Expand date column 'b' into year and month features
    factorizer = DateFactorizer(cols=['b'], features=("year", "month"))
    fitted_output = factorizer.fit_transform(df)

    expected_columns = ['a', 'b_year', 'b_month']
    assert fitted_output.columns.tolist() == expected_columns

    # Calling transform on its own (after fitting) must reproduce exactly
    # what fit_transform returned
    standalone_output = factorizer.transform(df)
    assert fitted_output.equals(standalone_output)
def test_factorize_attribute_error():
    # Pass a bare string for ``features`` instead of a tuple of feature
    # names; fitting is expected to raise an AttributeError
    bad_features = "yr"
    estimator = DateFactorizer(cols=['b'], features=bad_features)
    fit_method = estimator.fit
    assert_raises(AttributeError, fit_method, df)
def test_factorize_preserve_original():
    # With drop_original=False the source column 'b' should remain
    # alongside the generated feature columns
    factorizer = DateFactorizer(
        cols=['b'],
        features=("year", "month"),
        drop_original=False,
    )
    output = factorizer.fit_transform(df)

    column_names = output.columns.tolist()
    assert column_names == ['a', 'b', 'b_year', 'b_month']
def test_non_date_factorize():
    # Selecting both 'a' and 'b' is expected to fail with a ValueError
    # because not every selected column is a datetime
    estimator = DateFactorizer(cols=["a", "b"])
    assert_raises(ValueError, estimator.fit, df)
def test_date_factorizer_asdf():
    # Run the shared "as_df" transformer check against a DateFactorizer
    # configured on column 'b'
    estimator = DateFactorizer(cols=['b'])
    assert_transformer_asdf(estimator, df)
def test_date_factorizer_persistable():
    # Run the shared persistence check against a DateFactorizer configured
    # on column 'b', using "loc.pkl" as the pickle location
    estimator = DateFactorizer(cols=['b'])
    pickle_location = "loc.pkl"
    assert_persistable(estimator, location=pickle_location, X=df)
from datetime import datetime as dt

# #############################################################################
# create data

# Every timestamp below shares this month-day-year / 24-hour clock layout
timestamp_format = "%m-%d-%Y %H:%M:%S"
raw_timestamps = (
    "06-01-2018 12:00:05",
    "06-02-2018 13:19:12",
    "06-03-2018 06:04:17",
    "06-04-2018 03:56:32",
)

# Transaction ids count up from 1, one per parsed timestamp
data = [
    [transaction_id, dt.strptime(stamp, timestamp_format)]
    for transaction_id, stamp in enumerate(raw_timestamps, start=1)
]
# The final record deliberately has no timestamp at all
data.append([5, None])

df = pd.DataFrame.from_records(data, columns=["transaction_id", "time"])

# A variety of features can be pulled out of a date field. With no
# ``features`` argument the factorizer falls back to its defaults (year,
# month, day and hour)
print("Default features:")
default_result = DateFactorizer(cols=['time']).fit_transform(df)
print(default_result)

# More granular features can be requested explicitly:
extended_features = ("year", "month", "day", "hour", "minute", "second")

print("\n+Minutes, +Seconds:")
extended_result = DateFactorizer(
    cols=['time'],
    features=extended_features,
).fit_transform(df)
print(extended_result)

# The original (pre-transform) time column can also be kept in the output
print("\nSame as above, but retain old time column:")
retained_result = DateFactorizer(
    cols=['time'],
    drop_original=False,
    features=extended_features,
).fit_transform(df)
print(retained_result)