def test_handles_datetime_format(self):
    # Check that a Datetime variable given a "format" option is loaded
    # according to that format string.
    #
    # Jan 2, 2011 rendered with a day-first format is an ambiguous date
    # string: it can also be read month-first.
    fmt = "%d-%m-%Y"
    expected = pd.Timestamp('Jan 2, 2011')
    rendered = expected.strftime(fmt)
    raw_dates = [rendered] * 3

    frame = pd.DataFrame({
        'id': [0, 1, 2],
        'time_format': raw_dates,
        'time_no_format': raw_dates,
    })

    # One column carries the explicit format, the other is a bare Datetime.
    with_format = (variable_types.Datetime, {"format": fmt})
    types_by_column = {
        'id': variable_types.Categorical,
        'time_format': with_format,
        'time_no_format': variable_types.Datetime,
    }

    es = EntitySet(id='test')
    es._import_from_dataframe(
        entity_id='test_entity',
        index='id',
        variable_types=types_by_column,
        dataframe=frame,
    )

    parsed_with_format = es.get_column_data('test_entity', 'time_format')
    parsed_without_format = es.get_column_data('test_entity',
                                               'time_no_format')

    # Without a format the ambiguous string is expected to be misparsed,
    # so no row should equal Jan 2.
    assert (parsed_without_format != expected).all()
    # With the format every row should be exactly Jan 2.
    assert (parsed_with_format == expected).all()
def test_handles_datetime_format(self):
    # Datetime columns must be parsed using the supplied format string.
    # The input is deliberately an ambiguous date: the same text is loaded
    # once with an explicit format and once without.
    day_first = "%d-%m-%Y"
    jan_second = pd.Timestamp('Jan 2, 2011')
    date_text = jan_second.strftime(day_first)
    column_values = [date_text, date_text, date_text]

    columns = {}
    columns['id'] = [0, 1, 2]
    columns['time_format'] = column_values
    columns['time_no_format'] = column_values
    data = pd.DataFrame(columns)

    declared_types = {
        'id': variable_types.Categorical,
        'time_format': (variable_types.Datetime, {"format": day_first}),
        'time_no_format': variable_types.Datetime,
    }

    test_set = EntitySet(id='test')
    test_set._import_from_dataframe(entity_id='test_entity',
                                    index='id',
                                    variable_types=declared_types,
                                    dataframe=data)

    formatted = test_set.get_column_data('test_entity', 'time_format')
    unformatted = test_set.get_column_data('test_entity', 'time_no_format')

    # Parsing without the format is expected to get the date wrong ...
    unformatted_mismatch = unformatted != jan_second
    assert unformatted_mismatch.all()
    # ... while parsing with the format yields Jan 2 in every row.
    formatted_match = formatted == jan_second
    assert formatted_match.all()
def test_converts_datetime(self):
    # Check that a string column declared as Datetime is converted to
    # pandas timestamps on import.
    #
    # This test fails without defining vtypes: EntitySet infers that the
    # time column should be a numeric type.
    times = pd.date_range('1/1/2011', periods=3, freq='H')
    # The hourly range is rendered with a date-only pattern, so the column
    # holds date strings without a time-of-day component.
    time_strs = times.strftime('%Y-%m-%d')
    df = pd.DataFrame({'id': [0, 1, 2], 'time': time_strs})
    vtypes = {
        'id': variable_types.Categorical,
        'time': variable_types.Datetime,
    }

    entityset = EntitySet(id='test')
    entityset._import_from_dataframe(entity_id='test_entity',
                                     index='id',
                                     time_index="time",
                                     variable_types=vtypes,
                                     dataframe=df)
    pd_col = entityset.get_column_data('test_entity', 'time')

    # NOTE(review): the variable type registered on the entity for 'time'
    # is not asserted here; only the stored values are checked.
    # isinstance is the idiomatic type check (rather than type(x) == T).
    assert isinstance(pd_col[0], pd.Timestamp)