def test_serialization():
    """Timedelta round-trips through get_arguments()/from_dictionary()."""
    cases = [
        (Timedelta(1, unit='w'),
         {'value': 1, 'unit': 'w', 'entity_id': None, 'inclusive': False}),
        (Timedelta(3, unit='d', inclusive=True),
         {'value': 3, 'unit': 'd', 'entity_id': None, 'inclusive': True}),
        (Timedelta(5, unit='o', entity='log'),
         {'value': 5, 'unit': 'o', 'entity_id': 'log', 'inclusive': False}),
    ]
    # Serialization: each Timedelta produces its expected argument dict.
    for delta, arguments in cases:
        assert delta.get_arguments() == arguments
    # Deserialization: each dict reconstructs an equal Timedelta.
    for delta, arguments in cases:
        assert Timedelta.from_dictionary(arguments) == delta
def test_deltas_week(es):
    """Adding a 7-day delta equals adding a 1-week delta to a timestamp."""
    instances = es.related_instances('customers', 'log', 0)
    earliest = instances['datetime'].sort_values().tolist()[0]
    one_week = Timedelta(1, "w")
    seven_days = Timedelta(7, "d")
    assert earliest + seven_days == earliest + one_week
def test_accepts_relative_training_window(datetime_es):
    """dfs accepts training_window as a Timedelta or an equivalent string."""
    cutoff = pd.Timestamp("2012-4-1 04:00")
    fm_all, _ = dfs(entityset=datetime_es,
                    target_entity="transactions")
    fm_cutoff, _ = dfs(entityset=datetime_es,
                       target_entity="transactions",
                       cutoff_time=cutoff)
    fm_td_window, _ = dfs(entityset=datetime_es,
                          target_entity="transactions",
                          cutoff_time=cutoff,
                          training_window=Timedelta("3 months"))
    fm_str_window, _ = dfs(entityset=datetime_es,
                           target_entity="transactions",
                           cutoff_time=cutoff,
                           training_window="3 months")
    # Leap-year edge case: a one-year window ending on Feb 29.
    fm_leap, _ = dfs(entityset=datetime_es,
                     target_entity="transactions",
                     cutoff_time=pd.Timestamp("2012-2-29 04:00"),
                     training_window=Timedelta("1 year"))
    assert (fm_all.index == [1, 2, 3, 4, 5]).all()
    assert (fm_cutoff.index == [1, 2, 3, 4]).all()
    assert (fm_td_window.index == [2, 3, 4]).all()
    assert (fm_str_window.index == [2, 3, 4]).all()
    assert (fm_leap.index == [1, 2]).all()
def test_delta_with_observations(es):
    """Observation-unit deltas are relative, and negation flips their value."""
    delta = Timedelta(4, 'observations', 'log')
    assert not delta.is_absolute()
    assert delta.value == 4
    negated = -delta
    assert not negated.is_absolute()
    assert negated.value == -4
def test_serialization():
    """Single- and multi-unit Timedeltas round-trip through dicts."""
    cases = [
        (Timedelta(1, unit='w'), {'value': 1, 'unit': 'w'}),
        (Timedelta(3, unit='d'), {'value': 3, 'unit': 'd'}),
        (Timedelta(5, unit='o'), {'value': 5, 'unit': 'o'}),
    ]
    for delta, arguments in cases:
        assert delta.get_arguments() == arguments
    for delta, arguments in cases:
        assert Timedelta.from_dictionary(arguments) == delta

    # Multi-unit deltas serialize value/unit lists in nondeterministic
    # order, so assert membership rather than exact ordering.
    multi = Timedelta({'years': 4, 'months': 3, 'days': 2})
    serialized = multi.get_arguments()
    pairs = list(zip(serialized['value'], serialized['unit']))
    for expected_pair in [(4, 'Y'), (3, 'mo'), (2, 'd')]:
        assert expected_pair in pairs
    # Deserialization of the list form reconstructs an equal delta.
    assert Timedelta.from_dictionary(
        {'value': [4, 3, 2], 'unit': ['Y', 'mo', 'd']}) == multi
def test_deltas_week(es):
    """1 week and 7 days advance a log timestamp identically."""
    sessions = to_pandas(es['sessions'])
    customer_sessions = sessions[sessions['customer_id'] == 0]
    log = to_pandas(es['log'])
    customer_log = log[log['session_id'].isin(customer_sessions['id'])]
    first_time = customer_log['datetime'].sort_values().tolist()[0]
    assert first_time + Timedelta(7, "d") == first_time + Timedelta(1, "w")
def test_deltas_week(es):
    """A 7-day delta and a 1-week delta shift a timestamp the same way."""
    sessions_frame = to_pandas(es["sessions"])
    mask = sessions_frame["customer_id"] == 0
    sessions_frame = sessions_frame[mask]
    log_frame = to_pandas(es["log"])
    log_frame = log_frame[log_frame["session_id"].isin(sessions_frame["id"])]
    sorted_times = log_frame["datetime"].sort_values().tolist()
    week = Timedelta(1, "w")
    days = Timedelta(7, "d")
    assert sorted_times[0] + days == sorted_times[0] + week
def test_deltas_week(es):
    """1 week equals 7 days when added to a timestamp (pandas or Dask es)."""
    def _materialize(frame):
        # Dask-backed entitysets hand back dd.DataFrames; compute them first.
        return frame.compute() if isinstance(frame, dd.DataFrame) else frame

    sessions = _materialize(es['sessions'].df)
    sessions = sessions[sessions['customer_id'] == 0]
    log = _materialize(es['log'].df)
    log = log[log['session_id'].isin(sessions['id'])]
    first_time = log['datetime'].sort_values().tolist()[0]
    assert first_time + Timedelta(7, "d") == first_time + Timedelta(1, "w")
def test_accepts_relative_training_window(datetime_es):
    """training_window works as Timedelta or string; include_cutoff_time
    controls whether rows exactly at the cutoff survive the window."""
    # TODO: Update to use Dask dataframes when issue #882 is closed
    cutoff = pd.Timestamp("2012-4-1 04:00")
    fm_all, _ = dfs(entityset=datetime_es,
                    target_dataframe_name="transactions")
    fm_cutoff, _ = dfs(entityset=datetime_es,
                       target_dataframe_name="transactions",
                       cutoff_time=cutoff)
    fm_td_window, _ = dfs(entityset=datetime_es,
                          target_dataframe_name="transactions",
                          cutoff_time=cutoff,
                          training_window=Timedelta("3 months"))
    fm_str_window, _ = dfs(entityset=datetime_es,
                           target_dataframe_name="transactions",
                           cutoff_time=cutoff,
                           training_window="3 months")
    assert (fm_all.index == [1, 2, 3, 4, 5]).all()
    assert (fm_cutoff.index == [1, 2, 3, 4]).all()
    assert (fm_td_window.index == [2, 3, 4]).all()
    assert (fm_str_window.index == [2, 3, 4]).all()

    # Leap-year edge case: a one-year window ending on Feb 29.
    leap_cutoff = pd.Timestamp("2012-2-29 04:00")
    fm_leap, _ = dfs(entityset=datetime_es,
                     target_dataframe_name="transactions",
                     cutoff_time=leap_cutoff,
                     training_window=Timedelta("1 year"),
                     include_cutoff_time=True)
    assert (fm_leap.index == [2]).all()
    fm_leap, _ = dfs(entityset=datetime_es,
                     target_dataframe_name="transactions",
                     cutoff_time=leap_cutoff,
                     training_window=Timedelta("1 year"),
                     include_cutoff_time=False)
    assert (fm_leap.index == [1, 2]).all()
def test_delta_with_time_unit_matches_pandas(es):
    """Hour-unit Timedelta arithmetic agrees with pandas.Timedelta."""
    def _materialize(frame):
        # Dask-backed entitysets hand back dd.DataFrames; compute them first.
        return frame.compute() if isinstance(frame, dd.DataFrame) else frame

    sessions = _materialize(es['sessions'].df)
    sessions = sessions[sessions['customer_id'] == 0]
    log = _materialize(es['log'].df)
    log = log[log['session_id'].isin(sessions['id'])]
    times = log['datetime'].sort_values().tolist()

    value, unit = 4, 'h'
    ours = Timedelta(value, unit)
    reference = pd.Timedelta(value, unit)
    # Forward shift matches pandas, via both addition and double negation.
    assert times[0] + ours == times[0] + reference
    assert times[0] - (-ours) == times[0] + reference
    # Backward shift matches pandas likewise.
    assert times[4] - ours == times[4] - reference
    assert times[4] + (-ours) == times[4] - reference
def test_feature_takes_timedelta_string(es):
    """A use_previous string such as "1 day" is coerced to a Timedelta."""
    base = Feature(es["log"].ww["id"])
    counted = Feature(
        base,
        parent_dataframe_name="customers",
        use_previous="1 day",
        primitive=Count,
    )
    assert counted.use_previous == Timedelta(1, "d")
def test_delta_with_observations(es):
    """Observation deltas are relative, negate cleanly, and reject
    direct datetime arithmetic."""
    four = Timedelta(4, 'observations')
    assert not four.is_absolute()
    assert four.get_value('o') == 4
    negated = -four
    assert not negated.is_absolute()
    assert negated.get_value('o') == -4

    # Observation counts have no fixed duration, so adding them to a
    # timestamp must raise.
    timestamp = pd.to_datetime('2019-05-01')
    expected_error = 'Invalid unit'
    with pytest.raises(Exception, match=expected_error):
        timestamp + four
    with pytest.raises(Exception, match=expected_error):
        timestamp - four
def test_serialization():
    """get_arguments()/from_dictionary() are inverse operations."""
    cases = [
        (Timedelta(1, unit="w"), {"value": 1, "unit": "w"}),
        (Timedelta(3, unit="d"), {"value": 3, "unit": "d"}),
        (Timedelta(5, unit="o"), {"value": 5, "unit": "o"}),
    ]
    for delta, arguments in cases:
        assert delta.get_arguments() == arguments
    for delta, arguments in cases:
        assert Timedelta.from_dictionary(arguments) == delta

    # Multi-unit deltas serialize value/unit lists in nondeterministic
    # order, so assert membership rather than exact ordering.
    multi = Timedelta({"years": 4, "months": 3, "days": 2})
    serialized = multi.get_arguments()
    pairs = list(zip(serialized["value"], serialized["unit"]))
    for expected_pair in [(4, "Y"), (3, "mo"), (2, "d")]:
        assert expected_pair in pairs
    # Deserialization of the list form reconstructs an equal delta.
    assert Timedelta.from_dictionary({
        "value": [4, 3, 2],
        "unit": ["Y", "mo", "d"],
    }) == multi
def test_delta_with_observations(es):
    """Observation deltas step through an instance's sorted event times."""
    instances = es.related_instances('customers', 'log', 0)
    times = instances['datetime'].sort_values().tolist()

    def _obs_delta(count):
        # Bind the delta to customer 0's log observations.
        return Timedelta(count, 'observations', 'log')(
            'customers', instance_id=0, entityset=es)

    # Moving forward/backward by 4 observations maps first <-> fifth.
    four = _obs_delta(4)
    neg_four = -four
    assert times[0] + four == times[4]
    assert times[0] - neg_four == times[4]
    assert times[4] - four == times[0]
    assert times[4] + neg_four == times[0]

    # A zero-observation delta is a no-op in every direction.
    zero = _obs_delta(0)
    neg_zero = -zero
    for delta in (zero, neg_zero):
        assert times[0] + delta == times[0]
        assert times[0] - delta == times[0]

    # Requesting more observations than exist must fail loudly.
    too_many = _obs_delta(99999)
    with pytest.raises(NotEnoughData):
        times[0] + too_many
    with pytest.raises(NotEnoughData):
        times[0] - too_many
def test_serialization():
    """Each Timedelta serializes to, and deserializes from, its dict form."""
    specs = [(1, 'w'), (3, 'd'), (5, 'o')]
    times = [Timedelta(value, unit=unit) for value, unit in specs]
    dictionaries = [{'value': value, 'unit': unit} for value, unit in specs]
    for delta, expected in zip(times, dictionaries):
        assert delta.get_arguments() == expected
    for delta, dictionary in zip(times, dictionaries):
        assert Timedelta.from_dictionary(dictionary) == delta
def test_delta_with_time_unit_matches_pandas(es):
    """Hour-unit Timedelta arithmetic agrees with pandas.Timedelta."""
    instances = es.related_instances('customers', 'log', 0)
    times = instances['datetime'].sort_values().tolist()

    value, unit = 4, 'h'
    ours = Timedelta(value, unit)
    reference = pd.Timedelta(value, unit)
    # Forward shift matches pandas, via both addition and double negation.
    assert times[0] + ours == times[0] + reference
    assert times[0] - (-ours) == times[0] + reference
    # Backward shift matches pandas likewise.
    assert times[4] - ours == times[4] - reference
    assert times[4] + (-ours) == times[4] - reference
def test_delta_with_time_unit_matches_pandas(es):
    """Hour-unit Timedelta arithmetic agrees with pandas.Timedelta."""
    sessions = to_pandas(es["sessions"])
    sessions = sessions[sessions["customer_id"] == 0]
    log = to_pandas(es["log"])
    log = log[log["session_id"].isin(sessions["id"])]
    times = log["datetime"].sort_values().tolist()

    value, unit = 4, "h"
    ours = Timedelta(value, unit)
    reference = pd.Timedelta(value, unit)
    # Forward shift matches pandas, via both addition and double negation.
    assert times[0] + ours == times[0] + reference
    assert times[0] - (-ours) == times[0] + reference
    # Backward shift matches pandas likewise.
    assert times[4] - ours == times[4] - reference
    assert times[4] + (-ours) == times[4] - reference
def test_week_to_days():
    """A week count given as a string equals the same span in days."""
    weeks = Timedelta("1001 weeks")
    days = Timedelta(1001 * 7, "days")
    assert weeks == days
def test_string_timedelta_args():
    """String specs (singular, plural, multi-digit) parse to value/unit pairs."""
    cases = [
        ("1 second", 1, "second"),
        ("1 seconds", 1, "second"),
        ("10 days", 10, "days"),
        ("100 days", 100, "days"),
        ("1001 days", 1001, "days"),
        ("1001 weeks", 1001, "weeks"),
    ]
    for text, value, unit in cases:
        assert Timedelta(text) == Timedelta(value, unit)
def test_feature_takes_timedelta_string(es):
    """Count coerces a "1 day" use_previous string to Timedelta(1, 'd')."""
    count_feature = Count(es['log']['id'], es['customers'],
                          use_previous="1 day")
    assert count_feature.use_previous == Timedelta(1, 'd')
def test_requires_entities_if_observations():
    """Constructing an observation-unit Timedelta without an entity raises."""
    with pytest.raises(Exception):
        Timedelta(4, 'observations')
def test_singular():
    """make_singular strips a plural 's' and leaves singular forms alone."""
    for word in ("Month", "Months"):
        assert Timedelta.make_singular(word) == "Month"
def test_timedelta_equality():
    """Equal value/unit pairs compare equal; unrelated types do not."""
    ten_days = Timedelta(10, "d")
    assert ten_days == Timedelta(10, "d")
    assert ten_days != 1
def test_feature_takes_timedelta_string(es):
    """ft.Feature coerces a use_previous string into Timedelta(1, 'd')."""
    id_variable = es['log']['id']
    built = ft.Feature(id_variable,
                       parent_entity=es['customers'],
                       use_previous="1 day",
                       primitive=Count)
    assert built.use_previous == Timedelta(1, 'd')
def test_feature_takes_timedelta_string(es):
    """Feature coerces a use_previous string into Timedelta(1, 'd')."""
    base = Feature(es['log'].ww['id'])
    counted = Feature(base,
                      parent_dataframe_name='customers',
                      use_previous="1 day",
                      primitive=Count)
    assert counted.use_previous == Timedelta(1, 'd')
def test_requires_entities_if_observations():
    """The 'o' unit demands an entity; omitting it raises a clear error."""
    expected_message = 'Must define entity to use o as unit'
    with pytest.raises(Exception, match=expected_message):
        Timedelta(4, 'observations')