def test_random_state():
    """Check that a fixed ``random_state`` gives reproducible segments.

    For each ``n_intervals`` setting listed below, one reference transform
    is computed and then compared against ``N_ITER`` further transforms,
    each produced by a newly constructed segmenter given the same seed.
    """
    seed = 1234
    series = generate_df_from_array(np.random.normal(size=10))

    def segment(setting):
        # A new estimator per call: nothing but the seed is shared
        # between the reference run and the repeated runs.
        segmenter = RandomIntervalSegmenter(n_intervals=setting,
                                            random_state=seed)
        return segmenter.fit_transform(series)

    for setting in (0.5, 10, 'sqrt', 'random', 'log'):
        reference = segment(setting)
        for _ in range(N_ITER):
            repeat = segment(setting)
            np.testing.assert_array_equal(
                tabularize(reference).values,
                tabularize(repeat).values,
            )
def test_different_implementations():
    """Check that two routes to interval means give identical output.

    Route one chains ``RandomIntervalSegmenter`` with a
    ``RowwiseTransformer`` wrapping ``np.mean``; route two uses
    ``RandomIntervalFeatureExtractor`` with ``features=[np.mean]``.
    Both are given the same seed and the same ``n_intervals`` setting.
    """
    seed = 1233
    X_train, _ = load_gunpoint(return_X_y=True)

    # Route one: segmenter followed by a separate mean transformer.
    segmenter = RandomIntervalSegmenter(n_intervals='sqrt',
                                        random_state=seed)
    mean_step = RowwiseTransformer(
        FunctionTransformer(func=np.mean, validate=False))
    segments = segmenter.fit_transform(X_train)
    chained = mean_step.fit_transform(segments)

    # Route two: the single feature-extracting transformer.
    extractor = RandomIntervalFeatureExtractor(n_intervals='sqrt',
                                               features=[np.mean],
                                               random_state=seed)
    direct = extractor.fit_transform(X_train)

    np.testing.assert_array_equal(chained, direct)
def test_output_format_dim(len_series, n_instances, n_intervals):
    """Check the type and shape of the segmenter output.

    The output must be a ``pd.DataFrame`` with as many rows as the input.
    Unless ``n_intervals`` is ``'random'``, the number of columns is also
    checked against the count implied by ``n_intervals``:

    - float: ``max(1, int(len_series * n_intervals))``
    - integer: ``n_intervals`` itself
    - ``'sqrt'``: ``max(1, int(sqrt(len_series)))``
    - ``'log'``: ``max(1, int(log(len_series)))``

    Any other value gets no column-count check.
    """
    X = generate_df_from_array(np.ones(len_series), n_rows=n_instances,
                               n_cols=1)
    Xt = RandomIntervalSegmenter(n_intervals=n_intervals).fit_transform(X)

    # Output container type and row count.
    assert isinstance(Xt, pd.DataFrame)
    assert Xt.shape[0] == X.shape[0]

    # With 'random' the interval count is not predictable, so stop here.
    if n_intervals == 'random':
        return

    # Work out how many interval columns the setting should produce.
    setting_type = type(n_intervals)
    if np.issubdtype(setting_type, np.floating):
        expected_cols = np.maximum(1, int(len_series * n_intervals))
    elif np.issubdtype(setting_type, np.integer):
        expected_cols = n_intervals
    elif n_intervals == 'sqrt':
        expected_cols = np.maximum(1, int(np.sqrt(len_series)))
    elif n_intervals == 'log':
        expected_cols = np.maximum(1, int(np.log(len_series)))
    else:
        # Unrecognised setting: nothing further to verify.
        return

    assert Xt.shape[1] == expected_cols