def test_translate_alone():
    """Check the shape and contents of the windows ``Translate`` produces.

    For several session lengths, a frame is built with one row per second
    whose feature columns ``A`` and ``B`` both hold ``0 .. n_rows - 1`` and
    whose label column ``y`` is all ones.  The frame is passed through
    ``Translate.scale_and_transform_session`` with ``normalize=False`` and
    the test asserts that:

    * ``X`` has shape ``(n_windows, look_back + look_forward + 1, 2)`` and
      ``y`` has ``n_windows`` entries, where
      ``n_windows = n_rows - (look_back + look_forward)``;
    * the first and second time steps of each window advance by one from
      window to window, starting at 0 and 1 respectively.
    """
    feature_set = sorted(["A", "B"])
    for n_rows in [32, 64, 128]:
        session_df = pd.DataFrame({
            "time": pd.to_datetime(list(range(n_rows)), unit="s"),
            "A": np.arange(n_rows),
            "B": np.arange(n_rows),
            "y": np.ones(n_rows),
        })
        for (look_back, look_forward) in [(3, 2), (1, 0)]:
            custom_transforms = [
                remove_false_anchors_factory("y"),
                split_flat_df_by_time_factory(look_back, look_forward, 1),
            ]
            translate = Translate(features=feature_set,
                                  look_back=look_back,
                                  look_forward=look_forward,
                                  n_seconds=1,
                                  custom_transforms=custom_transforms,
                                  normalize=False)
            X, y = translate.scale_and_transform_session(session_df)

            n_windows = n_rows - (look_back + look_forward)
            window_len = look_back + look_forward + 1
            tools.eq_(X.shape, (n_windows, window_len, 2))
            tools.eq_(len(y), n_windows)

            # The expectation is limited to ``n_windows`` values: comparing
            # against all ``n_rows`` values can never match an array with
            # ``n_windows`` rows, because np.array_equal is False whenever
            # the shapes differ.
            # Both feature columns hold identical values, so the check does
            # not depend on which feature ends up at index 0 of the last axis.
            expected_first = np.arange(n_windows)

            # first elements should slide forward in time one element at a
            # time
            assert np.array_equal(X[:, 0, 0], expected_first)
            # second elements should slide forward one at a time starting
            # at 1
            assert np.array_equal(X[:, 1, 0], expected_first + 1)
def test_time_gaps_split():
    """A one-second hole in the timeline must produce exactly two pieces.

    The frame starts with consecutive integer seconds; every row from label
    25 onward is pushed one second later, leaving a single missing second.
    The splitter is expected to return two frames that, concatenated in
    order, equal the input.
    """
    n_rows = 50
    gap_start = 25
    columns = {
        "time": range(n_rows),
        "A": np.random.randn(n_rows),
        "B": np.random.randn(n_rows),
        "y": np.random.randint(2, size=n_rows),
    }
    frame = pd.DataFrame(columns)

    # Open the gap while "time" is still integer seconds, then convert.
    frame.loc[gap_start:, "time"] += 1
    frame["time"] = pd.to_datetime(frame["time"], unit="s")

    splitter = split_flat_df_by_time_factory(look_back=5,
                                             look_forward=5,
                                             n_seconds=1)
    segments = splitter(frame)

    tools.eq_(len(segments), 2)
    rejoined = pd.concat(segments, axis=0)
    assert frame.equals(rejoined)
def test_time_gaps_fully_segmented():
    """Rows spaced two seconds apart must each land in their own piece.

    The splitter is built with ``n_seconds=1`` and no look-back or
    look-forward, and is expected to return as many pieces as there are
    input rows.
    """
    n_rows = 50
    step_seconds = 2
    second_offsets = list(range(0, n_rows * step_seconds, step_seconds))

    frame = pd.DataFrame({
        "time": pd.to_datetime(second_offsets, unit="s"),
        "A": np.random.randn(n_rows),
        "B": np.random.randn(n_rows),
        "y": np.random.randint(2, size=n_rows),
    })

    splitter = split_flat_df_by_time_factory(look_back=0,
                                             look_forward=0,
                                             n_seconds=1)
    pieces = splitter(frame)

    tools.eq_(len(pieces), n_rows)
def test_time_gaps():
    """Evenly spaced rows must not be split when ``n_seconds`` equals the spacing.

    For spacings of one and two seconds, the splitter is configured with
    ``n_seconds`` equal to the spacing and is expected to return a single
    piece equal to the input frame.
    """
    n_rows = 50
    for spacing in (1, 2):
        second_offsets = list(range(0, n_rows * spacing, spacing))
        frame = pd.DataFrame({
            "time": pd.to_datetime(second_offsets, unit="s"),
            "A": np.random.randn(n_rows),
            "B": np.random.randn(n_rows),
            "y": np.random.randint(2, size=n_rows),
        })

        splitter = split_flat_df_by_time_factory(look_back=5,
                                                 look_forward=5,
                                                 n_seconds=spacing)
        pieces = splitter(frame)

        first_piece = pieces[0]
        assert frame.equals(first_piece)
        tools.eq_(len(pieces), 1)