import os
from functools import reduce
from operator import add

import boto3
import numpy as np
import pandas as pd
from nose import tools

# Project-local imports; the exact module path is not shown in this file:
# from <package> import (BatchStorageFile, BatchStorageMemory, BatchStorageS3,
#                        Builder, StorageMeta, Translate)


def memory_builder_factory(feature_set, look_back, look_forward, batch_size,
                           batch_seconds=1, validation_split=0, pseudo_stratify=False,
                           stratify_nbatch_groupings=20, n_workers=None, seed=None,
                           normalize=True, custom_transforms=None, verbose=False):
    storage_meta = StorageMeta(validation_split=validation_split)
    storage = BatchStorageMemory(storage_meta)
    # Pass Translate arguments by keyword: the s3 factory's Translate call also
    # takes stride, so positional order differs between the two call sites.
    # (Keyword names are assumed to match the factory parameter names.)
    translate = Translate(features=feature_set, look_back=look_back,
                          look_forward=look_forward, n_seconds=batch_seconds,
                          normalize=normalize, verbose=verbose,
                          custom_transforms=custom_transforms)
    return Builder(storage=storage, translate=translate, batch_size=batch_size,
                   pseudo_stratify=pseudo_stratify,
                   stratify_nbatch_groupings=stratify_nbatch_groupings,
                   verbose=verbose, seed=seed, n_workers=n_workers)


def s3_builder_factory(s3_bucket_resource, feature_set, look_back, look_forward,
                       batch_size, s3_prefix="", batch_seconds=1, stride=1,
                       validation_split=0, pseudo_stratify=False,
                       stratify_nbatch_groupings=20, n_workers=None, seed=None,
                       normalize=True, custom_transforms=None,
                       session_norm_filter=None, verbose=False):
    storage_meta = StorageMeta(validation_split=validation_split)
    storage = BatchStorageS3(storage_meta, s3_bucket_resource=s3_bucket_resource,
                             s3_prefix=s3_prefix)
    translate = Translate(features=feature_set, look_back=look_back,
                          look_forward=look_forward, n_seconds=batch_seconds,
                          stride=stride, normalize=normalize, verbose=verbose,
                          custom_transforms=custom_transforms,
                          session_norm_filter=session_norm_filter)
    return Builder(storage=storage, translate=translate, batch_size=batch_size,
                   pseudo_stratify=pseudo_stratify,
                   stratify_nbatch_groupings=stratify_nbatch_groupings,
                   verbose=verbose, seed=seed, n_workers=n_workers)
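
# A minimal usage sketch for the factories above, kept out of the test run
# (the name avoids the test_ prefix). Assumptions, mirroring the fixtures in
# the tests below: input sessions are DataFrames with a "time" column plus
# feature and label columns, and generate_and_save_batches/save_meta are the
# Builder entry points. Nothing here is confirmed beyond what the tests show.
def example_memory_builder_usage():
    frames = [
        pd.DataFrame({
            "time": pd.to_datetime(list(range(100)), unit="s"),
            "A": range(100),
            "B": range(100),
            "y": np.ones(100),
        })
    ]
    builder = memory_builder_factory(
        feature_set=["A", "B"],
        look_back=2,
        look_forward=1,
        batch_size=16,
        validation_split=0.2,
    )
    builder.generate_and_save_batches(frames)
    builder.save_meta()
    return builder
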
def test_normalize_on():
    # Ten sessions: five copies of a pair of 50-row frames whose feature
    # columns together span 1..100 (A) and 101..200 (B).
    feature_df_list = reduce(
        add,
        [
            [
                pd.DataFrame({
                    "time": pd.to_datetime(list(range(50)), unit="s"),
                    "A": range(1, 51),
                    "B": range(101, 151),
                    "y": np.ones(50),
                }),
                pd.DataFrame({
                    "time": pd.to_datetime(list(range(50)), unit="s"),
                    "A": range(51, 101),
                    "B": range(151, 201),
                    "y": np.ones(50),
                }),
            ]
            for _ in range(5)
        ],
        [],
    )
    meta = StorageMeta()
    storage = BatchStorageMemory(meta)
    translate = Translate(features=["A", "B"], look_back=0, look_forward=0,
                          n_seconds=1, normalize=True, verbose=True)
    batch_generator = Builder(storage, translate, batch_size=10, pseudo_stratify=False)
    batch_generator.generate_and_save_batches(feature_df_list)

    # The scaler should learn the column means: A spans 1..100, B spans 101..200.
    tools.assert_almost_equal(translate.scaler.mean_[0], 50, delta=1)
    tools.assert_almost_equal(translate.scaler.mean_[1], 150, delta=1)
    for batch in storage._data.values():
        # All batches keep the monotonically increasing source values (the data
        # was created with range); standardization preserves ordering.
        assert (np.diff(batch["features"][:, 0, 0]) > 0).all()  # feature A
        assert (np.diff(batch["features"][:, 0, 1]) > 0).all()  # feature B
def test_builder_stratify():
    feature_set = sorted(["A", "B"])
    feature_df_list = [
        pd.DataFrame({
            "time": pd.to_datetime(list(range(160)), unit="s"),
            "A": np.ones(160),
            "B": np.ones(160),
            "y": np.ones(160),
        })
        for _ in range(1)
    ]
    meta = StorageMeta(validation_split=0.5)
    storage = BatchStorageMemory(meta)
    translate = Translate(features=feature_set, look_back=0, look_forward=0, n_seconds=1)
    batch_generator = Builder(storage, translate, batch_size=16,
                              stratify_nbatch_groupings=3, pseudo_stratify=True)
    batch_generator.generate_and_save_batches(feature_df_list)
    assert batch_generator._stratify
    # 160 rows / batch_size 16 = 10 batches, split evenly at validation_split=0.5.
    tools.eq_(len(meta.train.ids), 5)
    tools.eq_(len(meta.validation.ids), 5)
def test_normalize_off():
    feature_df_list = [
        pd.DataFrame({
            "time": pd.to_datetime(list(range(160)), unit="s"),
            "A": range(160),
            "B": range(160),
            "y": np.ones(160),
        })
        for _ in range(1)
    ]
    meta = StorageMeta()
    storage = BatchStorageMemory(meta)
    translate = Translate(features=["A", "B"], look_back=0, look_forward=0,
                          n_seconds=1, normalize=False)
    batch_generator = Builder(storage, translate, batch_size=16, pseudo_stratify=False)
    batch_generator.generate_and_save_batches(feature_df_list)
    for batch in storage._data.values():
        # All batches keep the monotonically increasing source values
        # (the data was created with range).
        assert (np.diff(batch["features"][:, 0, 0]) > 0).all()  # feature A
        assert (np.diff(batch["features"][:, 0, 1]) > 0).all()  # feature B
def test_mem_storage_save():
    meta = StorageMeta()
    storage = BatchStorageMemory(meta)
    X = np.array([1, 2, 3])
    y = np.array([0, 0, 0])
    filename = storage.save(X, y)
    assert filename in storage._data
def test_file_storage_save():
    meta = StorageMeta()
    storage = BatchStorageFile(meta, directory="test")
    X = np.array([1, 2, 3])
    y = np.array([0, 0, 0])
    filename = storage.save(X, y)
    assert os.path.isfile(filename)
def test_file_storage_load():
    meta = StorageMeta()
    storage = BatchStorageFile(meta, directory="test")
    X = np.array([1, 2, 3])
    y = np.array([0, 0, 0])
    storage.save(X, y)
    X_data, y_data = storage.load(0)
    assert np.array_equal(X_data, X)
    assert np.array_equal(y_data, y)
def test_mem_storage_load():
    meta = StorageMeta()
    storage = BatchStorageMemory(meta)
    X = np.array([1, 2, 3])
    y = np.array([0, 0, 0])
    storage.save(X, y)
    X_data, y_data = storage.load(0)
    assert np.array_equal(X_data, X)
    assert np.array_equal(y_data, y)
def test_storage_s3():
    conn = boto3.resource("s3", region_name="us-east-1")
    conn.create_bucket(Bucket="test_bucket")
    storage = BatchStorageS3(StorageMeta(), conn.Bucket("test_bucket"), "test")
    X = np.array([1, 2, 3])
    y = np.array([0, 0, 0])
    storage.save(X, y)
    X_data, y_data = storage.load(0)
    assert np.array_equal(X_data, X)
    assert np.array_equal(y_data, y)
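
# The S3 tests in this module call boto3 directly, which only works against a
# mocked backend. Below is a sketch of the setup they presumably rely on --
# assuming moto, which this file does not confirm; on moto < 5 the entry point
# is moto.mock_s3, on moto >= 5 it is moto.mock_aws.
def example_mocked_s3_roundtrip():
    from moto import mock_aws  # moto >= 5; use `from moto import mock_s3` on moto < 5

    with mock_aws():
        conn = boto3.resource("s3", region_name="us-east-1")
        conn.create_bucket(Bucket="test_bucket")
        storage = BatchStorageS3(StorageMeta(), conn.Bucket("test_bucket"), "test")
        X = np.array([1, 2, 3])
        y = np.array([0, 0, 0])
        storage.save(X, y)
        X_data, y_data = storage.load(0)
        assert np.array_equal(X_data, X)
        assert np.array_equal(y_data, y)
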
def test_file_storage_metadata():
    meta = StorageMeta()
    storage = BatchStorageFile(meta, directory="test")
    X = np.array([1, 2, 3])
    y = np.array([0, 0, 0])
    storage.save(X, y)
    storage.save_meta({})
    params = storage.load_meta()
    assert len(params["train_ids"]) == 1
    assert params["train_map"][params["train_ids"][0]] == "ID_0"
    assert len(params["val_ids"]) == 0
def test_mem_storage_metadata_val():
    meta = StorageMeta(validation_split=1.0)
    storage = BatchStorageMemory(meta)
    X = np.array([1, 2, 3])
    y = np.array([0, 0, 0])
    storage.save(X, y)
    storage.save_meta({})
    params = storage.load_meta()
    assert len(params["val_ids"]) == 1
    assert params["val_map"][params["val_ids"][0]] == "IDv_0"
    assert len(params["train_ids"]) == 0
def test_s3_storage_metadata():
    conn = boto3.resource("s3", region_name="us-east-1")
    conn.create_bucket(Bucket="test_bucket")
    meta = StorageMeta()
    storage = BatchStorageS3.from_config(meta, "test_bucket", s3_prefix="test")
    X = np.array([1, 2, 3])
    y = np.array([0, 0, 0])
    storage.save(X, y)
    storage.save_meta({})
    params = storage.load_meta()
    assert len(params["train_ids"]) == 1
    assert params["train_map"][params["train_ids"][0]] == "ID_0"
    assert len(params["val_ids"]) == 0
def test_save_and_load_meta():
    feature_df_list = [
        pd.DataFrame({
            "time": pd.to_datetime(list(range(160)), unit="s"),
            "A": range(160),
            "B": range(160),
            "y": np.ones(160),
        })
        for _ in range(1)
    ]
    meta = StorageMeta()
    storage = BatchStorageMemory(meta)
    translate = Translate(features=["A", "B"], look_back=0, look_forward=0,
                          n_seconds=1, normalize=False)
    batch_generator = Builder(storage, translate, batch_size=16, pseudo_stratify=False)
    batch_generator.generate_and_save_batches(feature_df_list)
    batch_generator.save_meta()

    # Rebuild with deliberately wrong settings; load_meta() should restore the
    # saved ones. Using a separate name avoids comparing translate to itself.
    translate_reload = Translate(features=["A", "B"], look_back=99, look_forward=99,
                                 n_seconds=99, normalize=True)
    batch_generator_reload = Builder(storage, translate_reload, batch_size=99,
                                     pseudo_stratify=False)
    batch_generator_reload.load_meta()

    tools.eq_(batch_generator.batch_size, batch_generator_reload.batch_size)
    tools.eq_(translate._features, translate_reload._features)
    tools.eq_(translate._look_forward, translate_reload._look_forward)
    tools.eq_(translate._look_back, translate_reload._look_back)
    tools.eq_(translate._n_seconds, translate_reload._n_seconds)
    tools.eq_(translate._normalize, translate_reload._normalize)
def test_builder_storage_meta_validation():
    feature_set = sorted(["A", "B"])
    feature_df_list = [
        pd.DataFrame({
            "time": pd.to_datetime(list(range(35)), unit="s"),
            "A": np.ones(35),
            "B": np.ones(35),
            "y": np.ones(35),
        })
        for _ in range(1)
    ]
    meta = StorageMeta(validation_split=0.5)
    storage = BatchStorageMemory(meta)
    translate = Translate(features=feature_set, look_back=2, look_forward=1, n_seconds=1)
    batch_generator = Builder(storage, translate, batch_size=16)
    batch_generator.generate_and_save_batches(feature_df_list)
    # 35 rows minus the look_back=2 and look_forward=1 margins leave 32 windows:
    # two batches of 16, split evenly at validation_split=0.5.
    tools.eq_(len(meta.train.ids), 1)
    tools.eq_(len(meta.validation.ids), 1)
def test_file_storage_directory():
    meta = StorageMeta()
    storage = BatchStorageFile(meta, directory="test")
    tools.eq_(storage.directory, "test")
    assert os.path.exists("test")
def test_load_empty_s3_meta():
    conn = boto3.resource("s3", region_name="us-east-1")
    conn.create_bucket(Bucket="test_bucket")
    BatchStorageS3(StorageMeta(), conn.Bucket("test_bucket")).load_meta()


def test_load_empty_file_meta():
    BatchStorageFile(StorageMeta(), directory="test").load_meta()


def test_load_empty_meta():
    BatchStorageMemory(StorageMeta()).load_meta()