예제 #1
0
def test_io_speed() -> None:
    """Check that every dataset loader yields all entries fast enough.

    A synthetic dataset with ``exp_size`` series is generated and saved to a
    temporary directory. Each loader listed in ``fixtures`` is then pointed at
    the ``train`` path inside that directory; the test asserts that the loader
    yields exactly ``exp_size`` entries and that its measured throughput
    (entries per second) is at least the minimum rate configured for it.
    """
    exp_size = 250

    with Timer() as timer:
        dataset = ComplexSeasonalTimeSeries(
            num_series=exp_size,
            freq_str="D",
            length_low=100,
            length_high=200,
            min_val=0.0,
            max_val=20000.0,
            proportion_missing_values=0.1,
        ).generate()
    print(f"Test data generation took {timer.interval} seconds")

    # name of method, loading function and minimum throughput expected
    # (in lines/second)
    fixtures = [
        ("baseline", baseline, 100_000),
        # ('json.loads', load_json, xxx),
        ("ujson.loads", load_ujson, 20000),
        ("JsonLinesFile", load_json_lines_file, 20000),
        ("ListDataset", load_list_dataset, 500),
        ("FileDataset", load_file_dataset, 500),
        ("FileDatasetNumpy", load_file_dataset_numpy, 500),
        ("ParsedDataset", load_parsed_dataset, 500),
    ]

    with tempfile.TemporaryDirectory() as path:
        # save the generated dataset to a temporary folder
        with Timer() as timer:
            save_datasets(dataset, path)
        print(f"Test data saving took {timer.interval} seconds")

        # for each loader, read the dataset, assert that the number of lines is
        # correct, and record the lines/sec rate
        rates = {}
        for name, get_loader, _ in fixtures:
            # Reset the counter for every loader: the enumerate loop below
            # never assigns it when the loader is empty, and a stale count
            # from the previous loader would make the size check pass.
            act_size = 0
            with Timer() as timer:
                loader = get_loader(
                    Path(path) / "train", dataset.metadata.freq)
                for act_size, _ in enumerate(loader, start=1):
                    pass
            # guard against a zero interval on very fast loaders
            rates[name] = int(act_size / max(timer.interval, 0.00001))
            print(f"Loader {name:13} achieved a rate of {rates[name]:10} "
                  f"lines/second")
            assert exp_size == act_size, (
                f"Loader {name} did not yield the expected number of "
                f"{exp_size} lines")

        # for each loader, assert that the measured throughput is not below
        # the minimum rate required for it
        for name, _, min_rate in fixtures:
            assert min_rate <= rates[name], (
                f"The throughput of {name} ({rates[name]} lines/second) "
                f"was below the required minimum rate {min_rate}.")
def create_multivariate_datasets(data_dir: str) -> None:
    """Save the default synthetic datasets under ``data_dir``.

    The three values returned by ``default_synthetic()`` are bundled into a
    ``TrainDatasets`` object (using the ``metadata`` attribute of the first
    one) and handed to ``save_datasets`` together with ``data_dir``.
    """
    synthetic_info, train_part, test_part = default_synthetic()
    bundle = TrainDatasets(
        metadata=synthetic_info.metadata,
        train=train_part,
        test=test_part,
    )
    save_datasets(bundle, data_dir)
 def save_datasets(self, path, overwrite=True):
     """Delegate to the module-level ``save_datasets`` function.

     Passes ``self.datasets``, ``path`` and ``overwrite`` positionally and
     returns whatever that function returns.
     """
     datasets = self.datasets
     return save_datasets(datasets, path, overwrite)