def test_bench_smoke(self):
    problem1 = TestProblem()
    problem2 = LSETestProblem()

    bench = Benchmark(
        problems=[problem1, problem2],
        configs=self.bench_config,
        n_reps=2,
        log_every=2,
    )
    bench.run_benchmarks()

    out = bench.pandas()

    # assert problem metadata was correctly saved
    self.assertEqual(
        sorted(out["problem_name"].unique()),
        ["test lse problem", "test problem"],
    )
    self.assertEqual(
        sorted(
            out[out["problem_name"] == "test lse problem"]["problem_threshold"].unique()
        ),
        ["0.75"],
    )

    # assert derived values work correctly
    self.assertEqual(
        sorted(
            out[out["problem_name"] == "test problem"]["opt_strat_n_trials"].unique()
        ),
        ["2", "3"],
    )
    self.assertEqual(
        sorted(
            out[out["problem_name"] == "test lse problem"]["opt_strat_n_trials"].unique()
        ),
        ["1", "2"],
    )

    # have as many final results as we expect
    self.assertTrue(len(out[out.final]) == bench.num_benchmarks)

    # have as many repetitions as we expect
    self.assertTrue(len(out.rep.unique()) == bench.n_reps)

    # reporting intervals are correct
    self.assertTrue((out[~out.final].trial_id % 2 == 0).all())

    # we don't run extra trials
    total_trials = out.init_strat_n_trials.astype(int) + out.opt_strat_n_trials.astype(int)
    self.assertTrue((out.trial_id <= total_trials).all())

    # ensure each simulation has a unique random seed
    self.assertTrue(out[out["final"]]["seed"].is_unique)
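# Illustrative sketch only: the real `self.bench_config` fixture is defined elsewhere in the
# test suite, so the keys and values below are assumptions meant to show the general shape of
# such a config. Lists at the leaves (e.g. under "n_trials") are expanded into a cartesian
# product of configurations by Benchmark, which is why a derived column like
# `opt_strat_n_trials` can take multiple values per problem in the assertions above.
example_bench_config = {
    "common": {"lb": "[0]", "ub": "[1]"},
    "init_strat": {"n_trials": 2},
    "opt_strat": {"n_trials": [1, 2]},  # two leaf values -> two benchmark variants
}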
def run_benchmarks_with_checkpoints(
    out_path: str,
    benchmark_name: str,
    problems: List[Problem],
    configs: Mapping[str, Union[str, list]],
    global_seed: Optional[int] = None,
    n_chunks: int = 1,
    n_reps_per_chunk: int = 1,
    log_every: Optional[int] = None,
    checkpoint_every: int = 60,
    n_proc: int = 1,
    serial_debug: bool = False,
) -> None:
    """Runs a series of benchmarks, saving both final and intermediate results to .csv files.

    Benchmarks are run in sequential chunks, each of which runs all combinations of
    problems/configs/reps in parallel. This function should always be used with the
    "if __name__ == '__main__': ..." idiom.

    Args:
        out_path (str): The path to save the results to.
        benchmark_name (str): A name given to this set of benchmarks. Results will be saved
            in files named like "out_path/benchmark_name_chunk{chunk_number}_out.csv".
        problems (List[Problem]): Problem objects containing the test function to evaluate.
        configs (Mapping[str, Union[str, list]]): Dictionary of configs to run. Lists at
            leaves are used to construct a cartesian product of configurations.
        global_seed (int, optional): Global seed to use for reproducible benchmarks.
            Defaults to randomized seeds.
        n_chunks (int): The number of chunks to break the results into. Each chunk will
            contain at least 1 run of every combination of problem and config.
        n_reps_per_chunk (int, optional): Number of repetitions to run each problem/config
            in each chunk.
        log_every (int, optional): Logging interval during an experiment. Defaults to only
            logging at the end.
        checkpoint_every (int): Save intermediate results every checkpoint_every seconds.
        n_proc (int): Number of processors to use.
        serial_debug (bool): Whether to run everything serially for debugging instead of
            in parallel chunks.
    """
    Path(out_path).mkdir(parents=True, exist_ok=True)  # make the output folder if it does not exist

    if serial_debug:
        out_fname = Path(f"{out_path}/{benchmark_name}_out.csv")
        print(f"Starting {benchmark_name} benchmark (serial debug mode)...")
        bench = Benchmark(
            problems=problems,
            configs=configs,
            seed=global_seed,
            n_reps=n_reps_per_chunk * n_chunks,
            log_every=log_every,
        )
        bench.run_benchmarks()
        final_results = bench.pandas()
        final_results.to_csv(out_fname)
    else:
        for chunk in range(n_chunks):
            out_fname = Path(f"{out_path}/{benchmark_name}_chunk{chunk}_out.csv")
            intermediate_fname = Path(f"{out_path}/{benchmark_name}_chunk{chunk}_checkpoint.csv")
            print(f"Starting {benchmark_name} benchmark... chunk {chunk}")

            bench = PathosBenchmark(
                nproc=n_proc,
                problems=problems,
                configs=configs,
                seed=None,
                n_reps=n_reps_per_chunk,
                log_every=log_every,
            )

            if global_seed is None:
                global_seed = int(np.random.randint(0, 200))
            bench.seed = (
                global_seed + chunk * bench.num_benchmarks
            )  # HACK. TODO: make num_benchmarks a property of bench configs
            bench.start_benchmarks()

            while not bench.is_done:
                time.sleep(checkpoint_every)
                collate_start = time.time()
                print(
                    f"Checkpointing {benchmark_name} chunk {chunk}..., "
                    f"{len(bench.futures)}/{bench.num_benchmarks} alive"
                )
                bench.collate_benchmarks(wait=False)
                temp_results = bench.pandas()
                if len(temp_results) > 0:
                    temp_results["rep"] = temp_results["rep"] + n_reps_per_chunk * chunk
                    temp_results.to_csv(intermediate_fname)
                print(
                    f"Collate done in {time.time() - collate_start} seconds, "
                    f"{len(bench.futures)}/{bench.num_benchmarks} left"
                )

            print(f"{benchmark_name} chunk {chunk} fully done!")
            final_results = bench.pandas()
            final_results["rep"] = final_results["rep"] + n_reps_per_chunk * chunk
            final_results.to_csv(out_fname)
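# Usage sketch, following the "__main__" idiom the docstring requires. The import path,
# problem class, config contents, and paths below are placeholders (assumptions), not
# values taken from this module.
#
# from aepsych.benchmark import run_benchmarks_with_checkpoints  # import path is an assumption
#
# if __name__ == "__main__":
#     example_problems = [TestProblem()]  # hypothetical Problem instances
#     example_configs = {
#         "common": {"lb": "[0]", "ub": "[1]"},
#         "opt_strat": {"n_trials": [10, 20]},  # list leaf -> cartesian product of configs
#     }
#     run_benchmarks_with_checkpoints(
#         out_path="bench_out",  # results land in bench_out/smoke_chunk{i}_out.csv
#         benchmark_name="smoke",
#         problems=example_problems,
#         configs=example_configs,
#         global_seed=12345,
#         n_chunks=2,
#         n_reps_per_chunk=5,
#         log_every=10,
#         checkpoint_every=120,
#         n_proc=4,
#     )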