Example 1
    def test_can_cast_dtype(self):
        # Use a generator, which can only be iterated once, so repeated
        # accesses must be served from the cache
        def load_data():
            yield {"X": np.ones((1, 1), dtype=np.float32)}
        cache = DataLoaderCache(load_data())

        fp32_meta = TensorMetadata().add("X", dtype=np.float32, shape=(1, 1))
        cache.set_input_metadata(fp32_meta)
        feed_dict = cache[0]
        assert feed_dict["X"].dtype == np.float32

        fp64_meta = TensorMetadata().add("X", dtype=np.float64, shape=(1, 1))
        cache.set_input_metadata(fp64_meta)
        feed_dict = cache[0]
        assert feed_dict["X"].dtype == np.float64
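
The subtle point in the test above is the generator: `load_data()` yields exactly one item and is then exhausted, so the second `cache[0]` cannot re-pull from the loader and must come from the cache; the dtype flipping to `np.float64` therefore shows that `set_input_metadata` causes cached buffers to be cast. A quick sketch of the exhaustion behavior (redefining the same `load_data` helper):

    import numpy as np

    def load_data():
        yield {"X": np.ones((1, 1), dtype=np.float32)}

    gen = load_data()
    first = next(gen)  # the one and only item the generator will produce
    # next(gen)        # a second call would raise StopIteration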
Example 2
    def test_will_not_give_up_on_first_cache_miss(self):
        SHAPE = (32, 32)

        DATA = [OrderedDict()]
        DATA[0]["X"] = np.zeros(SHAPE, dtype=np.int64)
        DATA[0]["Y"] = np.zeros(SHAPE, dtype=np.int64)

        cache = DataLoaderCache(DATA)
        cache.set_input_metadata(TensorMetadata().add("X", np.int64, shape=SHAPE).add("Y", np.int64, shape=SHAPE))

        # Populate the cache with bad X but good Y
        cache.cache[0] = OrderedDict()
        cache.cache[0]["X"] = np.ones((64, 64), dtype=np.int64)
        cache.cache[0]["Y"] = np.ones(SHAPE, dtype=np.int64)

        feed_dict = cache[0]
        # Cache cannot reuse X, so it'll reload - we'll get all 0s from the data loader
        assert np.all(feed_dict["X"] == 0)
        # Cache can reuse Y, even though it's after X, so we'll get ones from the cache
        assert np.all(feed_dict["Y"] == 1)
Example 3
    def run(
        runners,
        data_loader=None,
        warm_up=None,
        use_subprocess=None,
        subprocess_timeout=None,
        subprocess_polling_interval=None,
        save_inputs_path=None,
    ):
        """
        Runs the supplied runners sequentially.

        Args:
            runners (List[BaseRunner]):
                    A list of runners to run.
            data_loader (Generator -> OrderedDict[str, numpy.ndarray]):
                    A generator or iterable that yields a dictionary that maps input names to input numpy buffers.
                    In the simplest case, this can be a `List[Dict[str, numpy.ndarray]]`.

                    In case you don't know details about the inputs ahead of time, you can access the
                    `input_metadata` property in your data loader, which will be set to a `TensorMetadata`
                    instance by this function.
                    Note that this does not work for generators or lists.

                    The number of iterations run by this function is controlled by the number of items supplied
                    by the data loader.

                    Defaults to an instance of `DataLoader`.
            warm_up (int):
                    The number of warm-up runs to perform for each runner before timing.
                    Defaults to 0.
            use_subprocess (bool):
                    Whether each runner should be run in a subprocess. This allows each runner to have exclusive
                    access to the GPU. When using a subprocess, runners and loaders will never be modified.
            subprocess_timeout (int):
                    The timeout before a subprocess is killed automatically. This is useful for handling processes
                    that never terminate. A value of None disables the timeout. Defaults to None.
            subprocess_polling_interval (int):
                    The polling interval, in seconds, for checking whether a subprocess has completed or crashed.
                    In rare cases, disabling polling when subprocesses are enabled may cause this function
                    to hang indefinitely if the subprocess crashes.
                    A value of 0 disables polling. Defaults to 30 seconds.
            save_inputs_path (str):
                    [EXPERIMENTAL] Path at which to save inputs used during inference. This will include all inputs generated by
                    the provided data_loader, and will be saved as a JSON List[Dict[str, numpy.ndarray]].

        Returns:
            RunResults:
                    A mapping of runner names to the results of their inference.
                    The ordering of `runners` is preserved in this mapping.
        """
        warm_up = util.default(warm_up, 0)
        data_loader = util.default(data_loader, DataLoader())
        use_subprocess = util.default(use_subprocess, False)
        subprocess_polling_interval = util.default(subprocess_polling_interval,
                                                   30)
        loader_cache = DataLoaderCache(data_loader,
                                       save_inputs_path=save_inputs_path)

        def execute_runner(runner, loader_cache):
            with runner as active_runner:
                # DataLoaderCache will ensure that the feed_dict does not contain any extra entries
                # based on the provided input_metadata.
                loader_cache.set_input_metadata(
                    active_runner.get_input_metadata())

                if warm_up:
                    G_LOGGER.start("{:35} | Running {:} warm-up run(s)".format(
                        active_runner.name, warm_up))
                    try:
                        feed_dict = loader_cache[0]
                    except IndexError:
                        G_LOGGER.warning(
                            "{:} warm-up run(s) were requested, but the data loader did not supply any data. "
                            "Skipping warm-up run(s)".format(warm_up))
                    else:
                        G_LOGGER.ultra_verbose(
                            "Warm-up Input Buffers:\n{:}".format(
                                util.indent_block(feed_dict)))
                        # First do a few warm-up runs, and don't time them.
                        for _ in range(warm_up):
                            active_runner.infer(feed_dict=feed_dict)
                    G_LOGGER.finish(
                        "{:35} | Finished {:} warm-up run(s)".format(
                            active_runner.name, warm_up))

                # Then, actual iterations.
                index = 0
                iteration_results = []

                total_runtime = 0
                for index, feed_dict in enumerate(loader_cache):
                    G_LOGGER.info(
                        "{:35}\n---- Inference Input(s) ----\n{:}".format(
                            active_runner.name,
                            TensorMetadata().from_feed_dict(feed_dict)),
                        mode=LogMode.ONCE,
                    )

                    G_LOGGER.extra_verbose(
                        lambda: "{:35} | Feeding inputs:\n{:}".format(
                            active_runner.name, util.indent_block(feed_dict)))
                    outputs = active_runner.infer(feed_dict=feed_dict)

                    runtime = active_runner.last_inference_time()
                    total_runtime += runtime
                    # Without a deep copy here, outputs will always reference the output of the last run
                    iteration_results.append(
                        IterationResult(outputs=copy.deepcopy(outputs),
                                        runtime=runtime,
                                        runner_name=active_runner.name))

                    G_LOGGER.info(
                        "{:35}\n---- Inference Output(s) ----\n{:}".format(
                            active_runner.name,
                            TensorMetadata().from_feed_dict(outputs)),
                        mode=LogMode.ONCE,
                    )
                    G_LOGGER.extra_verbose(
                        lambda:
                        "{:35} | Inference Time: {:.3f} ms | Received outputs:\n{:}"
                        .format(active_runner.name, runtime * 1000.0,
                                util.indent_block(outputs)))

                total_runtime_ms = total_runtime * 1000.0
                G_LOGGER.finish(
                    "{:35} | Completed {:} iteration(s) in {:.4g} ms | Average inference time: {:.4g} ms."
                    .format(active_runner.name, index + 1, total_runtime_ms,
                            total_runtime_ms / float(index + 1)))
                return iteration_results

        # Wraps execute_runner to use a queue.
        def execute_runner_with_queue(runner_queue, runner, loader_cache):
            iteration_results = None
            try:
                iteration_results = execute_runner(runner, loader_cache)
            except:
                # Cannot necessarily send the exception back over the queue.
                G_LOGGER.backtrace()
            util.try_send_on_queue(runner_queue, iteration_results)
            # After finishing, send the updated loader_cache back.
            util.try_send_on_queue(runner_queue, loader_cache)

        # Do all inferences in one loop, then comparisons at a later stage.
        # We run each runner in a separate process so that we can provide exclusive GPU access for each runner.
        run_results = RunResults()

        if not runners:
            G_LOGGER.warning(
                "No runners were provided to Comparator.run(). Inference will not be run, and run results will be empty."
            )

        for runner in runners:
            G_LOGGER.start("{:35} | Activating and starting inference".format(
                runner.name))
            if use_subprocess:
                runner_queue = Queue()
                process = Process(target=execute_runner_with_queue,
                                  args=(runner_queue, runner, loader_cache))
                process.start()

                # If a subprocess hangs in a certain way, then process.join could block forever. Hence,
                # we need to keep polling the process to make sure it really is alive.
                iteration_results = None
                while process.is_alive() and iteration_results is None:
                    try:
                        iteration_results = util.try_receive_on_queue(
                            runner_queue,
                            timeout=subprocess_polling_interval / 2)
                        # Receive updated loader cache, or fall back if it could not be sent.
                        loader_cache = util.try_receive_on_queue(
                            runner_queue,
                            timeout=subprocess_polling_interval / 2)
                    except queue.Empty:
                        G_LOGGER.extra_verbose(
                            "Polled subprocess - still running")

                try:
                    assert iteration_results is not None
                    run_results.append((runner.name, iteration_results))
                    process.join(subprocess_timeout)
                except:
                    G_LOGGER.critical(
                        "{:35} | Terminated prematurely. Check the exception logged above. "
                        "If there is no exception logged above, make sure not to use the --use-subprocess "
                        "flag or set use_subprocess=False in Comparator.run()."
                        .format(runner.name))
                finally:
                    process.terminate()

                if loader_cache is None:
                    G_LOGGER.critical(
                        "Could not send data loader cache to runner subprocess. Please try disabling subprocesses "
                        "by removing the --use-subprocess flag, or setting use_subprocess=False in Comparator.run()"
                    )
            else:
                run_results.append(
                    (runner.name, execute_runner(runner, loader_cache)))

        G_LOGGER.verbose("Successfully ran: {:}".format(
            [r.name for r in runners]))
        return run_results
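
The docstring above mentions that a data loader object can expose an `input_metadata` property, which `run` populates (via `DataLoaderCache.set_input_metadata`) before iteration begins; generators and lists cannot accept the attribute, hence the caveat in the docstring. A minimal sketch of such a loader, assuming `TensorMetadata` entries unpack as `(dtype, shape)` pairs as in the sketch after Example 2 (the `ZeroDataLoader` name is hypothetical):

    import numpy as np

    class ZeroDataLoader:
        def __init__(self, iterations=2):
            self.iterations = iterations
            self.input_metadata = None  # assigned for us before __iter__ is called

        def __iter__(self):
            # Shape each buffer according to whatever metadata the runner reported.
            for _ in range(self.iterations):
                yield {
                    name: np.zeros(shape, dtype=dtype)
                    for name, (dtype, shape) in self.input_metadata.items()
                }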
Example 4
    def run(runners,
            data_loader=None,
            warm_up=None,
            use_subprocess=None,
            subprocess_timeout=None,
            subprocess_polling_interval=None):
        """
        Runs the supplied runners sequentially.

        Args:
            runners (List[BaseRunner]):
                    A list of runners to run.
            data_loader (Generator -> OrderedDict[str, np.ndarray]):
                    A generator or iterable that yields a dictionary that maps input names to input numpy buffers.
                    In the simplest case, this can be a `List[Dict[str, np.ndarray]]`.

                    In case you don't know details about the inputs ahead of time, you can access the
                    `input_metadata` property in your data loader, which will be set to a `TensorMetadata`
                    instance by this function.
                    Note that this does not work for generators or lists.

                    The number of iterations run by this function is controlled by the number of items supplied
                    by the data loader.

                    Defaults to an instance of `DataLoader`.
            warm_up (int):
                    The number of warm-up runs to perform for each runner before timing.
                    Defaults to 0.
            use_subprocess (bool):
                    Whether each runner should be run in a subprocess. This allows each runner to have exclusive
                    access to the GPU. When using a subprocess, runners and loaders will never be modified.
            subprocess_timeout (int):
                    The timeout before a subprocess is killed automatically. This is useful for handling processes
                    that never terminate. A value of None disables the timeout. Defaults to None.
            subprocess_polling_interval (int):
                    The polling interval, in seconds, for checking whether a subprocess has completed or crashed.
                    In rare cases, disabling polling when subprocesses are enabled may cause this function
                    to hang indefinitely if the subprocess crashes.
                    A value of 0 disables polling. Defaults to 30 seconds.

        Returns:
            RunResults: A mapping of runner names to the results of their inference. The ordering of `runners` is preserved in this mapping.
        """
        warm_up = misc.default_value(warm_up, 0)
        data_loader = misc.default_value(data_loader, DataLoader())
        use_subprocess = misc.default_value(use_subprocess, False)
        subprocess_polling_interval = misc.default_value(
            subprocess_polling_interval, 30)
        loader_cache = DataLoaderCache(data_loader)

        def execute_runner(runner, loader_cache):
            with runner as active_runner:
                input_metadata = active_runner.get_input_metadata()
                G_LOGGER.verbose("Runner: {:40} | Input Metadata:\n{:}".format(
                    active_runner.name, misc.indent_block(input_metadata)))
                loader_cache.set_input_metadata(input_metadata)

                if warm_up:
                    G_LOGGER.info(
                        "Runner: {:40} | Running {:} warm-up runs".format(
                            active_runner.name, warm_up))
                    try:
                        feed_dict = loader_cache[0]
                    except IndexError:
                        G_LOGGER.warning(
                            "{:} warm-up runs were requested, but the data loader did not supply any data. "
                            "Skipping warm-up runs".format(warm_up))
                    else:
                        G_LOGGER.ultra_verbose(
                            "Warm-up Input Buffers:\n{:}".format(
                                misc.indent_block(feed_dict)))
                        # First do a few warm-up runs, and don't time them.
                        for _ in range(warm_up):
                            active_runner.infer(feed_dict=feed_dict)

                # Then, actual iterations.
                total_time = 0
                run_results = []
                for feed_dict in loader_cache:
                    G_LOGGER.extra_verbose(
                        lambda: "Runner: {:40} | Feeding inputs:\n{:}".format(
                            active_runner.name, misc.indent_block(feed_dict)))
                    outputs = active_runner.infer(feed_dict=feed_dict)

                    runtime = active_runner.last_inference_time()
                    # Without a deep copy here, outputs will always reference the output of the last run
                    run_results.append(
                        IterationResult(outputs=copy.deepcopy(outputs),
                                        runtime=runtime,
                                        runner_name=active_runner.name))

                    if len(run_results) == 1:
                        output_metadata = TensorMetadata()
                        for name, out in outputs.items():
                            output_metadata.add(name, out.dtype, out.shape)

                    G_LOGGER.verbose(
                        "Runner: {:40} | Output Metadata:\n{:}".format(
                            active_runner.name,
                            misc.indent_block(output_metadata)),
                        mode=LogMode.ONCE)
                    G_LOGGER.extra_verbose(
                        lambda:
                        "Runner: {:40} | Inference Time: {:.3f} ms | Received outputs:\n{:}"
                        .format(active_runner.name, runtime * 1000.0,
                                misc.indent_block(outputs)))

                G_LOGGER.info(
                    "Runner: {:40} | Completed {:} iterations.".format(
                        active_runner.name, len(run_results)))
                return run_results

        # Wraps execute_runner to use a queue.
        def execute_runner_with_queue(runner_queue, runner, loader_cache):
            run_results = None
            try:
                run_results = execute_runner(runner, loader_cache)
            except:
                # Cannot send the exception back, as it is not necessarily pickleable
                import traceback
                G_LOGGER.error(traceback.format_exc())
            misc.try_send_on_queue(runner_queue, run_results)
            # After finishing, send the updated loader_cache back.
            misc.try_send_on_queue(runner_queue, loader_cache)

        # Do all inferences in one loop, then comparisons at a later stage.
        # We run each runner in a separate process so that we can provide exclusive GPU access for each runner.
        runner_queue = Queue()
        run_results = RunResults()
        for index, runner in enumerate(runners):
            G_LOGGER.info(
                "Runner: {:40} | Activating and starting inference".format(
                    runner.name))
            if use_subprocess:
                process = Process(target=execute_runner_with_queue,
                                  args=(runner_queue, runner, loader_cache))
                process.start()

                # If a subprocess hangs in a certain way, then process.join could block forever. Hence,
                # we need to keep polling the process to make sure it really is alive.
                run_results[runner.name] = None
                while process.is_alive() and run_results[runner.name] is None:
                    try:
                        run_results[runner.name] = misc.try_receive_on_queue(
                            runner_queue,
                            timeout=subprocess_polling_interval / 2)
                        # Receive updated loader cache, or fall back if it could not be sent.
                        loader_cache = misc.try_receive_on_queue(
                            runner_queue,
                            timeout=subprocess_polling_interval / 2)
                    except queue.Empty:
                        G_LOGGER.extra_verbose(
                            "Polled subprocess - still running")

                try:
                    assert run_results[runner.name] is not None
                    process.join(subprocess_timeout)
                except:
                    G_LOGGER.critical(
                        "Runner: {:40} | Terminated prematurely. Check the exception logged above. "
                        "If there is no exception logged above, make sure not to use the --use-subprocess "
                        "flag or set use_subprocess=False in Comparator.run()."
                        .format(runner.name))
                finally:
                    process.terminate()

                if loader_cache is None:
                    G_LOGGER.critical(
                        "Could not send data loader cache to runner subprocess. Please try disabling subprocesses "
                        "by removing the --use-subprocess flag, or setting use_subprocess=False in Comparator.run()"
                    )
            else:
                run_results[runner.name] = execute_runner(runner, loader_cache)

        G_LOGGER.verbose("Successfully ran: {:}".format(
            [r.name for r in runners]))
        return run_results
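
Both versions of `run` above are invoked as `Comparator.run(...)`, as their own error messages indicate. An end-to-end usage sketch, assuming these snippets come from NVIDIA's Polygraphy toolkit and using its ONNX-Runtime backend; "model.onnx" is a placeholder path:

    from polygraphy.backend.onnxrt import OnnxrtRunner, SessionFromOnnx
    from polygraphy.comparator import Comparator

    runners = [OnnxrtRunner(SessionFromOnnx("model.onnx"))]  # placeholder model
    run_results = Comparator.run(runners, warm_up=2)

    # Assuming RunResults iterates as the (runner_name, iteration_results)
    # pairs appended in Example 3:
    for runner_name, iteration_results in run_results:
        for iteration in iteration_results:
            print(runner_name, iteration.runtime)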