def _preprocess(self):
    """Push a start marker for a new timed section and deepen the nesting.

    When ``self.xp`` compares equal to ``numpy`` the marker is the current
    wall-clock time from ``time.time()``. Otherwise the marker is a
    ``(start, stop)`` pair of ``cuda.Event`` objects, of which only the
    first has been recorded here.
    """
    if self.xp == numpy:
        marker = time.time()
    else:
        begin_event = cuda.Event()
        end_event = cuda.Event()
        begin_event.record()
        # The second event is left unrecorded; presumably the matching
        # post-processing step records it -- verify against that method.
        marker = (begin_event, end_event)

    self._running_stack.append(marker)
    self._depth += 1
def _preprocess(self):
    """Push a start marker for a new timed section and deepen the nesting.

    When ``self.xp`` is ``numpy`` the marker is the value returned by
    ``_get_time()``. Otherwise ``self.xp`` is asserted to be ``cuda.cupy``
    and the marker is a ``(start, stop)`` pair of ``cuda.Event`` objects,
    of which only the first has been recorded here.
    """
    if self.xp is numpy:
        marker = _get_time()
    else:
        # Only two backends are expected; anything else is a programming
        # error caught before any event is created.
        assert self.xp is cuda.cupy
        begin_event = cuda.Event()
        end_event = cuda.Event()
        begin_event.record()
        # The second event is left unrecorded; presumably the matching
        # post-processing step records it -- verify against that method.
        marker = (begin_event, end_event)

    self._running_stack.append(marker)
    self._depth += 1
def __init__(self, stream=None, compute_stream=None):
    """Store the streams and set up per-key conveyor state.

    Args:
        stream: Stored as ``self._stream``. Defaults to ``None``.
        compute_stream: Stored as ``self.compute_stream``. When it is
            not ``None``, two ``cuda.Event`` objects are created and
            ``self._sync_get`` is set to ``False``; otherwise no events
            are created and ``self._sync_get`` is ``True``.
    """
    self._stream = stream
    self.compute_stream = compute_stream
    self._device = None
    # Missing keys are filled in by calling self._get_conveyor().
    self._conveyor = collections.defaultdict(self._get_conveyor)

    has_compute_stream = compute_stream is not None
    if has_compute_stream:
        # * event1 prevents a CPU thread from updating arrays that might
        #   still be in use by GPU kernels.
        # * event2 prevents a GPU kernel from reading arrays that might
        #   still be in transfer to the GPU.
        self._event1 = cuda.Event()
        self._event2 = cuda.Event()
    self._sync_get = not has_compute_stream