def _measure_memory(self, func: Callable[[], None]) -> [Memory, MemorySummary]:
    """Run ``func`` and report the memory it used.

    Depending on ``self.args`` the measurement is taken in one of these ways:

    * TPU: not supported; a ``NotImplementedError`` is raised (see the note
      on the exception handler below).
    * GPU: ``func`` is called once and the ``used`` field of the NVML memory
      info for device ``self.args.device_idx`` is reported. When py3nvml is
      not available, ``func`` is not called and ``"N/A"`` is reported.
    * CPU: the result of ``measure_peak_memory_cpu(func)`` is reported,
      wrapped in ``Memory`` when it is an ``int``.

    When ``self.args.trace_memory_line_by_line`` is set, memory tracing is
    started before the measurement and its summary is returned as well.

    Args:
        func: Zero-argument callable whose memory consumption is measured.

    Returns:
        A ``(memory, summary)`` pair. ``memory`` is a ``Memory`` instance,
        the string ``"N/A"``, or whatever non-``int`` value
        ``measure_peak_memory_cpu`` returned. ``summary`` is the value
        returned by ``stop_memory_tracing`` or ``None`` when line-by-line
        tracing is disabled. ``("N/A", None)`` is returned when a
        ``RuntimeError`` is raised during the measurement.
    """
    try:
        if self.args.trace_memory_line_by_line:
            trace = start_memory_tracing("transformers")

        if self.args.is_tpu:
            # tpu
            raise NotImplementedError(
                "Memory Benchmarking is currently not implemented for TPU. Please disable memory benchmarking with `--no_memory` or `args.no_memory=True`"
            )
        elif self.args.is_gpu:
            if not is_py3nvml_available():
                logger.warning(
                    "py3nvml not installed, we won't log GPU memory usage. "
                    "Install py3nvml (pip install py3nvml) to log information about GPU."
                )
                memory = "N/A"
            else:
                logger.info(
                    "Measuring total GPU usage on GPU device. Make sure to not have additional processes running on the same GPU."
                )
                # init nvml
                nvml.nvmlInit()
                try:
                    func()
                    handle = nvml.nvmlDeviceGetHandleByIndex(self.args.device_idx)
                    meminfo = nvml.nvmlDeviceGetMemoryInfo(handle)
                    max_bytes_in_use = meminfo.used
                    memory = Memory(max_bytes_in_use)
                finally:
                    # shutdown nvml even when `func` or an NVML query raises,
                    # so a failed run does not leave NVML initialised
                    nvml.nvmlShutdown()
        else:
            # cpu
            memory_bytes = measure_peak_memory_cpu(func)
            memory = Memory(memory_bytes) if isinstance(memory_bytes, int) else memory_bytes

        if self.args.trace_memory_line_by_line:
            summary = stop_memory_tracing(trace)
        else:
            summary = None

        return memory, summary
    except RuntimeError as e:
        # NOTE: `NotImplementedError` is a subclass of `RuntimeError`, so the
        # TPU error raised above is also reported through this handler.
        self.print_fn("Doesn't fit on GPU. {}".format(e))
        return "N/A", None
def _measure_memory(self, func: Callable[[], None]) -> [Memory, MemorySummary]:
    """Run ``func`` under ``self.args.strategy`` and report the memory it used.

    Depending on ``self.args`` the measurement is taken in one of these ways:

    * TPU: not supported; a ``NotImplementedError`` is raised and propagates
      to the caller.
    * GPU: ``func`` is called once and the ``used`` field of the NVML memory
      info for device ``self.args.device_idx`` is reported. When py3nvml is
      not available, ``func`` is not called and ``"N/A"`` is reported.
    * CPU: the result of ``measure_peak_memory_cpu(func)`` is reported,
      wrapped in ``Memory`` when it is an ``int``. With line-by-line tracing
      enabled, ``summary.total`` from the trace is reported instead.

    Args:
        func: Zero-argument callable whose memory consumption is measured.

    Returns:
        A ``(memory, summary)`` pair. ``summary`` is the value returned by
        ``stop_memory_tracing`` or ``None`` when line-by-line tracing is
        disabled. ``("N/A", None)`` is returned when a
        ``ResourceExhaustedError`` is raised during the measurement.

    Raises:
        AssertionError: If line-by-line tracing is requested while
            ``self.args.eager_mode`` is falsy.
        NotImplementedError: If ``self.args.is_tpu`` is set.
    """
    # The trailing spaces matter: adjacent literals are concatenated as-is.
    logger.info(
        "Note that Tensorflow allocates more memory than "
        "it might need to speed up computation. "
        "The memory reported here corresponds to the memory "
        "reported by `nvidia-smi`, which can vary depending "
        "on total available memory on the GPU that is used."
    )
    with self.args.strategy.scope():
        try:
            if self.args.trace_memory_line_by_line:
                assert (
                    self.args.eager_mode
                ), "`args.eager_mode` is set to `False`. Make sure to run model in eager mode to measure memory consumption line by line."
                trace = start_memory_tracing("transformers")

            if self.args.is_tpu:
                # tpu
                raise NotImplementedError(
                    "Memory Benchmarking is currently not implemented for TPU. Please disable memory benchmarking with `args.no_memory=True`"
                )
            elif self.args.is_gpu:
                # gpu
                if not is_py3nvml_available():
                    logger.warning(
                        "py3nvml not installed, we won't log GPU memory usage. "
                        "Install py3nvml (pip install py3nvml) to log information about GPU."
                    )
                    memory = "N/A"
                else:
                    logger.info(
                        "Measuring total GPU usage on GPU device. Make sure to not have additional processes running on the same GPU."
                    )
                    # init nvml
                    nvml.nvmlInit()
                    try:
                        func()
                        handle = nvml.nvmlDeviceGetHandleByIndex(self.args.device_idx)
                        meminfo = nvml.nvmlDeviceGetMemoryInfo(handle)
                        max_bytes_in_use = meminfo.used
                        memory = Memory(max_bytes_in_use)
                    finally:
                        # shutdown nvml even when `func` or an NVML query
                        # raises (e.g. out of memory), so a failed run does
                        # not leave NVML initialised
                        nvml.nvmlShutdown()
            else:
                # cpu
                if self.args.trace_memory_line_by_line:
                    logger.info(
                        "When enabling line by line tracing, the max peak memory for CPU is inaccurate in Tensorflow."
                    )
                    # Filled in from the trace summary below.
                    # NOTE(review): `func` is not called in this branch, so
                    # the reported total comes solely from the trace —
                    # confirm this is intended.
                    memory = None
                else:
                    memory_bytes = measure_peak_memory_cpu(func)
                    memory = Memory(memory_bytes) if isinstance(memory_bytes, int) else memory_bytes

            if self.args.trace_memory_line_by_line:
                summary = stop_memory_tracing(trace)
                if memory is None:
                    memory = summary.total
            else:
                summary = None

            return memory, summary
        except ResourceExhaustedError as e:
            self.print_fn("Doesn't fit on GPU. {}".format(e))
            return "N/A", None
from .benchmark_utils import ( Benchmark, Memory, MemorySummary, measure_peak_memory_cpu, start_memory_tracing, stop_memory_tracing, ) if is_tf_available(): import tensorflow as tf from .benchmark_args_tf import TensorflowBenchmarkArguments from tensorflow.python.framework.errors_impl import ResourceExhaustedError if is_py3nvml_available(): import py3nvml.py3nvml as nvml logger = logging.getLogger(__name__) def run_with_tf_optimizations(do_eager_mode: bool, use_xla: bool): def run_func(func): @wraps(func) def run_in_eager_mode(*args, **kwargs): return func(*args, **kwargs) @wraps(func) @tf.function(experimental_compile=use_xla) def run_in_graph_mode(*args, **kwargs): return func(*args, **kwargs)