def test_profile(self):
  profiler.start()
  with traceme.TraceMe('three_times_five'):
    three = constant_op.constant(3)
    five = constant_op.constant(5)
    product = three * five
    self.assertAllEqual(15, product)
  with self.assertRaises(profiler.ProfilerAlreadyRunningError):
    profiler.start()
  profile_result = profiler.stop()
  profile_pb = trace_events_pb2.Trace()
  profile_pb.ParseFromString(profile_result)
  devices = frozenset(device.name for device in profile_pb.devices.values())
  self.assertIn('/host:CPU', devices)
  if not test_util.IsBuiltWithROCm() and config.list_physical_devices('GPU'):
    # device tracing is not yet supported on the ROCm platform
    self.assertIn('/device:GPU:0', devices)
  events = frozenset(event.name for event in profile_pb.trace_events)
  self.assertIn('three_times_five', events)
  self.assertIn('Mul', events)
  with self.assertRaises(profiler.ProfilerNotRunningError):
    profiler.stop()
def test_profile_exceptions(self):
  logdir = self.get_temp_dir()
  profiler.start(logdir)
  with self.assertRaises(errors.AlreadyExistsError):
    profiler.start(logdir)
  profiler.stop()
  with self.assertRaises(errors.UnavailableError):
    profiler.stop()
def trace_off():
  """Stops the current trace and discards any collected information."""
  global _current_trace_context
  with _current_trace_context_lock:
    _current_trace_context = None

  # Disabling run_metadata disables graph collection as well.
  context.context().disable_run_metadata()

  # The profiler only has start and stop. One needs to stop in order to
  # export, and stopping when it is not running will raise an error.
  try:
    _profiler.stop()
  except _profiler.ProfilerNotRunningError:
    pass
def benchmark_keras_model_functional_fit_run_model_eagerly_with_profiler(
    self):
  profiler.start()
  model = make_keras_model(initializer="glorot_uniform")
  self._benchmark_keras_model_fit(model, run_eagerly=True)
  result = profiler.stop()
  assert result is not None
def on_batch_end(self, batch, logs=None):
  if batch == self.stop_step:
    results = profiler.stop()
    profiler.save(self.log_dir, results)
    tf.compat.v1.logging.info(
        'Profiler saved profiles for steps between %s and %s to %s',
        self.start_step, self.stop_step, self.log_dir)
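The callback above only shows the stop side. A minimal sketch of the on_batch_begin counterpart it implies, assuming a `start_step` attribute that mirrors `stop_step` (the log message above suggests it exists, but this hook is not in the source):

def on_batch_begin(self, batch, logs=None):
  # start_step is assumed to mirror stop_step; hypothetical counterpart hook
  if batch == self.start_step:
    profiler.start()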
def benchmark_keras_model_functional_fit_graph_mode_with_profiler(self):
  profiler.start()
  with context.graph_mode():
    model = make_keras_model(initializer="glorot_uniform")
    self._benchmark_keras_model_fit(model)
  result = profiler.stop()
  assert result is not None
def test_profile(self):
  profiler.start()
  three = constant_op.constant(3)
  five = constant_op.constant(5)
  product = three * five
  self.assertAllEqual(15, product)
  with self.assertRaises(AssertionError):
    profiler.start()
  profile_result = profiler.stop()
  profile_pb = trace_events_pb2.Trace()
  profile_pb.ParseFromString(profile_result)
  profile_pb_str = '%s' % profile_pb
  self.assertTrue('Mul' in profile_pb_str)
  with self.assertRaises(AssertionError):
    profiler.stop()
def main():
    profile = None
    if "--run-cProfile" in sys.argv:
        import cProfile
        import pstats
        profile_file = "cProfile.log"
        print("Running cProfile, writing output to '{}'".format(profile_file),
              file=sys.stderr)
        profile = cProfile.Profile()
        profile.enable()
    parser = create_argparser()
    if len(sys.argv) < 2:
        parser.error("Too few arguments, run '{} --help' for more information."
                     .format(lidbox.__name__))
    # TODO when a subcommand is used incorrectly, get usage strings for its
    # subparser instead of the root parser
    args = parser.parse_args()
    tf_profiler = None
    if args.run_tf_profiler:
        from tensorflow.python.eager import profiler as tf_profiler
        tf_profile_file = os.path.abspath(
            os.path.join("tf_profile", str(int(time.time()))))
        print("Running TensorFlow profiler, writing output to '{}'"
              .format(tf_profile_file), file=sys.stderr)
        tf_profiler.start()
    ret = 1
    try:
        # Initialize a Command object from the class specified in
        # args.cmd_class and remove the class from args
        command = args.__dict__.pop("cmd_class")(args)
        ret = command.run()
    finally:
        if tf_profiler:
            tf_profiler_result = tf_profiler.stop()
            tf_profiler.save(tf_profile_file, tf_profiler_result)
        if profile:
            profile.disable()
            with open(profile_file, "w") as out_f:
                pstats.Stats(profile, stream=out_f) \
                    .sort_stats("tottime").print_stats()
    if ret:
        sys.exit(ret)
def test_profile(self):
  profiler.start()
  three = constant_op.constant(3)
  five = constant_op.constant(5)
  product = three * five
  self.assertAllEqual(15, product)
  with self.assertRaises(profiler.ProfilerAlreadyRunningError):
    profiler.start()
  profile_result = profiler.stop()
  profile_pb = trace_events_pb2.Trace()
  profile_pb.ParseFromString(profile_result)
  profile_pb_str = '%s' % profile_pb
  self.assertTrue('Mul' in profile_pb_str)
  with self.assertRaises(profiler.ProfilerNotRunningError):
    profiler.stop()
def on_batch_end(self, batch, logs=None):
  if batch == self.stop_step_in_epoch and self.should_stop:
    self.should_stop = False
    results = profiler.stop()
    profiler.save(self.log_dir, results)
    logging.info(
        'Profiler saved profiles for steps between %s and %s to %s',
        self.start_step, self.stop_step, self.log_dir)
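Similarly, a hedged sketch of the begin-side hook for the epoch-aware variant above; `start_step_in_epoch` and `should_start` mirror the stop-side attributes and are assumptions, not taken from the source:

def on_batch_begin(self, batch, logs=None):
  # assumed attributes mirroring stop_step_in_epoch / should_stop above
  if batch == self.start_step_in_epoch and self.should_start:
    self.should_start = False
    profiler.start()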
def trace_off():
  """Stops the current trace and discards any collected information."""
  global _current_trace_context
  with _current_trace_context_lock:
    if _current_trace_context is None:
      return  # tracing already off
    graph, profiler = _current_trace_context  # pylint: disable=redefined-outer-name, unpacking-non-sequence
    _current_trace_context = None

  if graph:
    # Disabling run_metadata disables graph collection as well.
    context.context().disable_run_metadata()

  if profiler:
    try:
      _profiler.stop()
    except _profiler.ProfilerNotRunningError:
      pass
def test_save_profile(self):
  logdir = self.get_temp_dir()
  profiler.start(logdir)
  with traceme.TraceMe('three_times_five'):
    three = constant_op.constant(3)
    five = constant_op.constant(5)
    product = three * five
    self.assertAllEqual(15, product)
  profiler.stop()
  file_list = gfile.ListDirectory(logdir)
  self.assertEqual(len(file_list), 2)
  for file_name in gfile.ListDirectory(logdir):
    if gfile.IsDirectory(os.path.join(logdir, file_name)):
      self.assertEqual(file_name, 'plugins')
    else:
      self.assertTrue(file_name.endswith('.profile-empty'))
  profile_dir = os.path.join(logdir, 'plugins', 'profile')
  run = gfile.ListDirectory(profile_dir)[0]
  hostname = socket.gethostname()
  overview_page = os.path.join(profile_dir, run,
                               hostname + '.overview_page.pb')
  self.assertTrue(gfile.Exists(overview_page))
  input_pipeline = os.path.join(profile_dir, run,
                                hostname + '.input_pipeline.pb')
  self.assertTrue(gfile.Exists(input_pipeline))
  tensorflow_stats = os.path.join(profile_dir, run,
                                  hostname + '.tensorflow_stats.pb')
  self.assertTrue(gfile.Exists(tensorflow_stats))
  trace_file = os.path.join(profile_dir, run, hostname + '.trace')
  self.assertTrue(gfile.Exists(trace_file))
  with gfile.Open(trace_file, 'rb') as f:
    profile_pb = trace_events_pb2.Trace()
    profile_pb.ParseFromString(f.read())
    devices = frozenset(device.name for device in profile_pb.devices.values())
    self.assertIn('/host:CPU', devices)
    if config.list_physical_devices('GPU'):
      self.assertIn('/device:GPU:0', devices)
    events = frozenset(event.name for event in profile_pb.trace_events)
    self.assertIn('three_times_five', events)
    self.assertIn('Mul:Mul', events)
def test_profile(self):
  profiler.start()
  three = constant_op.constant(3)
  five = constant_op.constant(5)
  product = three * five
  self.assertAllEqual(15, product)
  profile_result = profiler.stop()
  profile_pb = tfprof_log_pb2.ProfileProto()
  profile_pb.ParseFromString(profile_result)
  profile_pb_str = '%s' % profile_pb
  self.assertTrue('Mul' in profile_pb_str)
def trace_export(name, step=None, profiler_outdir=None):
  """Stops and exports the active trace as a Summary and/or profile file.

  Stops the trace and exports all metadata collected during the trace to the
  default SummaryWriter, if one has been set.

  Args:
    name: A name for the summary to be written.
    step: Explicit `int64`-castable monotonic step value for this summary. If
      omitted, this defaults to `tf.summary.experimental.get_step()`, which
      must not be None.
    profiler_outdir: Output directory for profiler. This is only used when the
      profiler was enabled when the trace was started. In that case, if there
      is a logdir-based default SummaryWriter, this defaults to the same
      directory, but otherwise the argument must be passed.

  Raises:
    ValueError: if a default writer exists, but no step was provided and
      `tf.summary.experimental.get_step()` is None.
  """
  global _current_trace_context

  if ops.inside_function():
    logging.warn("Cannot export trace inside a tf.function.")
    return
  if not context.executing_eagerly():
    logging.warn("Can only export trace while executing eagerly.")
    return

  with _current_trace_context_lock:
    if _current_trace_context is None:
      raise ValueError("Must enable trace before export.")
    graph, profiler = _current_trace_context  # pylint: disable=redefined-outer-name
    if profiler_outdir is None \
        and isinstance(_summary_state.writer, ResourceSummaryWriter):
      logdir = _summary_state.writer._metadata.get("logdir")  # pylint: disable=protected-access
      if logdir is not None:
        profiler_outdir = logdir
    if profiler and profiler_outdir is None:
      raise ValueError("Must set profiler_outdir or "
                       "enable summary writer with logdir.")

  run_meta = context.context().export_run_metadata()

  if graph and not profiler:
    run_metadata_graphs(name, run_meta, step)
  else:
    run_metadata(name, run_meta, step)

  if profiler:
    _profiler.save(profiler_outdir, _profiler.stop())

  trace_off()
def test_profile(self):
  profiler.start()
  three = constant_op.constant(3)
  five = constant_op.constant(5)
  product = three * five
  self.assertAllEqual(15, product)
  with self.assertRaises(profiler.ProfilerAlreadyRunningError):
    profiler.start()
  profile_result = profiler.stop()
  profile_pb = trace_events_pb2.Trace()
  profile_pb.ParseFromString(profile_result)
  devices = frozenset(device.name for device in profile_pb.devices.values())
  self.assertIn('/host:CPU', devices)
  if config.list_physical_devices('GPU'):
    self.assertIn('/device:GPU:0', devices)
  events = frozenset(event.name for event in profile_pb.trace_events)
  self.assertIn('Mul:Mul', events)
  with self.assertRaises(profiler.ProfilerNotRunningError):
    profiler.stop()
def trace_export(name, step=None, profiler_outdir=None):
  """Stops and exports the active trace as a Summary and/or profile file.

  Stops the trace and exports all metadata collected during the trace to the
  default SummaryWriter, if one has been set.

  Args:
    name: A name for the summary to be written.
    step: Explicit `int64`-castable monotonic step value for this summary. If
      omitted, this defaults to `tf.summary.experimental.get_step()`, which
      must not be None.
    profiler_outdir: Output directory for profiler. Required when the profiler
      was enabled at trace start; otherwise ignored.

  Raises:
    ValueError: if a default writer exists, but no step was provided and
      `tf.summary.experimental.get_step()` is None.
  """
  # TODO(stephanlee): See if we can remove profiler_outdir and infer it from
  # the SummaryWriter's logdir.
  global _current_trace_context

  if ops.inside_function():
    logging.warn("Cannot export trace inside a tf.function.")
    return
  if not context.executing_eagerly():
    logging.warn("Can only export trace while executing eagerly.")
    return

  with _current_trace_context_lock:
    if _current_trace_context is None:
      raise ValueError("Must enable trace before export through "
                       "tf.summary.trace_on.")
    graph, profiler = _current_trace_context  # pylint: disable=redefined-outer-name
    if profiler and profiler_outdir is None:
      raise ValueError("Argument `profiler_outdir` is not specified.")

  run_meta = context.context().export_run_metadata()

  if graph and not profiler:
    run_metadata_graphs(name, run_meta, step)
  else:
    run_metadata(name, run_meta, step)

  if profiler:
    _profiler.save(profiler_outdir, _profiler.stop())

  trace_off()
def trace_export(name, step=None, profiler_outdir=None):
  """Stops and exports the active trace as a Summary and/or profile file.

  Stops the trace and exports all metadata collected during the trace to the
  default SummaryWriter, if one has been set.

  Args:
    name: A name for the summary to be written.
    step: Explicit `int64`-castable monotonic step value for this summary. If
      omitted, this defaults to `tf.summary.experimental.get_step()`, which
      must not be None.
    profiler_outdir: Output directory for profiler. Required when the profiler
      was enabled at trace start; otherwise ignored.

  Raises:
    ValueError: if a default writer exists, but no step was provided and
      `tf.summary.experimental.get_step()` is None.
  """
  # TODO(stephanlee): See if we can remove profiler_outdir and infer it from
  # the SummaryWriter's logdir.
  global _current_trace_context

  if ops.inside_function():
    logging.warn("Cannot export trace inside a tf.function.")
    return
  if not context.context().executing_eagerly():
    logging.warn("Can only export trace while executing eagerly.")
    return

  with _current_trace_context_lock:
    if _current_trace_context is None:
      raise ValueError("Must enable trace before export.")
    graph, profiler = _current_trace_context  # pylint: disable=redefined-outer-name
    if profiler and profiler_outdir is None:
      raise ValueError("Required profiler_outdir is not specified")

  run_meta = context.context().export_run_metadata()

  if graph and not profiler:
    run_metadata_graphs(name, run_meta, step)
  else:
    run_metadata(name, run_meta, step)

  if profiler:
    _profiler.save(profiler_outdir, _profiler.stop())

  trace_off()
def trace_export(name, step=None, profiler_outdir=None):
  """Exports trace as a Summary and/or profile file.

  Args:
    name: A name for the summary to be written.
    step: Explicit `int64`-castable monotonic step value for this summary. If
      omitted, this defaults to `tf.summary.experimental.get_step()`, which
      must not be None.
    profiler_outdir: Output directory for profiler. Required when the profiler
      was enabled at trace start; otherwise ignored.

  Returns:
    None

  Raises:
    ValueError: if a default writer exists, but no step was provided and
      `tf.summary.experimental.get_step()` is None.
  """
  # TODO(stephanlee): See if we can remove profiler_outdir and infer it from
  # the SummaryWriter's logdir.
  global _current_trace_context

  if not context.context().executing_eagerly():
    logging.warn("Can only export trace while executing eagerly.")
    return

  with _current_trace_context_lock:
    if _current_trace_context is None:
      raise ValueError("Must enable trace before export.")
    graph, profiler = _current_trace_context
    if profiler and profiler_outdir is None:
      raise ValueError("Required profiler_outdir is not specified")

  run_meta = context.context().export_run_metadata()

  if graph and not profiler:
    run_metadata_graphs(name, run_meta, step)
  else:
    run_metadata(name, run_meta, step)

  if profiler:
    _profiler.save(profiler_outdir, _profiler.stop())

  trace_off()
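The trace_export variants above all share one contract: trace_on starts collection, trace_export writes the summary (and the profile, when the profiler was enabled), and trace_off tears the context down. A minimal usage sketch against the public TF 2.x endpoints; the logdir path is illustrative:

import tensorflow as tf

logdir = '/tmp/trace_demo'  # illustrative output directory
writer = tf.summary.create_file_writer(logdir)

@tf.function
def mul(x, y):
  return x * y

tf.summary.trace_on(graph=True, profiler=True)  # begin collecting graph + profile
mul(tf.constant(3), tf.constant(5))             # run the computation to be traced
with writer.as_default():
  # profiler_outdir is required here because the profiler was enabled.
  tf.summary.trace_export(name='mul_trace', step=0, profiler_outdir=logdir)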
def _stop_and_save_profiler(output_dir):
  """Stop profiler and save profiler data.

  Args:
    output_dir: log directory to place the profiler data
  """
  from tensorflow.python.eager import profiler  # pylint: disable=g-import-not-at-top
  try:
    profiler_data_dir = os.path.join(output_dir, 'profiler_data')
    logging.info('Stopping profiler and saving data to dir %s',
                 profiler_data_dir)
    make_dir_if_not_exist(profiler_data_dir)
    result = profiler.stop()
    with open(os.path.join(profiler_data_dir, 'local.trace'), 'wb') as f:
      f.write(result)
  except Exception:  # pylint: disable=W0703
    logging.error('Profiler failed to stop due to error:\n %s',
                  traceback.format_exc())
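A hedged sketch of the start-side helper implied by _stop_and_save_profiler; `_start_profiler` is a hypothetical name, and `logging` is assumed to be imported as in the snippet above:

def _start_profiler():
  """Starts the eager profiler, tolerating a profiler that is already running."""
  from tensorflow.python.eager import profiler  # pylint: disable=g-import-not-at-top
  try:
    profiler.start()
  except profiler.ProfilerAlreadyRunningError:
    logging.warning('Profiler is already running.')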
def custom_training(waveGlow, hparams, optimizer, checkpoint,
                    manager_checkpoint):
    step = tf.cast(checkpoint.step, tf.int64)
    for epoch in tf.range(1):
        tf.summary.text(name='epoch',
                        data='Start epoch {} at {}'.format(
                            epoch.numpy(),
                            datetime.now().strftime("%Y%m%d-%H%M%S")),
                        step=step)
        for int_step, (step, x_train) in zip(
                range(50), training_dataset.enumerate(start=step)):
            if int_step == 2:
                profiler.start()
            train_step(step=step, x_train=x_train, waveGlow=waveGlow,
                       hparams=hparams, optimizer=optimizer)
            if tf.equal(step % hparams['save_model_every'], 0):
                save_path = manager_checkpoint.save()
                tf.summary.text(name='save_checkpoint',
                                data="Saved checkpoint in " + save_path,
                                step=step)
            if tf.equal(step % hparams['save_audio_every'], 0):
                utils.eval_step(eval_dataset=validation_dataset,
                                waveGlow=waveGlow, hparams=hparams, step=step)
            if int_step == 49:
                # range(50) yields 0..49, so stop on the last step; the
                # original compared against 50 and never stopped the profiler.
                profiler_result = profiler.stop()
                profiler.save(hparams['log_dir'], profiler_result)
                break
            checkpoint.step.assign_add(1)
def on_batch_end(self, batch, logs=None):
  """Writes scalar summaries for metrics on every training batch.

  Performs profiling if current batch is in profiler_batches.
  """
  # Don't output batch_size and batch number as TensorBoard summaries
  logs = logs or {}
  self._samples_seen += logs.get('size', 1)
  samples_seen_since = self._samples_seen - self._samples_seen_at_last_write
  if self.update_freq != 'epoch' and samples_seen_since >= self.update_freq:
    batch_logs = {('batch_' + k): v
                  for k, v in logs.items()
                  if k not in ['batch', 'size', 'num_steps']}
    self._write_custom_summaries(self._total_batches_seen, batch_logs)
    self._samples_seen_at_last_write = self._samples_seen
  self._total_batches_seen += 1
  if self._is_profiling:
    profiler.save(self.log_dir, profiler.stop())
    self._is_profiling = False
  elif (not self._is_profiling and
        self._total_batches_seen == self._profile_batch - 1):
    profiler.start()
    self._is_profiling = True
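For context, the profiling logic above is normally driven through the public Keras callback, whose `profile_batch` argument feeds the `_profile_batch` attribute consulted in on_batch_end. A hedged sketch with an illustrative log directory:

import tensorflow as tf

tensorboard_cb = tf.keras.callbacks.TensorBoard(
    log_dir='/tmp/logs',  # illustrative path
    profile_batch=2)      # profile the second batch; 0 disables profiling
# model.fit(x, y, callbacks=[tensorboard_cb])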
@tf.function
def get_gradient(feature, coordinate, tfnative):
    # The opening of this function was truncated in the source; this first
    # statement is an assumption reconstructed from the call sites below.
    mask, value = resample_image(feature, coordinate, tfnative=tfnative)
    value_grad = tf.random.normal(value.shape, dtype=value.dtype)
    v = tf.reduce_sum(value * value_grad)
    grad, = tf.gradients(v, feature)
    return grad, value_grad


@tf.function
def get_gradient_gt(feature, coordinate, value_grad):
    value = resampler.resampler(feature, coordinate)
    v = tf.reduce_sum(value * value_grad)
    grad, = tf.gradients(v, feature)
    return grad


mask, value = resample_image(feature, coordinate, tfnative=TFNATIVE)
gt = resampler.resampler(feature, coordinate)
grad, value_grad = get_gradient(feature, coordinate, TFNATIVE)
value_grad = tf.where(mask[..., None], value_grad, 0)
grad_gt = get_gradient_gt(feature, coordinate, value_grad)

profiler.start()
for i in range(3):
    # feature_grad, _ = get_gradient(feature, coordinate, tfnative=TFNATIVE)
    # print(feature_grad.shape)
    mask, value = resample_image(feature, coordinate, tfnative=TFNATIVE)
profiler_result = profiler.stop()
profiler.save('./profile.log', profiler_result)
def run(flags_obj):
  """Run ResNet ImageNet training and eval loop using native Keras APIs.

  Args:
    flags_obj: An object containing parsed flag values.

  Raises:
    ValueError: If fp16 is passed as it is not currently supported.

  Returns:
    Dictionary of training and eval stats.
  """
  # TODO(tobyboyd): Remove eager flag when tf 1.0 testing ends.
  # Eager is default in tf 2.0 and should not be toggled
  if keras_common.is_v2_0():
    keras_common.set_config_v2()
  else:
    config = keras_common.get_config_proto_v1()
    if flags_obj.enable_eager:
      tf.compat.v1.enable_eager_execution(config=config)
    else:
      sess = tf.Session(config=config)
      tf.keras.backend.set_session(sess)

  # Execute flag override logic for better model performance
  if flags_obj.tf_gpu_thread_mode:
    keras_common.set_gpu_thread_mode_and_count(flags_obj)

  dtype = flags_core.get_tf_dtype(flags_obj)
  if dtype == 'float16':
    policy = tf.keras.mixed_precision.experimental.Policy(
        'infer_float32_vars')
    tf.keras.mixed_precision.experimental.set_policy(policy)

  data_format = flags_obj.data_format
  if data_format is None:
    data_format = ('channels_first'
                   if tf.test.is_built_with_cuda() else 'channels_last')
  tf.keras.backend.set_image_data_format(data_format)

  strategy = distribution_utils.get_distribution_strategy(
      distribution_strategy=flags_obj.distribution_strategy,
      num_gpus=flags_obj.num_gpus,
      num_workers=distribution_utils.configure_cluster())

  strategy_scope = keras_common.get_strategy_scope(strategy)

  # pylint: disable=protected-access
  if flags_obj.use_synthetic_data:
    distribution_utils.set_up_synthetic_data()
    input_fn = keras_common.get_synth_input_fn(
        height=imagenet_main.DEFAULT_IMAGE_SIZE,
        width=imagenet_main.DEFAULT_IMAGE_SIZE,
        num_channels=imagenet_main.NUM_CHANNELS,
        num_classes=imagenet_main.NUM_CLASSES,
        dtype=dtype)
  else:
    distribution_utils.undo_set_up_synthetic_data()
    input_fn = imagenet_main.input_fn

  train_input_dataset = input_fn(
      is_training=True,
      data_dir=flags_obj.data_dir,
      batch_size=flags_obj.batch_size,
      num_epochs=flags_obj.train_epochs,
      parse_record_fn=parse_record_keras,
      datasets_num_private_threads=flags_obj.datasets_num_private_threads,
      dtype=dtype)

  eval_input_dataset = None
  if not flags_obj.skip_eval:
    eval_input_dataset = input_fn(
        is_training=False,
        data_dir=flags_obj.data_dir,
        batch_size=flags_obj.batch_size,
        num_epochs=flags_obj.train_epochs,
        parse_record_fn=parse_record_keras,
        dtype=dtype)

  with strategy_scope:
    optimizer = keras_common.get_optimizer()
    if dtype == 'float16':
      # TODO(reedwm): Remove manually wrapping optimizer once mixed precision
      # can be enabled with a single line of code.
      optimizer = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
          optimizer, loss_scale=flags_core.get_loss_scale(flags_obj))

    if flags_obj.use_trivial_model:
      model = trivial_model.trivial_model(imagenet_main.NUM_CLASSES)
    else:
      model = resnet_model.resnet50(
          num_classes=imagenet_main.NUM_CLASSES, dtype=dtype)

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['sparse_categorical_accuracy'])

  time_callback, tensorboard_callback, lr_callback = keras_common.get_callbacks(
      learning_rate_schedule, imagenet_main.NUM_IMAGES['train'])

  train_steps = imagenet_main.NUM_IMAGES['train'] // flags_obj.batch_size
  train_epochs = flags_obj.train_epochs

  if flags_obj.train_steps:
    train_steps = min(flags_obj.train_steps, train_steps)
    train_epochs = 1

  num_eval_steps = (imagenet_main.NUM_IMAGES['validation'] //
                    flags_obj.batch_size)

  validation_data = eval_input_dataset
  if flags_obj.skip_eval:
    # Only build the training graph. This reduces memory usage introduced by
    # control flow ops in layers that have different implementations for
    # training and inference (e.g., batch norm).
    tf.keras.backend.set_learning_phase(1)
    num_eval_steps = None
    validation_data = None

  callbacks = [time_callback, lr_callback]
  if flags_obj.enable_tensorboard:
    callbacks.append(tensorboard_callback)

  if flags_obj.enable_e2e_xprof:
    profiler.start()

  history = model.fit(train_input_dataset,
                      epochs=train_epochs,
                      steps_per_epoch=train_steps,
                      callbacks=callbacks,
                      validation_steps=num_eval_steps,
                      validation_data=validation_data,
                      validation_freq=flags_obj.epochs_between_evals,
                      verbose=2)

  if flags_obj.enable_e2e_xprof:
    results = profiler.stop()
    profiler.save(flags_obj.model_dir, results)

  eval_output = None
  if not flags_obj.skip_eval:
    eval_output = model.evaluate(eval_input_dataset,
                                 steps=num_eval_steps,
                                 verbose=2)

  stats = keras_common.build_stats(history, eval_output, time_callback)
  return stats
def run_profile(net, train=False, n_steps=150, do_profile=True, reps=1,
                dtype="float32", **kwargs):
    """
    Run profiler on a benchmark network.

    Parameters
    ----------
    net : `~nengo.Network`
        The nengo Network to be profiled.
    train : bool
        If True, profile the `.Simulator.fit` function. Otherwise, profile
        the `.Simulator.run` function.
    n_steps : int
        The number of timesteps to run the simulation.
    do_profile : bool
        Whether or not to run profiling.
    reps : int
        Repeat the run this many times (only profile data from the last run
        will be kept).
    dtype : str
        Simulation dtype (e.g. "float32").

    Returns
    -------
    exec_time : float
        Time (in seconds) taken to run the benchmark, taking the minimum
        over ``reps``.

    Notes
    -----
    kwargs will be passed on to `.Simulator`.
    """
    exec_time = 1e10
    n_batches = 1

    with net:
        nengo_dl.configure_settings(inference_only=not train, dtype=dtype)

    with nengo_dl.Simulator(net, **kwargs) as sim:
        if hasattr(net, "inp"):
            x = {
                net.inp: np.random.randn(
                    sim.minibatch_size * n_batches, n_steps, net.inp.size_out)
            }
        elif hasattr(net, "inp_a"):
            x = {
                net.inp_a: np.random.randn(
                    sim.minibatch_size * n_batches, n_steps,
                    net.inp_a.size_out),
                net.inp_b: np.random.randn(
                    sim.minibatch_size * n_batches, n_steps,
                    net.inp_b.size_out),
            }
        else:
            x = None

        if train:
            y = {
                net.p: np.random.randn(
                    sim.minibatch_size * n_batches, n_steps, net.p.size_in)
            }
            sim.compile(tf.optimizers.SGD(0.001), loss=tf.losses.mse)

            # run once to eliminate startup overhead
            start = timeit.default_timer()
            sim.fit(x, y, epochs=1, n_steps=n_steps)
            print("Warmup time:", timeit.default_timer() - start)

            for _ in range(reps):
                if do_profile:
                    profiler.start()
                start = timeit.default_timer()
                sim.fit(x, y, epochs=1, n_steps=n_steps)
                exec_time = min(timeit.default_timer() - start, exec_time)
                if do_profile:
                    profiler.save("profile", profiler.stop())
        else:
            # run once to eliminate startup overhead
            start = timeit.default_timer()
            sim.predict(x, n_steps=n_steps)
            print("Warmup time:", timeit.default_timer() - start)

            for _ in range(reps):
                if do_profile:
                    profiler.start()
                start = timeit.default_timer()
                sim.predict(x, n_steps=n_steps)
                exec_time = min(timeit.default_timer() - start, exec_time)
                if do_profile:
                    profiler.save("profile", profiler.stop())

    exec_time /= n_batches
    print("Execution time:", exec_time)
    return exec_time
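An illustrative call to run_profile; the `integrator` benchmark builder and its signature are assumptions rather than something taken from this snippet, and `minibatch_size` is forwarded to `nengo_dl.Simulator` via kwargs:

import nengo

# hypothetical builder; substitute one of this module's benchmark networks
net = integrator(n_neurons=128, dimensions=2,
                 neuron_type=nengo.RectifiedLinear())
run_profile(net, train=False, n_steps=150, reps=3, minibatch_size=16)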
def on_train_end(self, logs=None):
  if self._is_profiling:
    profiler.save(self.log_dir, profiler.stop())
    self._is_profiling = False
  self.writer.close()