def benchmark_keras_model_functional_fit_run_model_eagerly_with_profiler(
    self):
  profiler.start("")
  model = make_keras_model(initializer="glorot_uniform")
  self._benchmark_keras_model_fit(model, run_eagerly=True)
  result = profiler.stop(save=False)
  assert result is not None
def test_save_profile(self):
  logdir = self.get_temp_dir()
  profiler.start(logdir)
  with traceme.TraceMe('three_times_five'):
    three = constant_op.constant(3)
    five = constant_op.constant(5)
    product = three * five
  self.assertAllEqual(15, product)
  profiler.stop()
  file_list = gfile.ListDirectory(logdir)
  self.assertEqual(len(file_list), 2)
  for file_name in gfile.ListDirectory(logdir):
    if gfile.IsDirectory(os.path.join(logdir, file_name)):
      self.assertEqual(file_name, 'plugins')
    else:
      self.assertTrue(file_name.endswith('.profile-empty'))
  profile_dir = os.path.join(logdir, 'plugins', 'profile')
  run = gfile.ListDirectory(profile_dir)[0]
  hostname = socket.gethostname()
  overview_page = os.path.join(profile_dir, run,
                               hostname + '.overview_page.pb')
  self.assertTrue(gfile.Exists(overview_page))
  input_pipeline = os.path.join(profile_dir, run,
                                hostname + '.input_pipeline.pb')
  self.assertTrue(gfile.Exists(input_pipeline))
  tensorflow_stats = os.path.join(profile_dir, run,
                                  hostname + '.tensorflow_stats.pb')
  self.assertTrue(gfile.Exists(tensorflow_stats))
  kernel_stats = os.path.join(profile_dir, run,
                              hostname + '.kernel_stats.pb')
  self.assertTrue(gfile.Exists(kernel_stats))
  trace_file = os.path.join(profile_dir, run, hostname + '.trace.json.gz')
  self.assertTrue(gfile.Exists(trace_file))
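# The assertions above pin down the on-disk layout of a profiler run:
# <logdir>/plugins/profile/<run>/<hostname>.<tool>.pb plus a gzipped Chrome
# trace. A minimal helper that resolves those files, assuming the same
# os/gfile imports as the test (the helper name is illustrative, not part of
# the TensorFlow API):
def profiler_run_files(logdir):
  profile_dir = os.path.join(logdir, 'plugins', 'profile')
  run = gfile.ListDirectory(profile_dir)[0]
  run_dir = os.path.join(profile_dir, run)
  # Returns absolute paths to every tool file TensorBoard's Profile plugin
  # reads for this run.
  return [os.path.join(run_dir, f) for f in gfile.ListDirectory(run_dir)]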
def benchmark_keras_model_functional_fit_graph_mode_with_profiler(self):
  profiler.start("")
  with context.graph_mode():
    model = make_keras_model(initializer="glorot_uniform")
    self._benchmark_keras_model_fit(model)
  result = profiler.stop(save=False)
  assert result is not None
def test_profile_exceptions(self):
  logdir = self.get_temp_dir()
  profiler.start(logdir)
  with self.assertRaises(errors.AlreadyExistsError):
    profiler.start(logdir)

  profiler.stop()
  with self.assertRaises(errors.UnavailableError):
    profiler.stop()
def _start_profiler(self):
  """Starts the profiler if currently inactive."""
  if self._profiler_started:
    return
  try:
    profiler.start(logdir=self.log_dir)
    self._profiler_started = True
  except errors.AlreadyExistsError as e:
    # Profiler errors should not be fatal.
    logging.error('Failed to start profiler: %s', e.message)
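# A sketch of the matching teardown helper, assuming the same
# `_profiler_started` flag and `errors`/`logging` imports as above; stopping
# raises UnavailableError when no profiler is active, which this swallows for
# the same "not fatal" reason:
def _stop_profiler(self, save=True):
  """Stops the profiler if currently active."""
  if not self._profiler_started:
    return
  try:
    profiler.stop(save=save)
  except errors.UnavailableError as e:
    # Profiler errors should not be fatal.
    logging.error('Failed to stop profiler: %s', e.message)
  finally:
    self._profiler_started = False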
def test_single_worker_programmatic_mode(self):
  """Test single worker programmatic mode."""
  logdir = self.get_temp_dir()
  options = profiler.ProfilerOptions(
      host_tracer_level=2,
      python_tracer_level=0,
      device_tracer_level=1,
  )
  profiler.start(logdir, options)
  _, steps, train_ds, model = _model_setup()
  model.fit(x=train_ds, epochs=2, steps_per_epoch=steps)
  profiler.stop()
  self._check_tools_pb_exist(logdir)
def test_profile_with_options(self):
  logdir = self.get_temp_dir()
  options = profiler.ProfilerOptions(
      host_tracer_level=3, python_tracer_level=1)
  profiler.start(logdir, options)
  with trace.Trace('three_times_five'):
    three = constant_op.constant(3)
    five = constant_op.constant(5)
    product = three * five
  self.assertAllEqual(15, product)
  profiler.stop()
  file_list = gfile.ListDirectory(logdir)
  self.assertEqual(len(file_list), 2)
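# The tests above exercise the internal profiler_v2 module; the same flow is
# available through the public API. A sketch with an illustrative logdir:
import tensorflow as tf

options = tf.profiler.experimental.ProfilerOptions(
    host_tracer_level=3, python_tracer_level=1)
tf.profiler.experimental.start('/tmp/profile_demo', options=options)
# Named trace events show up in the trace viewer under this label.
with tf.profiler.experimental.Trace('three_times_five'):
  product = tf.constant(3) * tf.constant(5)
tf.profiler.experimental.stop()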
def on_batch_end(self, batch, logs=None):
  """Writes scalar summaries for metrics on every training batch.

  Performs profiling if current batch is in profiler_batches.
  """
  # Don't output batch_size and batch number as TensorBoard summaries
  logs = logs or {}
  self._samples_seen += logs.get('size', 1)
  samples_seen_since = self._samples_seen - self._samples_seen_at_last_write
  if self.update_freq != 'epoch' and samples_seen_since >= self.update_freq:
    batch_logs = {('batch_' + k): v
                  for k, v in logs.items()
                  if k not in ['batch', 'size', 'num_steps']}
    self._write_custom_summaries(self._total_batches_seen, batch_logs)
    self._samples_seen_at_last_write = self._samples_seen
  self._total_batches_seen += 1
  if self._is_profiling:
    profiler.stop()
    self._is_profiling = False
  elif (not self._is_profiling and
        self._total_batches_seen == self._profile_batch - 1):
    profiler.start(self.log_dir)
    self._is_profiling = True
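# Users normally get this start-before/stop-after behavior through the Keras
# TensorBoard callback rather than by calling profiler.start/stop directly.
# A sketch with placeholder `model`/`train_ds` names; profile_batch=2
# profiles the second training batch:
tensorboard_cb = tf.keras.callbacks.TensorBoard(log_dir='/tmp/logs',
                                                profile_batch=2)
model.fit(train_ds, epochs=2, callbacks=[tensorboard_cb])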
def test_profile_exceptions(self):
  logdir = self.get_temp_dir()
  profiler.start(logdir)
  with self.assertRaises(errors.AlreadyExistsError):
    profiler.start(logdir)

  profiler.stop()
  with self.assertRaises(errors.UnavailableError):
    profiler.stop()

  # Test with a bad logdir, and it correctly raises exception and deletes
  # profiler.
  # pylint: disable=anomalous-backslash-in-string
  profiler.start('/\/\/:123')
  # pylint: enable=anomalous-backslash-in-string
  with self.assertRaises(Exception):
    profiler.stop()
  profiler.start(logdir)
  profiler.stop()
def on_batch_begin(self, batch, logs=None):
  if batch == self.start_step_in_epoch and self.should_start:
    self.should_start = False
    profiler.start(self.log_dir)
    logging.info('Profiler started at Step %s', self.start_step)
processor = ProcessData(dataset_name='celeb_a',
                        image_shape=(64, 64, 3),
                        buffer_size=buffer_size,
                        run_with_sample=run_with_sample)
ds, *_ = processor.generate_train_and_test_partitions(batch_size, epochs)
train_ds, _ = ds
nets = GAN(architecture, ngf, ndf, latent_n)
gen_optimizer = tf.keras.optimizers.Adam(lr)
dis_optimizer = tf.keras.optimizers.Adam(lr)
gan_opt = OptGAN(nets, gen_optimizer, dis_optimizer, batch_size, latent_n)
print(f'Running on TF version {tf.__version__}')

tic = time.time()
# tf.profiler.experimental.start('./profiler/gan_tf/')
profiler.warmup()
profiler.start('./profiler/gan_tf/')
mean_time = tfk.metrics.Mean()
for epoch in range(epochs):
  start_time = time.time()
  gan_opt.train_on_epoch(train_ds)
  end_time = time.time()
  mean_time(end_time - start_time)
  message = f'Epoch: {epoch + 1:4d} |'
  message += f'{end_time - start_time:4.2f} sec'
  print(message)
toc = time.time()
# tf.profiler.experimental.stop()
profiler.stop()
print(f'It took {toc - tic:4.2f} sec')
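# If per-epoch wall-clock timing like the loop above is too coarse, training
# steps can be annotated so each one appears as a named step event in the
# trace viewer. A sketch using the public API; `step` is an illustrative loop
# counter, and `_r=1` tells the profiler to treat the event as a step marker:
with tf.profiler.experimental.Trace('train', step_num=step, _r=1):
  gan_opt.train_on_epoch(train_ds)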
def on_train_batch_begin(self, batch, logs=None):
  if (not self._is_profiling and
      self._total_batches_seen == self._profile_batch - 1):
    profiler.start(self.log_dir)
    self._is_profiling = True
def on_train_begin(self, logs=None):
  if self.profiler_type == 'trace':
    profiler.start(logdir=os.path.join(self.log_dir, 'trace'))
def do_batch(batch):
  print('Processing batch {} of {}...'.format(batch, args.num_batches))
  batch_data = tf.random.uniform(shape=[args.batch_size, 224, 224, 3])
  model(batch_data)

makespan_start_time = time.time()
if args.profile_type == 'none':
  for batch in range(1, 1 + args.num_batches):
    batch_start_time = time.time()
    do_batch(batch)
    batch_end_time = time.time()
    print('{}\t{}\t0'.format(batch, batch_end_time - batch_start_time),
          file=log_batch_times)
elif args.profile_type == 'trace':
  profiler.start(logdir=os.path.join(args.log_dir, 'trace'))
  for batch in range(1, 1 + args.num_batches):
    batch_start_time = time.time()
    do_batch(batch)
    batch_end_time = time.time()
    print('{}\t{}\t0'.format(batch, batch_end_time - batch_start_time),
          file=log_batch_times)
  profile_start_time = time.time()
  profiler.stop()
  profile_end_time = time.time()
  print('-1\t0\t{}'.format(profile_end_time - profile_start_time),
        file=log_batch_times)
else:
  for batch in range(1, 1 + args.num_batches):
    profiler.start(
        logdir=os.path.join(args.log_dir, 'trace_batch{}'.format(batch)))
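    # The per-batch branch above is truncated in the source. A plausible
    # continuation, assuming it mirrors the 'trace' branch but stops the
    # profiler after every batch and logs the per-batch stop overhead:
    batch_start_time = time.time()
    do_batch(batch)
    batch_end_time = time.time()
    profile_start_time = time.time()
    profiler.stop()
    profile_end_time = time.time()
    print('{}\t{}\t{}'.format(batch, batch_end_time - batch_start_time,
                              profile_end_time - profile_start_time),
          file=log_batch_times)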
def on_epoch_begin(self, epoch, logs=None):
  self.epoch_start_time = time.time()
  if self.profiler_type == 'trace_per_epoch':
    profiler.start(
        logdir=os.path.join(self.log_dir, 'trace_epoch{}'.format(epoch)))
def on_train_begin(self, logs=None):
  if self._profile_batch == 1:
    profiler.start(self.log_dir)
    self._is_profiling = True
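# A sketch of the matching teardown in the same callback, assuming the
# `_is_profiling` flag used above; without it, a profile started on the last
# batch would never be stopped and saved:
def on_train_end(self, logs=None):
  if self._is_profiling:
    profiler.stop()
    self._is_profiling = False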
path = "data/Confocal_MICE/raw/training_raw.npy" # Load the training image data = np.load(path).astype(np.float32) # We are loading the histogram from the 'Convallaria-1-CreateNoiseModel' notebook # histogram = np.load(path + 'noiseModel.npy') # Create a NoiseModel object from the histogram. # noiseModel = hist_noise_model.NoiseModel(histogram) logging.config.fileConfig("configs/logging.conf") # TODO: how to deal with the noise model being a part of the model config as opposed to something generated from the data model_config = yaml2namespace(join('unittests', 'assets', 'ppn2v_model.yaml')) training_config = yaml2namespace(join('configs', 'training_config.yaml')) # data, mean, std = load_data(data, batch_size=training_config.batch_size, # patch_size=training_config.patch_size, # num_pix=100 * 100 // 32, supervised=False) train_data, val_data, mean, std = load_dataset('data/test_records') model = PPN2V(model_config, mean, std) profiler_v2.warmup() profiler_v2.start(logdir='model_instances/cheese') model.train(train_data, training_config) profiler_v2.stop()