def after_run(self, run_context, run_values):
    self._counter += 1
    if self.should_save():
        # run_values.run_metadata carries the step stats collected for this run.
        timeline = Timeline(step_stats=run_values.run_metadata.step_stats)
        ctf = timeline.generate_chrome_trace_format(show_memory=True)
        with open(self.get_ctf(), "w+") as f:
            f.write(ctf)
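# A minimal sketch of the hook that the after_run() above belongs to, assuming
# the TF 1.x tf.train.SessionRunHook API; the class name and the
# MonitoredTrainingSession usage below are illustrative, not the original code.
import tensorflow as tf

class TimelineHook(tf.train.SessionRunHook):
    """Requests FULL_TRACE stats so after_run() receives populated step_stats."""

    def before_run(self, run_context):
        # Asking for a full trace is what makes run_values.run_metadata non-empty.
        return tf.train.SessionRunArgs(
            fetches=None,
            options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE))

    # after_run() as shown above would consume run_values.run_metadata here.

# Hooks of this kind are typically attached through a monitored session:
# with tf.train.MonitoredTrainingSession(hooks=[TimelineHook()]) as sess:
#     sess.run(train_op)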
def fit(self, x, y, validation_data=None, epochs=100, patience=0, verbose=None,
        min_delta=0, tensorboard=False, timeline=False, **keras_kwargs):
    if validation_data is None:
        validation_data = self.model.pipeline.encoded_validation_data

    if isinstance(x, pandas.DataFrame):
        x = x.to_dict(orient='series')

    if isinstance(validation_data.x, pandas.DataFrame):
        validation_data = Observations(
            x=validation_data.x.to_dict(orient='series'),
            y=validation_data.y)

    if not self.keras or not self.optimizer:
        self.build()

    with self.session.as_default():
        if timeline:
            run_metadata = tensorflow.RunMetadata()
        else:
            run_metadata = None
        self.keras.compile(
            loss=self.loss,
            optimizer=self.optimizer,
            options=tensorflow.RunOptions(
                trace_level=tensorflow.RunOptions.FULL_TRACE),
            run_metadata=run_metadata)

    if verbose is None:
        verbose = 1 if lore.env.name == lore.env.DEVELOPMENT else 0

    logger.info('\n'.join([
        '\n\n\n  Fitting',
        '==============================',
        '| batch | learning |         |',
        '|  size |     rate |   decay |',
        '------------------------------',
        '| %5i | %8.6f | %7.5f |' % (
            self.batch_size,
            self.learning_rate,
            self.decay,
        ),
        '==============================\n\n'
    ]))

    reload_best = ReloadBest(
        filepath=self.model.checkpoint_path(),
        monitor=self.monitor,
        mode='auto',
    )

    callbacks = self.callbacks()
    callbacks += [
        reload_best,
        TerminateOnNaN(),
        EarlyStopping(
            monitor=self.monitor,
            min_delta=min_delta,
            patience=patience,
            verbose=verbose,
            mode='auto',
        ),
    ]
    if tensorboard:
        callbacks += [
            TensorBoard(
                log_dir=self.model.serializer.tensorboard_path,
                histogram_freq=1,
                batch_size=self.batch_size,
                write_graph=True,
                write_grads=True,
                write_images=True,
                embeddings_freq=1,
                embeddings_metadata=None)
        ]

    with self.session.as_default():
        self.history = self.keras.fit(
            x=x,
            y=[y] * self.towers,
            validation_data=Observations(
                x=validation_data.x,
                y=[validation_data.y] * self.towers),
            batch_size=self.batch_size,
            epochs=epochs,
            verbose=verbose,
            callbacks=callbacks,
            **keras_kwargs).history

    if timeline:
        with open(self.model.timeline_path(), 'w') as f:
            f.write(
                Timeline(step_stats=run_metadata.step_stats).
                generate_chrome_trace_format())

    return {
        'epochs': len(self.history['loss']),
        'train': reload_best.train_loss,
        'validate': reload_best.validate_loss,
    }
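# A hedged usage sketch for the fit() method above; `MyEstimator` and the
# training arrays are placeholders, not part of the original code. With
# timeline=True the Chrome trace is written to self.model.timeline_path().
estimator = MyEstimator()  # hypothetical estimator exposing the fit() above
results = estimator.fit(x_train, y_train, epochs=10, patience=2,
                        timeline=True, tensorboard=False)
print(results['epochs'], results['train'], results['validate'])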
# from https://github.com/tensorflow/tensorflow/issues/7251
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import tensorflow as tf
from tensorflow.python.client.timeline import Timeline

with tf.device("/gpu:0"):
    x = tf.ones(100, name="x")
    idxs = tf.range(100)

    for i in range(10):
        y = tf.identity(x, name="identity-" + str(i))
        x = tf.dynamic_stitch([idxs, idxs], [x, y], name="stitch-" + str(i))

config = tf.ConfigProto(graph_options=tf.GraphOptions(
    optimizer_options=tf.OptimizerOptions(opt_level=tf.OptimizerOptions.L0)))

sess = tf.InteractiveSession(config=config)
metadata = tf.RunMetadata()
sess.run(x,
         options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE,
                               output_partition_graphs=True),
         run_metadata=metadata)

timeline = Timeline(metadata.step_stats)
with open("dynamic_stitch_gpu_profile.json", "w") as f:
    f.write(timeline.generate_chrome_trace_format())
with open("dynamic_stitch_gpu_profile.pbtxt", "w") as f:
    f.write(str(metadata))
def export_run_metadata(run_metadata, path):
    file_name = datetime.now().strftime("%Y%m%d-%H%M%S") + ".json"
    time_line = Timeline(run_metadata.step_stats)  # pylint: disable=E1101
    ctf = time_line.generate_chrome_trace_format()
    write_zippped_file(path=join(path, file_name), data=ctf)
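# A hedged usage sketch for export_run_metadata(), assuming TF 1.x and that
# write_zippped_file()/join() are helpers imported elsewhere in this module;
# the reduction op and output directory below are illustrative only.
import tensorflow as tf

output = tf.reduce_sum(tf.random_normal([1000, 1000]))

run_metadata = tf.RunMetadata()
with tf.Session() as sess:
    sess.run(output,
             options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
             run_metadata=run_metadata)

# Writes a timestamped, zipped Chrome trace into ./profiles/.
export_run_metadata(run_metadata, path="./profiles")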
# Standalone timing snippet; `reduced` is assumed to be a reduction op built
# earlier in the script.
import time

import tensorflow as tf
from tensorflow.python.client.timeline import Timeline

# gpu_opt = tf.GPUOptions(per_process_gpu_memory_fraction=0.75)
# conf = tf.ConfigProto(gpu_options=gpu_opt)
# session = tf.InteractiveSession(config=conf)
session = tf.InteractiveSession()
session.run(tf.initialize_all_variables())

run_metadata = tf.RunMetadata()
startTime = time.time()
for index in range(2):
    if index == 1:  # reset start time due to GPU overhead
        startTime = time.time()
    res = session.run(
        reduced,
        options=tf.RunOptions(trace_level=tf.RunOptions.SOFTWARE_TRACE),
        run_metadata=run_metadata)
    print(res)
    currentTime = time.time()
    print(index, "TotalTime", (currentTime - startTime))

trace = Timeline(step_stats=run_metadata.step_stats)
with open('timeline.ctf.json', 'w') as trace_file:
    trace_file.write(trace.generate_chrome_trace_format())