def LoadFileSystem():
    try:
        tf.load_file_system_library(dmo_fs_lib)
        return True
    except Exception as e:
        print("[ERROR]: failed to load DMO.")
        print(e)
        return False
def LoadFileSystem():
    try:
        print("Loading DMO FileSystem...", end="")
        tf.load_file_system_library(dmo_fs_lib)
        print("[OK]")
        return True
    except Exception as e:
        print("[ERROR]")
        print(e)
        return False
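A minimal caller for either loader might look like the sketch below. The `dmo_fs_lib` path and the `dmo://` URI scheme are assumptions for illustration; the snippets above do not show where the plugin path or its registered scheme is defined.

import tensorflow as tf

# Hypothetical path to the DMO filesystem plugin; in the original module
# this is defined elsewhere.
dmo_fs_lib = "lib/dmo_file_system.so"

if LoadFileSystem():
    # Once the plugin is registered, paths under its scheme (assumed here
    # to be "dmo://") work anywhere TensorFlow accepts a file path.
    with tf.gfile.GFile("dmo://bucket/example.txt") as f:
        print(f.read())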
def measure_pre_throughput():
    # import_psutil()
    if FLAGS.cluster == "summit":
        tf.load_file_system_library(
            "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.9.0")
    dataset = dataset_factory.get_dataset(FLAGS.dataset_name, "train",
                                          FLAGS.dataset_dir)
    network_fn = nets_factory.get_network_fn(FLAGS.model_name,
                                             num_classes=dataset.num_classes,
                                             weight_decay=FLAGS.weight_decay,
                                             is_training=True)
    train_image_size = network_fn.default_image_size
    print("Model:", FLAGS.model_name)
    # Default graph, default to using cpu
    with tf.get_default_graph().as_default(), tf.device('/cpu:0'):
        global_step = tf.Variable(0, name="global_step", trainable=False)
        eater = get_precessing_eater(dataset, train_image_size)
        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES)
        summary_op = tf.summary.merge(summaries)
        init = tf.global_variables_initializer()
        # cpu_util_summary()
        sess = tf.Session(config=get_config())
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(sess=sess, coord=coord)
        sess.run(init)
        summary_writer = tf.summary.FileWriter(train_dir, sess.graph)
        print("Warming up")
        sys.stdout.flush()
        # Warmup
        for i in range(10):
            sess.run(eater)
        print("Profiling")
        sys.stdout.flush()
        times = []
        num_batches = 10
        for i in range(num_batches):
            start = time.time()
            sess.run(eater)
            end = time.time()
            times.append(end - start)
            summary_str = sess.run(summary_op)
            summary_writer.add_summary(summary_str, i + 1)
        avg_time = sum(times) / len(times)
        batches_per_sec = 1. / avg_time
        images_per_sec = batches_per_sec * FLAGS.batch_size
        print("Batches/sec =", batches_per_sec)
        print("Images/sec =", images_per_sec)
        sys.stdout.flush()
def setUp(self):
    file_system_library = os.path.join(
        tf.resource_loader.get_data_files_path(), "test_file_system.so")
    tf.load_file_system_library(file_system_library)
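After `setUp` registers the plugin, a test method can exercise it through the ordinary file APIs. A minimal sketch, assuming the plugin registers a `test://` scheme (the actual scheme is defined inside `test_file_system.so`, not shown here):

def testReadThroughPlugin(self):
    # Paths under the plugin's scheme are routed to the loaded filesystem
    # implementation rather than the local OS filesystem.
    self.assertTrue(tf.gfile.Exists("test://foo"))
    with tf.gfile.GFile("test://foo", "rb") as f:
        contents = f.read()
    self.assertGreater(len(contents), 0)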
def get_main_ops():
    # import_psutil()
    os.system("sh gpu_util.sh > " + train_dir + "/nvidia_smi_reports.log 2>&1 &")
    # cpu_logger = CPULogger()
    # cpu_logger.start()
    if FLAGS.cluster == "summit":
        tf.load_file_system_library(
            "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.9.0")
        # print(os.popen("ls /usr/local/cuda/extras/CUPTI/lib64").read())
    main_start_t = time.time()
    dataset = dataset_factory.get_dataset(FLAGS.dataset_name, "train",
                                          FLAGS.dataset_dir)
    network_fn = nets_factory.get_network_fn(FLAGS.model_name,
                                             num_classes=dataset.num_classes,
                                             weight_decay=FLAGS.weight_decay,
                                             is_training=True)
    train_image_size = network_fn.default_image_size
    print("Model:", FLAGS.model_name)
    # Default graph, default to using cpu
    # with tf.Graph().as_default(), tf.device('/cpu:0'):
    with tf.get_default_graph().as_default(), tf.device('/cpu:0'):
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
        if FLAGS.num_gpus > 1:
            all_grads = []
            tower_batches = get_tower_batches(dataset, train_image_size)
            # done_pre_queue = get_preprocessed_queue(dataset, train_image_size)
            # Get gradients for each tower
            for gpu_idx in range(FLAGS.num_gpus):
                with tf.device("/device:GPU:%d" % gpu_idx):
                    with tf.name_scope("tower_%d" % gpu_idx) as scope:
                        # image, label = done_pre_queue.dequeue()
                        image, label = tower_batches[gpu_idx]
                        loss = compute_loss(image, label, dataset.num_classes,
                                            network_fn, scope, gpu_idx)
                        tf.get_variable_scope().reuse_variables()
                        # Summaries on first GPU
                        # if gpu_idx == 0:
                        #     summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
                        my_grads = optimizer.compute_gradients(
                            loss)  # , colocate_gradients_with_ops=True)
                        all_grads.append(my_grads)
            avg_grads = average_gradients(all_grads)
            train_op = optimizer.apply_gradients(avg_grads)
        else:
            # Optimized single GPU code
            done_pre_queue = get_preprocessed_queue(dataset, train_image_size)
            image, label = done_pre_queue.dequeue()
            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_batching_threads,
                capacity=FLAGS.batch_queue_size * FLAGS.batch_size,
                shapes=[image.get_shape(), []])
            labels = slim.one_hot_encoding(labels, dataset.num_classes)
            with tf.device("/device:GPU:0"):
                with tf.name_scope("compute") as scope:
                    loss = compute_loss(images, labels, dataset.num_classes,
                                        network_fn, scope, 0)
                    tf.get_variable_scope().reuse_variables()
                    my_grads = optimizer.compute_gradients(
                        loss)  # , colocate_gradients_with_ops=True)
                    train_op = optimizer.apply_gradients(my_grads)
        # Extra summaries
        # summaries.append(tf.summary.scalar('learning_rate', lr))
        # tf.summary.scalar('learning_rate', lr)
        # cpu_util_summary()
        # def get_gpu_util(idx):
        #     util_level = int(os.popen("nvidia-smi -i %d --format=csv --query-gpu=utilization.gpu | tail -n 1 | egrep -o [0-9]+" % idx).read().strip())
        #     return numpy.float32(float(util_level))
        # for i in range(FLAGS.num_gpus):
        #     gpu_util_tensor = tf.py_func(get_gpu_util, [i], tf.float32)
        #     tf.summary.scalar("gpu %d util" % i, gpu_util_tensor)
        # def get_gpu_power(idx):
        #     power = int(os.popen("nvidia-smi -i %d --format=csv --query-gpu=power.draw | tail -n 1 | egrep -o [0-9]+ | head -n 1" % idx).read().strip())
        #     return numpy.float32(float(power))
        # for i in range(FLAGS.num_gpus):
        #     gpu_power_tensor = tf.py_func(get_gpu_power, [i], tf.float32)
        #     tf.summary.scalar("gpu %d power (Watts)" % i, gpu_power_tensor)
        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES)
        summary_op = tf.summary.merge(summaries)
        # CHECK OVER
        # summary_op = tf.group(summary_op, train_op)
        # Lastly, create initializer
        init = tf.global_variables_initializer()
        return init, train_op, summary_op
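The three ops returned by `get_main_ops` would typically be consumed by a driver loop like the one in `measure_pre_throughput` above. A minimal sketch, reusing the same `get_config()` and `train_dir` helpers; `FLAGS.max_steps` is an assumed flag, not one shown in this snippet:

init, train_op, summary_op = get_main_ops()

sess = tf.Session(config=get_config())
coord = tf.train.Coordinator()
# Queue runners feed the input pipelines built inside get_main_ops().
tf.train.start_queue_runners(sess=sess, coord=coord)
sess.run(init)

summary_writer = tf.summary.FileWriter(train_dir, sess.graph)
for step in range(FLAGS.max_steps):
    sess.run(train_op)
    if step % 100 == 0:
        summary_writer.add_summary(sess.run(summary_op), step)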
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse, multiprocessing, os, sys
from pathlib import Path

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.WARN)

if Path('lib/s3_file_system.so').exists():
    tf.load_file_system_library('lib/s3_file_system.so')


def cnn_train(args):
    from cnn.train import run
    run(args.device, args.model, args.topology, args.version,
        args.evalsource, args.trainsource, args.threads, args.prefetch,
        args.batch, args.steps, args.epochs,
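Once the S3 plugin is loaded, `s3://` URIs work through the standard file and dataset APIs. A brief sketch; the bucket and key are placeholders, and credentials are assumed to come from the usual AWS environment variables:

# With the S3 filesystem plugin registered, TensorFlow's file APIs and
# input pipelines can address objects directly by URI.
if tf.gfile.Exists('s3://my-bucket/train.tfrecord'):
    dataset = tf.data.TFRecordDataset('s3://my-bucket/train.tfrecord')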
import sys

import tensorflow as tf
import zipfile

sys.path.insert(0, "third_party/syntaxnet")

from convert import convert_model
from dragnn.protos import spec_pb2
from dragnn.python import dragnn_ops
from dragnn.python import graph_builder
from dragnn.python import trainer_lib
from dragnn.python import check
from google.protobuf import text_format
from tensorflow.python.platform import gfile

# tf.load_op_library('bazel-bin/sling/nlp/parser/trainer/sempar.so')
tf.load_file_system_library('bazel-bin/sling/nlp/parser/trainer/sempar.so')

flags = tf.app.flags
FLAGS = flags.FLAGS

flags.DEFINE_string('master_spec', '',
                    'Path to a complete dragnn master spec text proto.')
flags.DEFINE_string('hyperparams', '', 'Training grid spec text proto.')
flags.DEFINE_string('output_folder', '', 'Full path of the output folder.')
flags.DEFINE_string('commons', '', 'Path to commons.')
flags.DEFINE_string('train_corpus', '', 'Training corpus.')
flags.DEFINE_string('dev_corpus', '', 'Dev corpus with gold frames.')
flags.DEFINE_string('tf_master', '',
                    'TensorFlow execution engine to connect to.')
flags.DEFINE_integer('train_steps', 200000, 'Number of training steps')
flags.DEFINE_integer('report_every', 500, 'Checkpoint interval')