Example #1
# Assumes "import tensorflow as tf"; dmo_fs_lib is a module-level path to
# the DMO filesystem plugin.
def LoadFileSystem():
    try:
        tf.load_file_system_library(dmo_fs_lib)
        return True
    except Exception as e:
        print("[ERROR]: failed to load DMO.")
        print(e)
    return False
Example #2
def LoadFileSystem():
    try:
        print("Loading DMO FileSystem...", end="")
        tf.load_file_system_library(dmo_fs_lib)
        print("[OK]")
        return True
    except Exception as e:
        print("[ERROR]")
        print(e)
    return False
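Examples #1 and #2 differ only in their progress printing; both guard tf.load_file_system_library with a broad except because the call raises when the shared object is missing or was built against an incompatible TensorFlow. Once the plugin loads, its URI scheme becomes usable through the ordinary tf.gfile API. A minimal sketch, assuming a hypothetical library name and a hypothetical dmo:// scheme:

import tensorflow as tf

# Library path and URI scheme are assumptions; both come from the plugin.
tf.load_file_system_library("libdmo_file_system.so")

# After registration, tf.gfile dispatches the plugin's scheme to it.
with tf.gfile.GFile("dmo://bucket/data/sample.txt", "rb") as f:
    data = f.read()
print("Read", len(data), "bytes through the DMO filesystem")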
Example #3
import sys
import time

import tensorflow as tf

# dataset_factory and nets_factory are the TF-slim helpers (import paths
# assumed); FLAGS, train_dir, get_config, and get_precessing_eater are
# defined elsewhere in the source module.
from datasets import dataset_factory
from nets import nets_factory


def measure_pre_throughput():
    # import_psutil()

    if FLAGS.cluster == "summit":
        tf.load_file_system_library(
            "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.9.0")
    dataset = dataset_factory.get_dataset(FLAGS.dataset_name, "train",
                                          FLAGS.dataset_dir)
    network_fn = nets_factory.get_network_fn(FLAGS.model_name,
                                             num_classes=dataset.num_classes,
                                             weight_decay=FLAGS.weight_decay,
                                             is_training=True)
    train_image_size = network_fn.default_image_size
    print("Model:", FLAGS.model_name)
    # Build in the default graph; place ops on the CPU by default.
    with tf.get_default_graph().as_default(), tf.device('/cpu:0'):
        global_step = tf.Variable(0, name="global_step", trainable=False)
        eater = get_precessing_eater(dataset, train_image_size)
        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES)
        summary_op = tf.summary.merge(summaries)
        init = tf.global_variables_initializer()
        # cpu_util_summary()

    sess = tf.Session(config=get_config())
    coord = tf.train.Coordinator()
    tf.train.start_queue_runners(sess=sess, coord=coord)
    sess.run(init)
    summary_writer = tf.summary.FileWriter(train_dir, sess.graph)
    print("Warming up")
    sys.stdout.flush()
    # Warm-up runs (not timed)
    for i in range(10):
        sess.run(eater)
    print("Profiling")
    sys.stdout.flush()
    times = []
    num_batches = 10
    for i in range(num_batches):
        start = time.time()
        sess.run(eater)
        end = time.time()
        times.append(end - start)
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, i + 1)
    avg_time = sum(times) / len(times)
    batches_per_sec = 1. / avg_time
    images_per_sec = batches_per_sec * FLAGS.batch_size
    print("Batches/sec =", batches_per_sec)
    print("Images/sec =", images_per_sec)
    sys.stdout.flush()
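The timing logic in Example #3 follows a common pattern: a handful of untimed warm-up iterations to fill queues and trigger lazy initialization, then timed iterations averaged into a throughput figure. A self-contained sketch of just that pattern, with tf.no_op() standing in for the real input-pipeline op and an assumed batch size:

import time
import tensorflow as tf

batch_size = 32          # assumed; Example #3 takes this from FLAGS.batch_size
op = tf.no_op()          # stand-in for the preprocessing op being measured

with tf.Session() as sess:
    for _ in range(10):  # warm-up: not timed
        sess.run(op)
    times = []
    for _ in range(10):  # timed runs
        start = time.time()
        sess.run(op)
        times.append(time.time() - start)
    avg_time = sum(times) / len(times)
    print("Batches/sec =", 1.0 / avg_time)
    print("Images/sec =", batch_size / avg_time)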
Example #4
def setUp(self):
    file_system_library = os.path.join(tf.resource_loader.get_data_files_path(),
                                       "test_file_system.so")
    tf.load_file_system_library(file_system_library)
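Registering the plugin in setUp makes its scheme available to every test in the class. A sketch of a complete test case built around Example #4, assuming the library registers a test:// scheme (the real scheme is defined in the plugin's C++ source):

import os
import tensorflow as tf

class FileSystemTest(tf.test.TestCase):

    def setUp(self):
        library = os.path.join(tf.resource_loader.get_data_files_path(),
                               "test_file_system.so")
        tf.load_file_system_library(library)

    def testSchemeIsRegistered(self):
        # "test://" is an assumption; once the plugin is loaded, the scheme
        # is dispatched to it instead of failing as an unknown filesystem.
        tf.gfile.Exists("test://foo")

if __name__ == "__main__":
    tf.test.main()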
Example #5
import os
import time

import tensorflow as tf

slim = tf.contrib.slim

# dataset_factory / nets_factory as in Example #3; FLAGS, train_dir, and the
# helpers used below (get_tower_batches, get_preprocessed_queue, compute_loss,
# average_gradients) are defined elsewhere in the source module.
def get_main_ops():
    # import_psutil()
    os.system("sh gpu_util.sh > " + train_dir +
              "/nvidia_smi_reports.log 2>&1 &")

    # cpu_logger = CPULogger()
    # cpu_logger.start()
    if FLAGS.cluster == "summit":
        tf.load_file_system_library(
            "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.9.0")
        # print(os.popen("ls /usr/local/cuda/extras/CUPTI/lib64").read())

    main_start_t = time.time()

    dataset = dataset_factory.get_dataset(FLAGS.dataset_name, "train",
                                          FLAGS.dataset_dir)

    network_fn = nets_factory.get_network_fn(FLAGS.model_name,
                                             num_classes=dataset.num_classes,
                                             weight_decay=FLAGS.weight_decay,
                                             is_training=True)
    train_image_size = network_fn.default_image_size
    print("Model:", FLAGS.model_name)
    # Build in the default graph; place ops on the CPU by default.
    # with tf.Graph().as_default(), tf.device('/cpu:0'):
    with tf.get_default_graph().as_default(), tf.device('/cpu:0'):
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
        if FLAGS.num_gpus > 1:
            all_grads = []
            tower_batches = get_tower_batches(dataset, train_image_size)
            # done_pre_queue = get_preprocessed_queue(dataset, train_image_size)
            # Get gradients for each tower
            for gpu_idx in range(FLAGS.num_gpus):
                with tf.device("/device:GPU:%d" % gpu_idx):
                    with tf.name_scope("tower_%d" % gpu_idx) as scope:
                        # image, label = done_pre_queue.dequeue()
                        image, label = tower_batches[gpu_idx]
                        loss = compute_loss(image, label, dataset.num_classes,
                                            network_fn, scope, gpu_idx)
                        tf.get_variable_scope().reuse_variables()
                        # Summaries on first GPU
                        # if gpu_idx == 0:
                        #     summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
                        my_grads = optimizer.compute_gradients(
                            loss)  #, colocate_gradients_with_ops=True)
                        all_grads.append(my_grads)
            avg_grads = average_gradients(all_grads)
            train_op = optimizer.apply_gradients(avg_grads)
        else:
            # Optimized single GPU code
            done_pre_queue = get_preprocessed_queue(dataset, train_image_size)
            image, label = done_pre_queue.dequeue()
            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_batching_threads,
                capacity=FLAGS.batch_queue_size * FLAGS.batch_size,
                shapes=[image.get_shape(), []])
            labels = slim.one_hot_encoding(labels, dataset.num_classes)

            with tf.device("/device:GPU:0"):
                with tf.name_scope("compute") as scope:
                    loss = compute_loss(images, labels, dataset.num_classes,
                                        network_fn, scope, 0)
                    tf.get_variable_scope().reuse_variables()
                    my_grads = optimizer.compute_gradients(
                        loss)  #, colocate_gradients_with_ops=True)
                train_op = optimizer.apply_gradients(my_grads)

        # Extra summaries
        # summaries.append(tf.summary.scalar('learning_rate', lr))
        # tf.summary.scalar('learning_rate', lr)
        # cpu_util_summary()

        # def get_gpu_util(idx):
        #     util_level = int(os.popen("nvidia-smi -i %d --format=csv --query-gpu=utilization.gpu | tail -n 1 | egrep -o [0-9]+" % idx).read().strip())
        #     return numpy.float32(float(util_level))
        # for i in range(FLAGS.num_gpus):
        #     gpu_util_tensor = tf.py_func(get_gpu_util, [i], tf.float32)
        #     tf.summary.scalar("gpu %d util" % i, gpu_util_tensor)

        # def get_gpu_power(idx):
        #     power = int(os.popen("nvidia-smi -i %d --format=csv --query-gpu=power.draw | tail -n 1 | egrep -o [0-9]+ | head -n 1" % idx).read().strip())
        #     return numpy.float32(float(power))
        # for i in range(FLAGS.num_gpus):
        #     gpu_power_tensor = tf.py_func(get_gpu_power, [i], tf.float32)
        #     tf.summary.scalar("gpu %d power (Watts)" % i, gpu_power_tensor)

        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES)
        summary_op = tf.summary.merge(summaries)  # CHECK OVER
        # summary_op = tf.group(summary_op, train_op)
        # Lastly, create the variable initializer
        init = tf.global_variables_initializer()
    return init, train_op, summary_op
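Example #5 calls an average_gradients helper that is not shown. A sketch of that helper in the standard tower-averaging style popularized by the TensorFlow multi-GPU CIFAR-10 tutorial (not necessarily this project's exact implementation):

import tensorflow as tf

def average_gradients(tower_grads):
    # tower_grads: one [(grad, var), ...] list per GPU tower.
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars pairs one variable with its gradient on every tower.
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, 0), 0)
        # Variables are shared across towers, so take the first tower's.
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads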
Example #6
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import multiprocessing
import os
import sys

from pathlib import Path

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf


tf.logging.set_verbosity(tf.logging.WARN)
if Path('lib/s3_file_system.so').exists():
    tf.load_file_system_library('lib/s3_file_system.so')

def cnn_train(args):
    from cnn.train import run
    run(
        args.device,
        args.model,
        args.topology,
        args.version,
        args.evalsource,
        args.trainsource,
        args.threads,
        args.prefetch,
        args.batch,
        args.steps,
        args.epochs,
Example #7
import sys
import zipfile

import tensorflow as tf

sys.path.insert(0, "third_party/syntaxnet")

from convert import convert_model
from dragnn.protos import spec_pb2
from dragnn.python import dragnn_ops
from dragnn.python import graph_builder
from dragnn.python import trainer_lib
from dragnn.python import check
from google.protobuf import text_format
from tensorflow.python.platform import gfile

# tf.load_op_library('bazel-bin/sling/nlp/parser/trainer/sempar.so')
tf.load_file_system_library('bazel-bin/sling/nlp/parser/trainer/sempar.so')

flags = tf.app.flags
FLAGS = flags.FLAGS

flags.DEFINE_string('master_spec', '',
                    'Path to a complete dragnn master spec text proto.')
flags.DEFINE_string('hyperparams', '', 'Training grid spec text proto.')
flags.DEFINE_string('output_folder', '', 'Full path of the output folder.')
flags.DEFINE_string('commons', '', 'Path to commons.')
flags.DEFINE_string('train_corpus', '', 'Training corpus.')
flags.DEFINE_string('dev_corpus', '', 'Dev corpus with gold frames.')
flags.DEFINE_string('tf_master', '',
                    'TensorFlow execution engine to connect to.')
flags.DEFINE_integer('train_steps', 200000, 'Number of training steps')
flags.DEFINE_integer('report_every', 500, 'Checkpoint interval')
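The snippet stops after the flag definitions; the training driver itself is not shown. A hypothetical sketch of a main() consuming these flags, parsing the master spec the way DRAGNN examples typically do (the real script's logic may differ):

def main(unused_argv):
    # Read the master spec text proto named by --master_spec.
    master_spec = spec_pb2.MasterSpec()
    with gfile.GFile(FLAGS.master_spec, 'r') as f:
        text_format.Parse(f.read(), master_spec)
    tf.logging.info('Training for %d steps, checkpointing every %d',
                    FLAGS.train_steps, FLAGS.report_every)

if __name__ == '__main__':
    tf.app.run()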