Example #1
def testComplexCodeView(self):
        ops.reset_default_graph()
        outfile = os.path.join(test.get_temp_dir(), 'dump')
        opts = (builder(
            builder.trainable_variables_parameter()).with_file_output(
                outfile).with_accounted_types(['.*']).with_node_names(
                    show_name_regexes=['.*model_analyzer_testlib.py.*']).
                account_displayed_op_only(False).select(
                    ['params', 'float_ops']).build())

        with profile_context.ProfileContext(test.get_temp_dir(),
                                            trace_steps=[],
                                            dump_steps=[]) as pctx:
            with session.Session() as sess:
                x = lib.BuildFullModel()

                sess.run(variables.global_variables_initializer())
                pctx.trace_next_step()
                _ = sess.run(x)
                tfprof_node = pctx.profiler.profile_python(options=opts)

                # pylint: disable=line-too-long
                with gfile.Open(outfile, 'r') as f:
                    lines = f.read().split('\n')
                    self.assertGreater(len(lines), 5)
                    result = '\n'.join([l[:min(len(l), 80)] for l in lines])
                    self.assertTrue(
                        compat.as_text(
                            lib.CheckAndRemoveDoc(result)).startswith(
                                'node name | # parameters | # float_ops'))

                self.assertLess(0, tfprof_node.total_exec_micros)
                self.assertEqual(2844, tfprof_node.total_parameters)
                # The graph is modified when MKL is enabled, so total_float_ops
                # will be different.
                if test_util.IsMklEnabled():
                    self.assertLess(101600, tfprof_node.total_float_ops)
                else:
                    self.assertLess(145660, tfprof_node.total_float_ops)
                self.assertEqual(8, len(tfprof_node.children))
                self.assertEqual('_TFProfRoot', tfprof_node.name)
                self.assertEqual('model_analyzer_testlib.py:63:BuildFullModel',
                                 tfprof_node.children[0].name)
                self.assertEqual(
                    'model_analyzer_testlib.py:63:BuildFullModel (gradient)',
                    tfprof_node.children[1].name)
                self.assertEqual('model_analyzer_testlib.py:67:BuildFullModel',
                                 tfprof_node.children[2].name)
                self.assertEqual(
                    'model_analyzer_testlib.py:67:BuildFullModel (gradient)',
                    tfprof_node.children[3].name)
                self.assertEqual('model_analyzer_testlib.py:69:BuildFullModel',
                                 tfprof_node.children[4].name)
                self.assertEqual('model_analyzer_testlib.py:70:BuildFullModel',
                                 tfprof_node.children[5].name)
                self.assertEqual(
                    'model_analyzer_testlib.py:70:BuildFullModel (gradient)',
                    tfprof_node.children[6].name)
                self.assertEqual('model_analyzer_testlib.py:72:BuildFullModel',
                                 tfprof_node.children[7].name)
Example #2
 def testConv3D2x2x2Filter1x2x1Dilation(self):
     if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
         self._VerifyDilatedConvValues(tensor_in_sizes=[1, 4, 6, 3, 1],
                                       filter_in_sizes=[2, 2, 2, 1, 1],
                                       stride=1,
                                       padding="VALID",
                                       dilations=[1, 2, 1])
Example #3
 def testIsMklEnabled(self):
     # This test doesn't assert anything.
     # It ensures the py wrapper function is generated correctly.
     if test_util.IsMklEnabled():
         print("MKL is enabled")
     else:
         print("MKL is disabled")
Example #4
 def __init__(
         self,
         model_path='weights/MaskRCNN-R50C41x-COCO_finetune-docrop_and_rotate_24500.pb',
         canvas_size=512,
         debug=False):
     if not tf.test.is_gpu_available():
         from tensorflow.python.framework import test_util
         assert get_tf_version_tuple() >= (1, 7) and test_util.IsMklEnabled(), \
             "Inference requires either GPU support or MKL support!"
     self.canvas_size = canvas_size
     self.debug = debug
     self.id_to_class_name = {
         1: 'page',
         2: 'profile_image',
         3: 'van_tay',
         4: 'passport_code'
     }
     self.resizer = CustomResize(self.canvas_size, self.canvas_size)
     print('Loading model at', model_path)
     self.graph = load_graph(model_path)
     self.input_tensor = self.graph.get_tensor_by_name('import/image:0')
     self.output_node_name = [
         'output/boxes', 'output/scores', 'output/labels', 'output/masks'
     ]
     self.outputs_tensor = [
         self.graph.get_tensor_by_name('import/{}:0'.format(each_node))
         for each_node in self.output_node_name
     ]
     self.config = tf.compat.v1.ConfigProto()
     # self.config.gpu_options.allow_growth = True
     self.config.gpu_options.per_process_gpu_memory_fraction = 0.1
     self.sess = tf.compat.v1.Session(config=self.config, graph=self.graph)
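      # Warm-up inference on a dummy image so the first real request is not
      # slowed down by one-time graph initialization.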
     self.predict_crop(np.zeros((200, 200, 3), dtype=np.uint8))
     print('Loaded model!')
Example #5
    def testCreateMemDecBlockedFormat(self):
        """Try to create the mkl concat operation

    when one of the input's memory descriptor is in blocked format
    """
        if test_util.IsMklEnabled():
            s0 = np.ones((1, 8188, 4092, 1), dtype=np.uint8).astype(np.float32)
            s1 = array_ops.strided_slice(s0, [0, 1, 1, 0], [0, -1, -1, 0],
                                         [1, 1, 1, 1],
                                         begin_mask=9,
                                         end_mask=9)
            s2 = array_ops.slice(s1, [0, 0, 0, 0], [-1, -1, -1, 1])
            s3_1 = array_ops.slice(s2, [0, 4, 4, 0], [-1, 8178, 4082, 1])
            s3_2 = array_ops.slice(s2, [0, 4, 4, 0], [-1, 8178, 4082, 1])
            filter4_1 = constant_op.constant([[[[1.18, -0.51]]]])
            s4_1 = nn_ops.conv2d(s3_1,
                                 filter4_1,
                                 strides=[1, 1, 1, 1],
                                 padding="VALID")
            filter4_2 = constant_op.constant([[[[1.38, -0.11]]]])
            s4_2 = nn_ops.conv2d(s3_2,
                                 filter4_2,
                                 strides=[1, 1, 1, 1],
                                 padding="VALID")
            s5_1 = array_ops.slice(s4_1, [0, 6, 6, 0], [-1, 1, 1, -1])
            s5_2 = array_ops.slice(s4_2, [0, 6, 6, 0], [-1, 1, 1, -1])
            x_concat = array_ops.concat([s5_1, s5_2], 3)
            # This test only checks that creating the op does not crash.
            self.evaluate(x_concat)
Example #6
  def testResetMemoryStatsCPU(self):
    if test_util.IsMklEnabled():
      # TODO(gzmkl) work with Google team to address design issue in allocator.h
      self.skipTest('MklCPUAllocator does not throw exception. So skip test.')

    with self.assertRaisesRegex(ValueError, 'Cannot reset memory stats'):
      config.reset_memory_stats('CPU:0')
Example #7
  def testAnalysisAndAllocations(self):
    run_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_metadata = config_pb2.RunMetadata()
    config = config_pb2.ConfigProto(device_count={'CPU': 3})

    with session.Session(config=config) as sess:
      with ops.device('/cpu:0'):
        num1 = variables.Variable(1.0, name='num1')
      with ops.device('/cpu:1'):
        num2 = variables.Variable(2.0, name='num2')
      with ops.device('/cpu:2'):
        result = num1 + num2 + num1 * num2
      sess.run(variables.global_variables_initializer())
      sess.run(result, options=run_options, run_metadata=run_metadata)

    self.assertTrue(run_metadata.HasField('step_stats'))
    tl = timeline.Timeline(run_metadata.step_stats)
    step_analysis = tl.analyze_step_stats()
    ctf = step_analysis.chrome_trace.format_to_string()
    self._validateTrace(ctf)
    maximums = step_analysis.allocator_maximums
    cpuname = 'mklcpu' if test_util.IsMklEnabled() else 'cpu'
    self.assertTrue(cpuname in maximums)
    cpu_max = maximums[
        'cuda_host_bfc'] if 'cuda_host_bfc' in maximums else maximums[cpuname]
    # At least num1 + num2, both float32s (4 bytes each)
    self.assertGreater(cpu_max.num_bytes, 8)
    self.assertGreater(cpu_max.timestamp, 0)
    self.assertTrue('num1' in cpu_max.tensors or 'num1/read' in cpu_max.tensors)
    self.assertTrue('num2' in cpu_max.tensors or 'num2/read' in cpu_max.tensors)
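The cpuname branch above reflects that MKL builds register the default CPU allocator under a different name. A tiny hypothetical helper capturing the same lookup:

def default_cpu_allocator_name():
    # Mirrors the branch in the test above; the helper name is hypothetical.
    from tensorflow.python.framework import test_util
    return 'mklcpu' if test_util.IsMklEnabled() else 'cpu'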
Example #8
 def _maybe_skip(self, mode):
     if mode == 'cuda' and not test.is_gpu_available(cuda_only=True):
         self.skipTest('No GPU is available')
     if mode == 'mkl' and not test_util.IsMklEnabled():
         self.skipTest('MKL is not enabled')
     # Test will fail on machines without AVX512f, e.g., Broadwell
     isAVX512f = _pywrap_utils.IsBF16SupportedByOneDNNOnThisCPU()
     if mode == 'mkl' and not isAVX512f:
         self.skipTest('Skipping test due to non-AVX512f machine')
Example #9
  def testGetMemoryInfoCPU(self):
    if test_util.IsMklEnabled():
      # TODO(gzmkl) work with Google team to address design issue in allocator.h
      self.skipTest('MklCPUAllocator does not throw exception. So skip test.')

    with self.assertRaisesRegex(ValueError, 'Allocator stats not available'):
      config.get_memory_info('CPU:0')
    with self.assertRaisesRegex(ValueError, 'Allocator stats not available'):
      config.get_memory_usage('CPU:0')
Example #10
    def testSmallNetworkCost(self):
        image = array_ops.placeholder(dtypes.float32, shape=[1, 28, 28, 1])
        label = array_ops.placeholder(dtypes.float32, shape=[1, 10])
        w = variables.Variable(
            random_ops.truncated_normal([5, 5, 1, 32], stddev=0.1))
        b = variables.Variable(random_ops.truncated_normal([32], stddev=0.1))
        conv = nn_ops.conv2d(image, w, strides=[1, 1, 1, 1], padding="SAME")
        h_conv = nn_ops.relu(conv + b)
        h_conv_flat = array_ops.reshape(h_conv, [1, -1])

        w_fc = variables.Variable(
            random_ops.truncated_normal([25088, 10], stddev=0.1))
        b_fc = variables.Variable(random_ops.truncated_normal([10],
                                                              stddev=0.1))
        y_conv = nn_ops.softmax(math_ops.matmul(h_conv_flat, w_fc) + b_fc)

        cross_entropy = math_ops.reduce_mean(
            -math_ops.reduce_sum(label * math_ops.log(y_conv), axis=[1]))
        _ = adam.AdamOptimizer(1e-4).minimize(cross_entropy)

        mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph())
        report = cost_analyzer.GenerateCostReport(mg)

        # Print the report to make it easier to debug
        print("{}".format(report))

        self.assertTrue(b"MatMul" in report)
        self.assertTrue(b"ApplyAdam" in report)
        self.assertTrue(b"Conv2DBackpropFilter" in report)
        self.assertTrue(b"Softmax" in report)

        # When MKL is enabled, a Conv2D or MatMul op followed by a
        # 1-dimensional Add in this graph will be fused, but not
        # in the MKL-disabled case.
        expected_matmul_count = 2
        op_types = [b"MatMul", b"Conv2DBackpropFilter"]

        if not test_util.IsMklEnabled():
            self.assertTrue(b"Conv2D" in report)
            expected_matmul_count = 3
            op_types.append(b"Conv2D")

        for op_type in op_types:
            matcher = re.compile(
                br"\s+" + op_type +
                br",\s*(\d+),\s*(\d+),\s*([\d\.eE+-]+)%,\s*" +
                br"([\d\.eE+-]+)%,\s*(-?\d+),\s*(\d+),", re.MULTILINE)
            m = matcher.search(report)

            op_count = int(m.group(1))
            # upper = int(m.group(5))
            lower = int(m.group(6))
            if op_type == b"MatMul":
                self.assertEqual(expected_matmul_count, op_count)
            else:
                self.assertEqual(1, op_count)
            self.assertTrue(0 <= lower)
Example #11
 def testSoftmaxGradGradExtendType(self):
     if test_util.IsMklEnabled():
         inputs = constant_op.constant([[-2, -1, 1, 3], [5, 7, 8, 9]],
                                       dtype=dtypes.bfloat16)
         r = nn_ops.softmax(inputs)
         r_g = gradients_impl.gradients(r, inputs)[0]
         with self.cached_session():
             error = gradient_checker.compute_gradient_error(
                 inputs, inputs.get_shape(), r_g, r_g.get_shape())
             self.assertLess(error, 1e-4)
Example #12
 def testConv3D2x2x2Filter1x2x1Dilation(self):
     ctx = context.context()
     is_eager = ctx is not None and ctx.executing_eagerly()
     if test.is_gpu_available(cuda_only=True) or \
       (test_util.IsMklEnabled() and is_eager is False):
         self._VerifyDilatedConvValues(tensor_in_sizes=[1, 4, 6, 3, 1],
                                       filter_in_sizes=[2, 2, 2, 1, 1],
                                       stride=1,
                                       padding="VALID",
                                       dilations=[1, 2, 1])
Example #13
    def testGetOps(self):
        default_ops = 'NoOp:NoOp,_Recv:RecvOp,_Send:SendOp'
        graphs = [
            text_format.Parse(d, graph_pb2.GraphDef())
            for d in [GRAPH_DEF_TXT, GRAPH_DEF_TXT_2]
        ]

        ops_and_kernels = selective_registration_header_lib.get_ops_and_kernels(
            'rawproto', self.WriteGraphFiles(graphs), default_ops)
        matmul_prefix = ''
        if test_util.IsMklEnabled():
            matmul_prefix = 'Mkl'

        self.assertListEqual(
            [
                ('AccumulateNV2', None),  #
                ('BiasAdd', 'BiasOp<CPUDevice, float>'),  #
                ('MatMul',
                 matmul_prefix + 'MatMulOp<CPUDevice, double, false >'),  #
                ('MatMul',
                 matmul_prefix + 'MatMulOp<CPUDevice, float, false >'),  #
                ('NoOp', 'NoOp'),  #
                ('Reshape', 'ReshapeOp'),  #
                ('_Recv', 'RecvOp'),  #
                ('_Send', 'SendOp'),  #
            ],
            ops_and_kernels)

        graphs[0].node[0].ClearField('device')
        graphs[0].node[2].ClearField('device')
        ops_and_kernels = selective_registration_header_lib.get_ops_and_kernels(
            'rawproto', self.WriteGraphFiles(graphs), default_ops)
        self.assertListEqual(
            [
                ('AccumulateNV2', None),  #
                ('BiasAdd', 'BiasOp<CPUDevice, float>'),  #
                ('MatMul',
                 matmul_prefix + 'MatMulOp<CPUDevice, double, false >'),  #
                ('MatMul',
                 matmul_prefix + 'MatMulOp<CPUDevice, float, false >'),  #
                ('NoOp', 'NoOp'),  #
                ('Reshape', 'ReshapeOp'),  #
                ('_Recv', 'RecvOp'),  #
                ('_Send', 'SendOp'),  #
            ],
            ops_and_kernels)
Example #14
    def maybe_skip_test(self, mode):
        if mode == 'cuda':
            # It seems the Windows OS cannot correctly query the cuda_version.
            # TODO(kaixih@nvidia): Remove this when it works.
            if os.name == 'nt':
                self.skipTest("This test doesn't support Windows")

            # The cublasLt matmul with GELU epilogue is only supported since CUDA 11.4.
            if not test.is_gpu_available(cuda_only=True):
                self.skipTest('This test requires GPU.')
            cuda_version_str = sysconfig.get_build_info().get(
                'cuda_version', '0.0')
            cuda_version = tuple([int(x) for x in cuda_version_str.split('.')])
            if cuda_version < (11, 4):
                self.skipTest('This test requires CUDA >= 11.4.')

        if mode == 'mkl' and not test_util.IsMklEnabled():
            self.skipTest('MKL is not enabled.')
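The CUDA check above can be exercised on its own; a minimal sketch, assuming the same sysconfig module used above exposes a 'cuda_version' string in its build info:

from tensorflow.python.platform import sysconfig

build_info = sysconfig.get_build_info()
cuda_version = tuple(
    int(x) for x in build_info.get('cuda_version', '0.0').split('.'))
print('CUDA >= 11.4:', cuda_version >= (11, 4))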
Example #15
def run(base_dir, ext="jpg", store_results='', smart=False):
    if smart:
        raise NotImplementedError

    using_gpu = tf.test.is_gpu_available()
    if using_gpu:
        logger.info("Running on GPU")
    else:
        from tensorflow.python.framework import test_util as tftest_util
        assert tftest_util.IsMklEnabled(), \
            "This tensorflow is not compiled with MKL. Abort."
        logger.warn("Running on CPU")

    results = []

    # Download and uncompress model
    checkpoint_url = "http://download.tensorflow.org/models/mobilenet_v1_1.0_224_2017_06_14.tar.gz"
    checkpoints_dir = s3dexp.config.CKPT_DIR
    checkpoint_path = os.path.join(checkpoints_dir,
                                   'mobilenet_v1_1.0_224.ckpt')

    if not tf.gfile.Exists(checkpoints_dir):
        tf.gfile.MakeDirs(checkpoints_dir)
        dataset_utils.download_and_uncompress_tarball(checkpoint_url,
                                                      checkpoints_dir)

    with tf.Graph().as_default():
        logger.info("Creating compute graph ...")
        ########################################
        # Select the model
        ########################################
        network_fn = nets_factory.get_network_fn('mobilenet_v1',
                                                 num_classes=1001,
                                                 is_training=False)
        image_size = mobilenet_v1.mobilenet_v1.default_image_size

        ########################################
        # Define input and preprocessing tensors
        ########################################
        # Crucial to specify dtype=tf.uint8; otherwise you will get wrong predictions.
        inputs = tf.placeholder(dtype=tf.uint8,
                                shape=(None, image_size, image_size, 3))
        preprocessing_fn = get_preprocessing('mobilenet_v1')
        processed_images = tf.map_fn(
            lambda x: preprocessing_fn(x, image_size, image_size),
            inputs,
            dtype=tf.float32)

        ########################################
        # Create the compute graph
        ########################################
        logits, _ = network_fn(processed_images)
        probabilities = tf.nn.softmax(logits)

        # https://github.com/tensorflow/tensorflow/issues/4196
        # https://www.tensorflow.org/programmers_guide/using_gpu
        config = tf.ConfigProto()
        # config.gpu_options.allow_growth = True
        # config.gpu_options.per_process_gpu_memory_fraction = 0.4
        with tf.Session(config=config) as sess:
            logger.info("Loading checkpoint from %s" % checkpoint_path)
            saver = tf.train.Saver()
            saver.restore(sess, checkpoint_path)

            logger.info("Warm up with a fake image")
            fakeimages = np.random.randint(0,
                                           256,
                                           size=(1, image_size, image_size, 3),
                                           dtype=np.uint8)
            _ = sess.run(probabilities, feed_dict={inputs: fakeimages})

            ########################################
            # walk through directory and inference
            ########################################
            for path in recursive_glob(base_dir, "*.{}".format(ext)):
                tic = time.time()

                if not smart:
                    # 0. read from disk
                    with open(path, 'rb') as f:
                        buf = f.read()
                    read_time = time.time() - tic

                    # 1. image decode
                    arr = cv2.imdecode(np.frombuffer(buf, np.int8),
                                       cv2.IMREAD_COLOR)
                    decode_time = time.time() - tic
                else:
                    raise NotImplementedError

                h, w = arr.shape[:2]

                # 2. Run inference
                # resize
                arr_resized = cv2.resize(arr, (image_size, image_size),
                                         interpolation=cv2.INTER_AREA)
                images = np.expand_dims(arr_resized, 0)
                _ = sess.run(probabilities, feed_dict={inputs: images})

                all_time = time.time() - tic

                logger.debug(
                    "Read {:.1f} ms, Decode {:.1f}, Total {:.1f}. {}".format(
                        read_time * 1000, decode_time * 1000, all_time * 1000,
                        path))

                results.append({
                    'path': path,
                    'read_ms': read_time * 1000,
                    'decode_ms': decode_time * 1000,
                    'total_ms': all_time * 1000,
                    'size': len(buf),
                    'height': h,
                    'width': w
                })

    if store_results:
        logger.info("Writing {} results to DB".format(len(results)))
        dbsess = dbutils.get_session()
        for r in results:
            keys_dict = {
                'path': r['path'],
                'basename': os.path.basename(r['path']),
                'expname': 'mobilenet_inference',
                'device': 'gpu' if using_gpu else 'cpu',
                'disk': 'smart' if smart else 'hdd'
            }

            dbutils.insert_or_update_one(dbsess,
                                         dbmodles.AppExp,
                                         keys_dict=keys_dict,
                                         vals_dict={
                                             'read_ms': r['read_ms'],
                                             'decode_ms': r['decode_ms'],
                                             'total_ms': r['total_ms'],
                                             'size': r['size'],
                                             'height': r['height'],
                                             'width': r['width']
                                         })
        dbsess.commit()
        dbsess.close()
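A hypothetical way to invoke run() above; the directory path is an assumption:

if __name__ == '__main__':
    # Classify every .jpg under a directory; timings are persisted to the DB
    # only when store_results is truthy.
    run('/data/images', ext='jpg', store_results='')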
Example #16
# 5b) I find it harder to adjust the hyperparameters for predicting sin.
# I have noticed it is harder to get a good model at 10 compared to 0 when using data within that interval.
# However, it looks like the model gets more precise when expanding the training data to 15 or 20.

# Imports
import matplotlib.pyplot as plt
import numpy as np
import time
from keras.models import Sequential
from keras.layers import Dense

from tensorflow.python.framework import test_util
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
print(test_util.IsMklEnabled())

# Load training data
x = np.random.uniform(0, 10, (20000, 1))
y = np.sin(x)

# # Define model
# model = Sequential()
# model.add(Dense(140, input_dim=1, activation='relu'))
# model.add(Dense(1))
# model.compile(loss='mean_squared_error', optimizer='adam')
# prefit = time.time()
# model.fit(x, y, epochs=100, batch_size=50)
# postfit = time.time()

# Define model
model = Sequential()
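The example breaks off after model = Sequential(). Assuming the live model mirrors the commented-out block above, a hypothetical completion:

# Hypothetical completion; layer sizes, loss, optimizer, and epochs are
# copied from the commented-out block above.
model.add(Dense(140, input_dim=1, activation='relu'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x, y, epochs=100, batch_size=50)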
Example #17
def collect_env_info():
    """
    Returns:
        str - a table contains important information about the environment
    """
    data = []
    data.append(("sys.platform", sys.platform))
    data.append(("Python", sys.version.replace("\n", "")))
    data.append(
        ("Tensorpack",
         __git_version__ + " @" + os.path.dirname(tensorpack.__file__)))
    data.append(("Numpy", np.__version__))

    data.append(("TensorFlow", tfv1.VERSION + "/" + tfv1.GIT_VERSION + " @" +
                 os.path.dirname(tf.__file__)))
    data.append(("TF Compiler Version", tfv1.COMPILER_VERSION))
    has_cuda = tf.test.is_built_with_cuda()
    data.append(("TF CUDA support", has_cuda))

    try:
        from tensorflow.python.framework import test_util
        data.append(("TF MKL support", test_util.IsMklEnabled()))
    except Exception:
        pass

    try:
        from tensorflow.python.framework import test_util
        data.append(("TF XLA support", test_util.is_xla_enabled()))
    except Exception:
        pass

    if has_cuda:
        data.append(("Nvidia Driver", find_library("nvidia-ml")))
        data.append(("CUDA libs", find_library("cudart")))
        data.append(("CUDNN libs", find_library("cudnn")))
        for k, v in parse_TF_build_info().items():
            data.append((k, v))
        data.append(("NCCL libs", find_library("nccl")))

        # List devices with NVML
        data.append(("CUDA_VISIBLE_DEVICES",
                     os.environ.get("CUDA_VISIBLE_DEVICES", "Unspecified")))
        try:
            devs = defaultdict(list)
            with NVMLContext() as ctx:
                for idx, dev in enumerate(ctx.devices()):
                    devs[dev.name()].append(str(idx))

            for devname, devids in devs.items():
                data.append(("GPU " + ",".join(devids), devname))
        except Exception:
            data.append(("GPU", "Not found with NVML"))

    vram = psutil.virtual_memory()
    data.append(
        ("Free RAM", "{:.2f}/{:.2f} GB".format(vram.available / 1024**3,
                                               vram.total / 1024**3)))
    data.append(("CPU Count", psutil.cpu_count()))

    # Other important dependencies:
    try:
        import horovod
        data.append(
            ("Horovod",
             horovod.__version__ + " @" + os.path.dirname(horovod.__file__)))
    except ImportError:
        pass

    try:
        import cv2
        data.append(("cv2", cv2.__version__))
    except ImportError:
        pass

    import msgpack
    data.append(("msgpack", ".".join([str(x) for x in msgpack.version])))

    has_prctl = True
    try:
        import prctl
        _ = prctl.set_pdeathsig  # noqa
    except Exception:
        has_prctl = False
    data.append(("python-prctl", has_prctl))

    return tabulate(data)
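A hypothetical entry point for collect_env_info() above:

if __name__ == '__main__':
    # Print the tabulated environment report, including the TF MKL row.
    print(collect_env_info())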
Example #18
    def testArithmeticOptimizationActive(self):
        """Tests that tfdbg can dump the tensor from nodes created by Grappler."""
        with session.Session(
                config=_grappler_enabled_session_config()) as sess:
            u = variables.VariableV1([[1, 2], [3, 4]],
                                     name="u",
                                     dtype=dtypes.float32)
            # The next two ops should be optimized by Grappler into a single op:
            # either an AddN op or a Mul op.
            x = math_ops.add(u, u)
            x = math_ops.add(x, u)
            y = math_ops.multiply(x, u)

            sess.run(variables.global_variables_initializer())

            run_options = config_pb2.RunOptions(output_partition_graphs=True)
            debug_utils.watch_graph(run_options,
                                    sess.graph,
                                    debug_ops=["DebugIdentity"],
                                    debug_urls=[self._debug_url])

            run_metadata = config_pb2.RunMetadata()
            run_result = sess.run(y,
                                  options=run_options,
                                  run_metadata=run_metadata)
            self.assertAllClose(run_result, [[3, 12], [27, 48]])

            dump_data = debug_data.DebugDumpDir(
                self._dump_root,
                partition_graphs=run_metadata.partition_graphs,
                validate=True)

            original_node_names = set(op.name
                                      for op in sess.graph.get_operations())
            dumped_node_names = set(dump_data.nodes())
            grappler_created_node_names = dumped_node_names - original_node_names
            grappler_removed_node_names = original_node_names - dumped_node_names

            # Assert that Grappler should have replaced some of the nodes from the
            # original graph with new nodes.
            self.assertTrue(grappler_created_node_names)
            self.assertTrue(grappler_removed_node_names)

            # Iterate through the nodes created by Grappler. One of them should
            # be the result of replacing the original add ops with an AddN op
            # or a Mul op.
            found_optimized_node = False
            for grappler_node_name in grappler_created_node_names:
                node_op_type = dump_data.node_op_type(grappler_node_name)
                # Look for the node created by Grappler's arithmetic optimization.
                if ((test_util.IsMklEnabled()
                     and node_op_type in ("_MklAddN", "Mul"))
                        or (node_op_type in ("AddN", "Mul"))):
                    datum = dump_data.get_tensors(grappler_node_name, 0,
                                                  "DebugIdentity")
                    self.assertEqual(1, len(datum))
                    self.assertAllClose(datum[0], [[3, 6], [9, 12]])
                    found_optimized_node = True
                    break
            self.assertTrue(
                found_optimized_node,
                "Failed to find optimized node created by Grappler's arithmetic "
                "optimization.")
Example #19
def run(base_dir,
        ext="jpg",
        store_results='',
        smart=False,
        batch_size=8,
        num_parallel_calls=None,
        etl_only=False):
    # adjust default parameters
    if not num_parallel_calls:
        num_parallel_calls = batch_size

    # GPU or CPU?
    using_gpu = tf.test.is_gpu_available()
    if using_gpu:
        logger.info("Running on GPU")
    else:
        from tensorflow.python.framework import test_util as tftest_util
        assert tftest_util.IsMklEnabled(), \
            "This tensorflow is not compiled with MKL. Abort."
        logger.warn("Running on CPU")

    # Download and uncompress model
    checkpoint_url = "http://download.tensorflow.org/models/mobilenet_v1_1.0_224_2017_06_14.tar.gz"
    checkpoints_dir = s3dexp.config.CKPT_DIR
    checkpoint_path = os.path.join(checkpoints_dir,
                                   'mobilenet_v1_1.0_224.ckpt')
    if not tf.gfile.Exists(checkpoints_dir):
        tf.gfile.MakeDirs(checkpoints_dir)
        dataset_utils.download_and_uncompress_tarball(checkpoint_url,
                                                      checkpoints_dir)

    # Prepare the `load_and_preprocess_fn` function to be passed into Dataset.map
    # NOTE: in graph mode, this function takes in a tensor and adds operators to the graph
    if not smart:

        def load_and_preprocess_fn(path):  # path is tensor
            # 0. read from disk
            raw = tf.read_file(path)
            # 1. image decode
            image = tf.image.decode_jpeg(
                raw, channels=3
            )  # tf.image.decode_image() doesn't return a shape, causing an error: https://stackoverflow.com/questions/44942729/tensorflowvalueerror-images-contains-no-shape
            # 2. resize
            image_resize = tf.image.resize_images(image,
                                                  (image_size, image_size))
            return image_resize  # Tensor
    else:
        # TODO use our smart storage here
        raise NotImplementedError

        def load_and_preprocess_fn(path):
            def smart_fn(path):
                # this pure Python function will actually be called many times, by multiple threads if num_parallel_calls > 1
                logger.debug("Enter smart_fn. Path {}".format(path))
                # TODO replace with real smart storage logic
                fakeimage = np.random.randint(0,
                                              256,
                                              size=(image_size, image_size, 3),
                                              dtype=np.uint8)
                logger.debug("Exit smart_fn")
                return fakeimage

            out_op = tf.py_func(smart_fn, [path], tf.uint8)
            out_op.set_shape([image_size, image_size,
                              3])  # must explicitly set shape to avoid error
            return out_op

    results = []

    with tf.Graph().as_default():
        logger.info("Creating compute graph ...")
        ########################################
        # Select the model
        ########################################
        network_fn = nets_factory.get_network_fn('mobilenet_v1',
                                                 num_classes=1001,
                                                 is_training=False)
        image_size = mobilenet_v1.mobilenet_v1.default_image_size

        ########################################
        # Create a tf.data.Dataset with batching
        ########################################
        all_paths = list(recursive_glob(base_dir, "*.{}".format(ext)))
        logger.info("Found {} paths".format(len(all_paths)))
        path_ds = tf.data.Dataset.from_tensor_slices(all_paths)
        image_ds = path_ds.map(
            load_and_preprocess_fn,
            num_parallel_calls=num_parallel_calls).batch(batch_size)
        # create iterator
        iterator = image_ds.make_initializable_iterator()
        batch_of_images = iterator.get_next()

        ########################################
        # Define input and preprocessing tensors
        ########################################
        preprocessing_fn = get_preprocessing('mobilenet_v1')
        processed_images = tf.map_fn(
            lambda x: preprocessing_fn(x, image_size, image_size),
            batch_of_images,
            dtype=tf.float32)

        ########################################
        # Create the compute graph
        ########################################
        logits, _ = network_fn(processed_images)
        probabilities = tf.nn.softmax(logits)

        config = tf.ConfigProto()
        with tf.Session(config=config) as sess:
            logger.info("Loading checkpoint from %s" % checkpoint_path)
            saver = tf.train.Saver()
            saver.restore(sess, checkpoint_path)

            # initialize Dataset iterator
            sess.run(iterator.initializer)

            logger.info("Warm up with a fake batch")
            fakeimages = np.random.randint(0,
                                           256,
                                           size=(batch_size, image_size,
                                                 image_size,
                                                 3)).astype(np.float32)
            _ = sess.run(probabilities,
                         feed_dict={processed_images: fakeimages})

            try:
                count_image = 0
                count_batch = 0
                elapsed = 0.
                tic = time.time()
                while True:
                    if etl_only:
                        res = sess.run(batch_of_images)
                    else:
                        res = sess.run(probabilities)

                    toc = time.time()
                    logger.debug(
                        "Batch {}, batch size {}, elapsed {:.1f}".format(
                            count_batch, res.shape[0],
                            1000 * (toc - tic - elapsed)))

                    if res.shape[0] < batch_size:
                        # discard last batch
                        continue
                    else:
                        elapsed = toc - tic
                        count_batch += 1
                        count_image += batch_size

            except tf.errors.OutOfRangeError:
                pass
            finally:
                logger.info(
                    "Ran {} batches, {} images, batch size {}, avg ms/image {:.2f}"
                    .format(count_batch, count_image, batch_size,
                            elapsed * 1000 / count_image))
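The loop above follows the classic TF 1.x initializable-iterator recipe: map a load function over paths, batch, and pull batches until the dataset is drained. A self-contained miniature, assuming graph mode:

import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

paths = tf.data.Dataset.from_tensor_slices(['a.jpg', 'b.jpg', 'c.jpg'])
ds = paths.map(tf.strings.length, num_parallel_calls=2).batch(2)
iterator = tf.data.make_initializable_iterator(ds)
batch = iterator.get_next()

with tf.Session() as sess:
    sess.run(iterator.initializer)
    try:
        while True:
            print(sess.run(batch))  # pull batches until OutOfRangeError
    except tf.errors.OutOfRangeError:
        pass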
Example #20
def collect_env_info():
    """
    Returns:
        str - a table contains important information about the environment
    """
    data = []
    data.append(("Python", sys.version.replace("\n", "")))
    data.append(("Tensorpack", __git_version__))
    data.append(("TensorFlow", tfv1.VERSION + "/" + tfv1.GIT_VERSION))
    data.append(("TF Compiler Version", tfv1.COMPILER_VERSION))
    has_cuda = tf.test.is_built_with_cuda()
    data.append(("TF CUDA support", has_cuda))

    try:
        from tensorflow.python.framework import test_util
        data.append(("TF MKL support", test_util.IsMklEnabled()))
    except Exception:
        pass

    try:
        from tensorflow.python.framework import test_util
        data.append(("TF XLA support", test_util.is_xla_enabled()))
    except Exception:
        pass

    if has_cuda:
        data.append(("Nvidia Driver", find_library("nvidia-ml")))
        data.append(("CUDA", find_library("cudart")))
        data.append(("CUDNN", find_library("cudnn")))
        data.append(("NCCL", find_library("nccl")))

        # List devices with NVML
        data.append(("CUDA_VISIBLE_DEVICES",
                     os.environ.get("CUDA_VISIBLE_DEVICES", str(None))))
        try:
            devs = defaultdict(list)
            with NVMLContext() as ctx:
                for idx, dev in enumerate(ctx.devices()):
                    devs[dev.name()].append(str(idx))

            for devname, devids in devs.items():
                data.append(("GPU " + ",".join(devids), devname))
        except Exception:
            data.append(("GPU", "Not found with NVML"))

    # Other important dependencies
    try:
        import horovod
        data.append(("horovod", horovod.__version__))
    except ImportError:
        pass

    try:
        import cv2
        data.append(("cv2", cv2.__version__))
    except ImportError:
        pass

    import msgpack
    data.append(("msgpack", ".".join([str(x) for x in msgpack.version])))

    has_prctl = True
    try:
        import prctl
        _ = prctl.set_pdeathsig
    except Exception:
        has_prctl = False
    data.append(("python-prctl", has_prctl))

    return tabulate(data)
Example #21
 def check_tf_mkl(self):
     assert test_util.IsMklEnabled(), "tensorflow doesn't have mkl enabled"
Example #22
    parser.add_argument(
        '--config',
        help="A list of KEY=VALUE to overwrite those defined in config.py",
        nargs='+')
    parser.add_argument('--compact', help='Save a model to .pb')
    parser.add_argument('--serving', help='Save a model to serving file')

    args = parser.parse_args()
    if args.config:
        cfg.update_args(args.config)
    register_coco(cfg.DATA.BASEDIR)  # add COCO datasets to the registry
    MODEL = ResNetFPNModel() if cfg.MODE_FPN else ResNetC4Model()

    if not tf.test.is_gpu_available():
        from tensorflow.python.framework import test_util
        assert get_tf_version_tuple() >= (1, 7) and test_util.IsMklEnabled(), \
            "Inference requires either GPU support or MKL support!"
    assert args.load
    finalize_configs(is_training=False)

    if args.predict or args.visualize:
        cfg.TEST.RESULT_SCORE_THRESH = cfg.TEST.RESULT_SCORE_THRESH_VIS

    if args.visualize:
        do_visualize(MODEL, args.load)
    else:
        predcfg = PredictConfig(
            model=MODEL,
            session_init=get_model_loader(args.load),
            input_names=MODEL.get_inference_tensor_names()[0],
            output_names=MODEL.get_inference_tensor_names()[1])
Example #23
def collect_env_info():
    """
    Returns:
        str - a table contains important information about the environment
    """
    data = []
    data.append(("Python", sys.version.replace("\n", "")))
    data.append(("Tensorpack", __git_version__))
    data.append(("TensorFlow", tfv1.VERSION + "/" + tfv1.GIT_VERSION))
    data.append(("TF Compiler Version", tfv1.COMPILER_VERSION))
    data.append(("TF CUDA support", tf.test.is_built_with_cuda()))

    try:
        from tensorflow.python.framework import test_util
        data.append(("TF MKL support", test_util.IsMklEnabled()))
    except Exception:
        pass

    try:
        from tensorflow.python.framework import test_util
        data.append(("TF XLA support", test_util.is_xla_enabled()))
    except Exception:
        pass

    def find_library_with_ldconfig(ldconfig, lib):
        # Read sonames from ldconfig: may not be accurate
        # similar to from ctypes.util import find_library, but with full path
        expr = r'\s+(lib%s\.[^\s]+)\s+\(.*=>\s+(.*)' % (re.escape(lib))
        res = re.search(expr, ldconfig)
        if not res:
            return None
        else:
            ret = res.group(2)
            return os.path.realpath(ret)

    try:
        with change_env('LC_ALL', 'C'), change_env('LANG', 'C'):
            ldconfig, ret = subproc_call("ldconfig -p")
        assert ret == 0
        ldconfig = ldconfig.decode('utf-8')

        def find_library(x):
            return find_library_with_ldconfig(ldconfig, x)

    except Exception:
        from ctypes.util import find_library

    data.append(("CUDA", find_library("cudart")))
    data.append(("CUDNN", find_library("cudnn")))
    data.append(("NCCL", find_library("nccl")))

    # List devices with NVML
    data.append(("CUDA_VISIBLE_DEVICES",
                 os.environ.get("CUDA_VISIBLE_DEVICES", str(None))))
    try:
        devs = defaultdict(list)
        with NVMLContext() as ctx:
            for idx, dev in enumerate(ctx.devices()):
                devs[dev.name()].append(str(idx))

        for devname, devids in devs.items():
            data.append(("GPU " + ",".join(devids) + " Model", devname))
    except Exception:
        pass

    # Other important dependencies
    try:
        import horovod
        data.append(("horovod", horovod.__version__))
    except ImportError:
        pass

    try:
        import cv2
        data.append(("cv2", cv2.__version__))
    except ImportError:
        pass

    import msgpack
    data.append(("msgpack", ".".join([str(x) for x in msgpack.version])))

    has_prctl = True
    try:
        import prctl
        _ = prctl.set_pdeathsig
    except Exception:
        has_prctl = False
    data.append(("python-prctl", has_prctl))

    return tabulate(data)
Example #24
 def _maybe_skip(self, mode):
     if mode == 'cuda' and not test.is_gpu_available(cuda_only=True):
         self.skipTest('No GPU is available')
     if mode == 'mkl' and not test_util.IsMklEnabled():
         self.skipTest('MKL is not enabled')
Example #25
 def _maybe_skip(self, mode):
   if mode == 'cuda':
     self.skipTest('This test does not pass on GPU.')
   if mode == 'mkl' and not test_util.IsMklEnabled():
     self.skipTest('MKL is not enabled.')
Example #26
        logger.warn(
            "TF<1.6 has a bug which may lead to crash in FasterRCNN if you're unlucky."
        )

    args = parser.parse_args()
    if args.config:
        cfg.update_args(args.config)

    MODEL = ResNetFPNModel() if cfg.MODE_FPN else ResNetC4Model()
    YCBVDetectionDataset()  # initialize the config with information from our dataset

    if args.visualize or args.evaluate or args.predict:
        if not tf.test.is_gpu_available():
            from tensorflow.python.framework import test_util
            assert test_util.IsMklEnabled(), \
                "Inference requires either GPU support or MKL support!"
        assert args.load
        finalize_configs(is_training=False)

        if args.predict or args.visualize:
            cfg.TEST.RESULT_SCORE_THRESH = cfg.TEST.RESULT_SCORE_THRESH_VIS

        if args.visualize:
            do_visualize(MODEL, args.load)
        else:
            predcfg = PredictConfig(
                model=MODEL,
                session_init=get_model_loader(args.load),
                input_names=MODEL.get_inference_tensor_names()[0],
                output_names=MODEL.get_inference_tensor_names()[1])
            if args.predict: