def testComplexCodeView(self):
    """Profile one traced step of the full model and verify the report.

    The profile is produced with `profile_python`, restricted to nodes whose
    name matches ``model_analyzer_testlib.py`` and selecting ``params`` and
    ``float_ops``. Both the text dump written to disk and the returned
    profile tree are checked.
    """
    ops.reset_default_graph()
    dump_path = os.path.join(test.get_temp_dir(), 'dump')

    option_builder = builder(builder.trainable_variables_parameter())
    option_builder = option_builder.with_file_output(dump_path)
    option_builder = option_builder.with_accounted_types(['.*'])
    option_builder = option_builder.with_node_names(
        show_name_regexes=['.*model_analyzer_testlib.py.*'])
    option_builder = option_builder.account_displayed_op_only(False)
    option_builder = option_builder.select(['params', 'float_ops'])
    opts = option_builder.build()

    # Names expected for the root's children, by child index.
    expected_children = [
        'model_analyzer_testlib.py:63:BuildFullModel',
        'model_analyzer_testlib.py:63:BuildFullModel (gradient)',
        'model_analyzer_testlib.py:67:BuildFullModel',
        'model_analyzer_testlib.py:67:BuildFullModel (gradient)',
        'model_analyzer_testlib.py:69:BuildFullModel',
        'model_analyzer_testlib.py:70:BuildFullModel',
        'model_analyzer_testlib.py:70:BuildFullModel (gradient)',
        'model_analyzer_testlib.py:72:BuildFullModel',
    ]

    with profile_context.ProfileContext(
            test.get_temp_dir(), trace_steps=[], dump_steps=[]) as pctx:
        with session.Session() as sess:
            model_output = lib.BuildFullModel()

            sess.run(variables.global_variables_initializer())
            pctx.trace_next_step()
            sess.run(model_output)
            root = pctx.profiler.profile_python(options=opts)

            # pylint: disable=line-too-long
            with gfile.Open(dump_path, 'r') as dump_file:
                dump_lines = dump_file.read().split('\n')
                self.assertGreater(len(dump_lines), 5)
                # Only the first 80 columns of every line are kept.
                clipped = '\n'.join([line[:80] for line in dump_lines])
                report_text = compat.as_text(lib.CheckAndRemoveDoc(clipped))
                self.assertTrue(
                    report_text.startswith(
                        'node name | # parameters | # float_ops'))

            self.assertLess(0, root.total_exec_micros)
            self.assertEqual(2844, root.total_parameters)
            # The graph is modified when MKL is enabled, so total_float_ops
            # has a different lower bound.
            if test_util.IsMklEnabled():
                float_ops_floor = 101600
            else:
                float_ops_floor = 145660
            self.assertLess(float_ops_floor, root.total_float_ops)

            self.assertEqual(8, len(root.children))
            self.assertEqual('_TFProfRoot', root.name)
            for index, expected_name in enumerate(expected_children):
                self.assertEqual(expected_name, root.children[index].name)
def testConv3D2x2x2Filter1x2x1Dilation(self):
    """Run the dilated-conv check for a 2x2x2 filter with dilations [1, 2, 1].

    The check only runs when a CUDA GPU is available or MKL is enabled;
    otherwise the method returns without asserting anything.
    """
    has_cuda_gpu = test.is_gpu_available(cuda_only=True)
    if not has_cuda_gpu and not test_util.IsMklEnabled():
        return

    self._VerifyDilatedConvValues(
        tensor_in_sizes=[1, 4, 6, 3, 1],
        filter_in_sizes=[2, 2, 2, 1, 1],
        stride=1,
        padding="VALID",
        dilations=[1, 2, 1])
def testIsMklEnabled(self):
    """Call `test_util.IsMklEnabled()` and print which way it answered.

    This test doesn't assert anything. It ensures the py wrapper function is
    generated correctly.
    """
    status = "MKL is enabled" if test_util.IsMklEnabled() else "MKL is disabled"
    print(status)
def __init__(
        self,
        model_path='weights/MaskRCNN-R50C41x-COCO_finetune-docrop_and_rotate_24500.pb',
        canvas_size=512,
        debug=False):
    """Load the frozen graph at `model_path` and open an inference session.

    Args:
        model_path: Path of the frozen graph handed to `load_graph`.
        canvas_size: Passed to `CustomResize` for both of its arguments and
            stored as `self.canvas_size`.
        debug: Stored as `self.debug`.

    Raises:
        AssertionError: If no GPU is available and either TensorFlow is older
            than 1.7 or it was built without MKL.
    """
    if not tf.test.is_gpu_available():
        from tensorflow.python.framework import test_util
        # Raised explicitly instead of via `assert`: assert statements are
        # removed under `python -O`, which would silently disable this check.
        cpu_inference_ok = (get_tf_version_tuple() >= (1, 7)
                            and test_util.IsMklEnabled())
        if not cpu_inference_ok:
            raise AssertionError(
                "Inference requires either GPU support or MKL support!")

    self.canvas_size = canvas_size
    self.debug = debug
    self.id_to_class_name = {
        1: 'page',
        2: 'profile_image',
        3: 'van_tay',
        4: 'passport_code'
    }
    self.resizer = CustomResize(self.canvas_size, self.canvas_size)

    print('Loading model at', model_path)
    self.graph = load_graph(model_path)
    # Tensors are looked up under the 'import/' name prefix.
    self.input_tensor = self.graph.get_tensor_by_name('import/image:0')
    self.output_node_name = [
        'output/boxes', 'output/scores', 'output/labels', 'output/masks'
    ]
    self.outputs_tensor = [
        self.graph.get_tensor_by_name('import/{}:0'.format(each_node))
        for each_node in self.output_node_name
    ]

    self.config = tf.compat.v1.ConfigProto()
    # self.config.gpu_options.allow_growth = True
    self.config.gpu_options.per_process_gpu_memory_fraction = 0.1
    self.sess = tf.compat.v1.Session(config=self.config, graph=self.graph)

    # One prediction on an all-zero 200x200 image before reporting readiness
    # (presumably a warm-up run -- confirm against predict_crop).
    self.predict_crop(np.zeros((200, 200, 3), dtype=np.uint8))
    print('Loaded model!')
def testCreateMemDecBlockedFormat(self):
    """Evaluate a concat of two sliced conv2d outputs when MKL is enabled.

    Per the original note, the goal is to create the MKL concat operation
    when one of the inputs' memory descriptors is in blocked format. Nothing
    is asserted on the result; the evaluation only has to complete.
    """
    if not test_util.IsMklEnabled():
        return

    base = np.ones((1, 8188, 4092, 1), dtype=np.uint8).astype(np.float32)
    strided = array_ops.strided_slice(
        base, [0, 1, 1, 0], [0, -1, -1, 0], [1, 1, 1, 1],
        begin_mask=9, end_mask=9)
    trimmed = array_ops.slice(strided, [0, 0, 0, 0], [-1, -1, -1, 1])

    # Two identical crops, each feeding its own 1x1 convolution.
    crop_a = array_ops.slice(trimmed, [0, 4, 4, 0], [-1, 8178, 4082, 1])
    crop_b = array_ops.slice(trimmed, [0, 4, 4, 0], [-1, 8178, 4082, 1])

    kernel_a = constant_op.constant([[[[1.18, -0.51]]]])
    conv_a = nn_ops.conv2d(
        crop_a, kernel_a, strides=[1, 1, 1, 1], padding="VALID")
    kernel_b = constant_op.constant([[[[1.38, -0.11]]]])
    conv_b = nn_ops.conv2d(
        crop_b, kernel_b, strides=[1, 1, 1, 1], padding="VALID")

    patch_a = array_ops.slice(conv_a, [0, 6, 6, 0], [-1, 1, 1, -1])
    patch_b = array_ops.slice(conv_b, [0, 6, 6, 0], [-1, 1, 1, -1])
    joined = array_ops.concat([patch_a, patch_b], 3)

    # This test is only meant to check that creation does not crash.
    self.evaluate(joined)
def testResetMemoryStatsCPU(self):
    """Expect `config.reset_memory_stats('CPU:0')` to raise ValueError.

    Skipped when MKL is enabled.
    """
    mkl_enabled = test_util.IsMklEnabled()
    if mkl_enabled:
        # TODO(gzmkl) work with Google team to address design issue in allocator.h
        self.skipTest('MklCPUAllocator does not throw exception. So skip test.')

    expect_error = self.assertRaisesRegex(ValueError,
                                          'Cannot reset memory stats')
    with expect_error:
        config.reset_memory_stats('CPU:0')
def testAnalysisAndAllocations(self):
    """Trace a run spread over three CPU devices and check allocator maximums.

    The step stats from a FULL_TRACE run are analysed with
    `timeline.Timeline`; the resulting Chrome trace is validated and the
    CPU-side allocator maximum is inspected.
    """
    full_trace = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_metadata = config_pb2.RunMetadata()
    session_config = config_pb2.ConfigProto(device_count={'CPU': 3})

    with session.Session(config=session_config) as sess:
        with ops.device('/cpu:0'):
            num1 = variables.Variable(1.0, name='num1')
        with ops.device('/cpu:1'):
            num2 = variables.Variable(2.0, name='num2')
        with ops.device('/cpu:2'):
            result = num1 + num2 + num1 * num2
        sess.run(variables.global_variables_initializer())
        sess.run(result, options=full_trace, run_metadata=run_metadata)

    self.assertTrue(run_metadata.HasField('step_stats'))
    analysis = timeline.Timeline(run_metadata.step_stats).analyze_step_stats()
    self._validateTrace(analysis.chrome_trace.format_to_string())

    maximums = analysis.allocator_maximums
    if test_util.IsMklEnabled():
        cpuname = 'mklcpu'
    else:
        cpuname = 'cpu'
    self.assertTrue(cpuname in maximums)

    # The 'cuda_host_bfc' entry takes precedence over the plain CPU
    # allocator whenever it is present.
    if 'cuda_host_bfc' in maximums:
        cpu_max = maximums['cuda_host_bfc']
    else:
        cpu_max = maximums[cpuname]

    # At least num1 + num2, both float32s (4 bytes each)
    self.assertGreater(cpu_max.num_bytes, 8)
    self.assertGreater(cpu_max.timestamp, 0)
    for variable_name in ('num1', 'num2'):
        self.assertTrue(variable_name in cpu_max.tensors
                        or variable_name + '/read' in cpu_max.tensors)
def _maybe_skip(self, mode):
    """Skip the current test when the backend named by `mode` is unusable.

    Args:
        mode: 'cuda' and 'mkl' are the values that can trigger a skip.
    """
    if mode == 'cuda':
        if not test.is_gpu_available(cuda_only=True):
            self.skipTest('No GPU is available')
    if mode == 'mkl':
        if not test_util.IsMklEnabled():
            self.skipTest('MKL is not enabled')

    # Test will fail on machines without AVX512f, e.g., Broadwell
    bf16_supported = _pywrap_utils.IsBF16SupportedByOneDNNOnThisCPU()
    if mode == 'mkl' and not bf16_supported:
        self.skipTest('Skipping test due to non-AVX512f machine')
def testGetMemoryInfoCPU(self):
    """Expect both CPU memory queries on `config` to raise ValueError.

    Skipped when MKL is enabled.
    """
    if test_util.IsMklEnabled():
        # TODO(gzmkl) work with Google team to address design issue in allocator.h
        self.skipTest('MklCPUAllocator does not throw exception. So skip test.')

    for query_name in ('get_memory_info', 'get_memory_usage'):
        with self.assertRaisesRegex(ValueError,
                                    'Allocator stats not available'):
            getattr(config, query_name)('CPU:0')
def testSmallNetworkCost(self):
    """Build a small conv + fully-connected net and check its cost report.

    The report from `cost_analyzer.GenerateCostReport` must mention the
    expected op types, and the per-op rows must carry the expected counts.
    """
    image = array_ops.placeholder(dtypes.float32, shape=[1, 28, 28, 1])
    label = array_ops.placeholder(dtypes.float32, shape=[1, 10])

    conv_kernel = variables.Variable(
        random_ops.truncated_normal([5, 5, 1, 32], stddev=0.1))
    conv_bias = variables.Variable(
        random_ops.truncated_normal([32], stddev=0.1))
    conv = nn_ops.conv2d(
        image, conv_kernel, strides=[1, 1, 1, 1], padding="SAME")
    activated = nn_ops.relu(conv + conv_bias)
    flattened = array_ops.reshape(activated, [1, -1])

    fc_weights = variables.Variable(
        random_ops.truncated_normal([25088, 10], stddev=0.1))
    fc_bias = variables.Variable(random_ops.truncated_normal([10], stddev=0.1))
    predictions = nn_ops.softmax(
        math_ops.matmul(flattened, fc_weights) + fc_bias)

    cross_entropy = math_ops.reduce_mean(
        -math_ops.reduce_sum(label * math_ops.log(predictions), axis=[1]))
    _ = adam.AdamOptimizer(1e-4).minimize(cross_entropy)

    mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph())
    report = cost_analyzer.GenerateCostReport(mg)

    # Print the report to make it easier to debug
    print("{}".format(report))

    for required_op in (b"MatMul", b"ApplyAdam", b"Conv2DBackpropFilter",
                        b"Softmax"):
        self.assertTrue(required_op in report)

    # When mkl is enabled, Conv2D and MatMul op followed by
    # 1-dimension Add in this graph will be fused, but not
    # in the mkl disabled case.
    if test_util.IsMklEnabled():
        expected_matmul_count = 2
        op_types = [b"MatMul", b"Conv2DBackpropFilter"]
    else:
        self.assertTrue(b"Conv2D" in report)
        expected_matmul_count = 3
        op_types = [b"MatMul", b"Conv2DBackpropFilter", b"Conv2D"]

    for op_type in op_types:
        row_pattern = (
            br"\s+" + op_type + br",\s*(\d+),\s*(\d+),\s*([\d\.eE+-]+)%,\s*" +
            br"([\d\.eE+-]+)%,\s*(-?\d+),\s*(\d+),")
        row = re.search(row_pattern, report, re.MULTILINE)

        op_count = int(row.group(1))
        # upper = int(row.group(5))
        lower = int(row.group(6))

        expected_count = expected_matmul_count if op_type == b"MatMul" else 1
        self.assertEqual(expected_count, op_count)
        self.assertTrue(0 <= lower)
def testSoftmaxGradGradExtendType(self):
    """Gradient-check the gradient of softmax on bfloat16 input (MKL only).

    Returns without asserting anything when MKL is not enabled.
    """
    if not test_util.IsMklEnabled():
        return

    inputs = constant_op.constant(
        [[-2, -1, 1, 3], [5, 7, 8, 9]], dtype=dtypes.bfloat16)
    softmax_out = nn_ops.softmax(inputs)
    softmax_grad = gradients_impl.gradients(softmax_out, inputs)[0]

    with self.cached_session():
        error = gradient_checker.compute_gradient_error(
            inputs,
            inputs.get_shape(),
            softmax_grad,
            softmax_grad.get_shape())
        self.assertLess(error, 1e-4)
def testConv3D2x2x2Filter1x2x1Dilation(self):
    """Run the dilated-conv check for a 2x2x2 filter with dilations [1, 2, 1].

    The check runs when a CUDA GPU is available, or when MKL is enabled and
    execution is not eager. Otherwise nothing is asserted.
    """
    eager_context = context.context()
    is_eager = (eager_context is not None
                and eager_context.executing_eagerly())

    has_cuda_gpu = test.is_gpu_available(cuda_only=True)
    # MKL is only consulted when there is no GPU, and only counts in
    # non-eager execution.
    mkl_graph_mode = (not has_cuda_gpu
                      and test_util.IsMklEnabled()
                      and is_eager is False)
    if not (has_cuda_gpu or mkl_graph_mode):
        return

    self._VerifyDilatedConvValues(
        tensor_in_sizes=[1, 4, 6, 3, 1],
        filter_in_sizes=[2, 2, 2, 1, 1],
        stride=1,
        padding="VALID",
        dilations=[1, 2, 1])
def testGetOps(self):
    """Check `get_ops_and_kernels` before and after clearing node devices.

    The same (op, kernel) list is expected in both cases; the MatMul kernel
    names carry an 'Mkl' prefix when MKL is enabled.
    """
    default_ops = 'NoOp:NoOp,_Recv:RecvOp,_Send:SendOp'
    graphs = [
        text_format.Parse(graph_text, graph_pb2.GraphDef())
        for graph_text in [GRAPH_DEF_TXT, GRAPH_DEF_TXT_2]
    ]

    def collect_ops_and_kernels():
        # Writes the current state of `graphs` to files and queries them.
        return selective_registration_header_lib.get_ops_and_kernels(
            'rawproto', self.WriteGraphFiles(graphs), default_ops)

    ops_and_kernels = collect_ops_and_kernels()

    matmul_prefix = 'Mkl' if test_util.IsMklEnabled() else ''
    expected = [
        ('AccumulateNV2', None),
        ('BiasAdd', 'BiasOp<CPUDevice, float>'),
        ('MatMul', matmul_prefix + 'MatMulOp<CPUDevice, double, false >'),
        ('MatMul', matmul_prefix + 'MatMulOp<CPUDevice, float, false >'),
        ('NoOp', 'NoOp'),
        ('Reshape', 'ReshapeOp'),
        ('_Recv', 'RecvOp'),
        ('_Send', 'SendOp'),
    ]
    self.assertListEqual(expected, ops_and_kernels)

    # Drop the device assignment from two nodes of the first graph and
    # query again; the result must not change.
    for node_index in (0, 2):
        graphs[0].node[node_index].ClearField('device')

    ops_and_kernels = collect_ops_and_kernels()
    self.assertListEqual(expected, ops_and_kernels)
def maybe_skip_test(self, mode):
    """Skip the current test when the backend named by `mode` cannot run it.

    Args:
        mode: 'cuda' requires a non-Windows host, a CUDA GPU and a build
            whose reported CUDA version is at least 11.4; 'mkl' requires MKL
            to be enabled. Any other value skips nothing.
    """
    if mode == 'cuda':
        # It seems the windows os cannot correctly query the cuda_version.
        # TODO(kaixih@nvidia): Remove this when it works.
        running_on_windows = os.name == 'nt'
        if running_on_windows:
            self.skipTest("This test doesn't support Windows")

        if not test.is_gpu_available(cuda_only=True):
            self.skipTest('This test requires GPU.')

        # The cublaslt matmul with gelu epilog is only supported since cuda 11.4.
        build_info = sysconfig.get_build_info()
        version_text = build_info.get('cuda_version', '0.0')
        version = tuple(int(part) for part in version_text.split('.'))
        if version < (11, 4):
            self.skipTest('This test requires CUDA >= 11.4.')

    if mode == 'mkl':
        if not test_util.IsMklEnabled():
            self.skipTest('MKL is not enabled.')
def run(base_dir, ext="jpg", store_results='', smart=False):
    """Time per-image read, decode and MobileNet-v1 inference over a directory.

    Every path yielded by ``recursive_glob(base_dir, "*.<ext>")`` is read from
    disk, decoded with OpenCV, resized to the network input size and pushed
    through the network one image at a time. The timings are collected per
    image and optionally written to the database.

    Args:
        base_dir: Root directory handed to `recursive_glob`.
        ext: File extension (without the dot) of the images to process.
        store_results: When truthy, the collected rows are written through
            `dbutils`.
        smart: Not supported; a truthy value raises `NotImplementedError`.

    Raises:
        NotImplementedError: If `smart` is truthy.
        AssertionError: If no GPU is available and TensorFlow lacks MKL.
    """
    if smart:
        raise NotImplementedError

    using_gpu = tf.test.is_gpu_available()
    if using_gpu:
        logger.info("Running on GPU")
    else:
        from tensorflow.python.framework import test_util as tftest_util
        # Raised explicitly instead of via `assert`, which is removed under
        # `python -O` and would let the run continue on an unsupported build.
        if not tftest_util.IsMklEnabled():
            raise AssertionError(
                "This tensorflow is not compiled with MKL. Abort.")
        # `warn` is a deprecated alias of `warning`.
        logger.warning("Running on CPU")

    results = []

    # Download and uncompress model
    checkpoint_url = "http://download.tensorflow.org/models/mobilenet_v1_1.0_224_2017_06_14.tar.gz"
    checkpoints_dir = s3dexp.config.CKPT_DIR
    checkpoint_path = os.path.join(checkpoints_dir,
                                   'mobilenet_v1_1.0_224.ckpt')
    if not tf.gfile.Exists(checkpoints_dir):
        tf.gfile.MakeDirs(checkpoints_dir)
    # Keyed on the checkpoint files themselves rather than on the directory:
    # an existing but empty directory still triggers the download, and an
    # already-present checkpoint is never fetched again.
    if not tf.gfile.Glob(checkpoint_path + '*'):
        dataset_utils.download_and_uncompress_tarball(checkpoint_url,
                                                      checkpoints_dir)

    with tf.Graph().as_default():
        logger.info("Creating compute graph ...")

        ########################################
        # Select the model
        ########################################
        network_fn = nets_factory.get_network_fn(
            'mobilenet_v1', num_classes=1001, is_training=False)
        image_size = mobilenet_v1.mobilenet_v1.default_image_size

        ########################################
        # Define input and preprocessing tensors
        ########################################
        # Crucial to specify dtype=tf.uint8. Otherwise will get wrong
        # predictions.
        inputs = tf.placeholder(dtype=tf.uint8,
                                shape=(None, image_size, image_size, 3))
        preprocessing_fn = get_preprocessing('mobilenet_v1')
        processed_images = tf.map_fn(
            lambda x: preprocessing_fn(x, image_size, image_size),
            inputs,
            dtype=tf.float32)

        ########################################
        # Create the compute graph
        ########################################
        logits, _ = network_fn(processed_images)
        probabilities = tf.nn.softmax(logits)

        # https://github.com/tensorflow/tensorflow/issues/4196
        # https://www.tensorflow.org/programmers_guide/using_gpu
        config = tf.ConfigProto()
        # config.gpu_options.allow_growth = True
        # config.gpu_options.per_process_gpu_memory_fraction = 0.4
        with tf.Session(config=config) as sess:
            logger.info("Loading checkpoint from %s" % checkpoint_path)
            saver = tf.train.Saver()
            saver.restore(sess, checkpoint_path)

            logger.info("Warm up with a fake image")
            fakeimages = np.random.randint(
                0, 256, size=(1, image_size, image_size, 3), dtype=np.uint8)
            _ = sess.run(probabilities, feed_dict={inputs: fakeimages})

            ########################################
            # walk through directory and inference
            ########################################
            for path in recursive_glob(base_dir, "*.{}".format(ext)):
                tic = time.time()

                # 0. read from disk
                with open(path, 'rb') as f:
                    buf = f.read()
                read_time = time.time() - tic

                # 1. image decode. The encoded bytes are viewed as uint8,
                # the unsigned byte type an encoded image buffer consists of.
                arr = cv2.imdecode(np.frombuffer(buf, np.uint8),
                                   cv2.IMREAD_COLOR)
                decode_time = time.time() - tic

                h, w = arr.shape[:2]

                # 2. Run inference
                # resize
                arr_resized = cv2.resize(arr, (image_size, image_size),
                                         interpolation=cv2.INTER_AREA)
                images = np.expand_dims(arr_resized, 0)
                _ = sess.run(probabilities, feed_dict={inputs: images})

                # read_time, decode_time and all_time are all measured from
                # the same `tic`, i.e. they are cumulative.
                all_time = time.time() - tic

                logger.debug(
                    "Read {:.1f} ms, Decode {:.1f}, Total {:.1f}. {}".format(
                        read_time * 1000, decode_time * 1000,
                        all_time * 1000, path))

                results.append({
                    'path': path,
                    'read_ms': read_time * 1000,
                    'decode_ms': decode_time * 1000,
                    'total_ms': all_time * 1000,
                    'size': len(buf),
                    'height': h,
                    'width': w
                })

    if store_results:
        logger.info("Writing {} results to DB".format(len(results)))
        dbsess = dbutils.get_session()
        try:
            for r in results:
                keys_dict = {
                    'path': r['path'],
                    'basename': os.path.basename(r['path']),
                    'expname': 'mobilenet_inference',
                    'device': 'gpu' if using_gpu else 'cpu',
                    'disk': 'smart' if smart else 'hdd'
                }
                dbutils.insert_or_update_one(
                    dbsess,
                    dbmodles.AppExp,
                    keys_dict=keys_dict,
                    vals_dict={
                        'read_ms': r['read_ms'],
                        'decode_ms': r['decode_ms'],
                        'total_ms': r['total_ms'],
                        'size': r['size'],
                        'height': r['height'],
                        'width': r['width']
                    })
            dbsess.commit()
        finally:
            # Release the session even when an insert or the commit fails.
            dbsess.close()
# 5b) I find it harder to adjust the hyperparameters for predicting sin. # I have noticed it is harder to get a good model at 10 compared to 0 when using data within that interval. # However it looks like the model gets more precise when expaning the traning data to 15 or 20 # Imports import matplotlib.pyplot as plt import numpy as np import time from keras.models import Sequential from keras.layers import Dense from tensorflow.python.framework import test_util from tensorflow.python.client import device_lib print(device_lib.list_local_devices()) print(test_util.IsMklEnabled()) # Load training data x = np.random.uniform(0, 10, (20000, 1)) y = np.sin(x) # # Define model # model = Sequential() # model.add(Dense(140, input_dim=1, activation='relu')) # model.add(Dense(1)) # model.compile(loss='mean_squared_error', optimizer='adam') # prefit = time.time() # model.fit(x, y, epochs=100, batch_size=50) # postfit = time.time() # Define model model = Sequential()
def collect_env_info():
    """
    Collect facts about the running environment (platform, library versions,
    TensorFlow build flags, GPUs, memory, optional dependencies).

    Returns:
        str - the collected (name, value) rows rendered by ``tabulate``
    """
    rows = [
        ("sys.platform", sys.platform),
        ("Python", sys.version.replace("\n", "")),
        ("Tensorpack",
         __git_version__ + " @" + os.path.dirname(tensorpack.__file__)),
        ("Numpy", np.__version__),
        ("TensorFlow",
         tfv1.VERSION + "/" + tfv1.GIT_VERSION + " @" + os.path.dirname(tf.__file__)),
        ("TF Compiler Version", tfv1.COMPILER_VERSION),
    ]
    has_cuda = tf.test.is_built_with_cuda()
    rows.append(("TF CUDA support", has_cuda))

    # Each probe is best-effort: any failure just leaves its row out.
    for label, probe_name in (("TF MKL support", "IsMklEnabled"),
                              ("TF XLA support", "is_xla_enabled")):
        try:
            from tensorflow.python.framework import test_util
            rows.append((label, getattr(test_util, probe_name)()))
        except Exception:
            pass

    if has_cuda:
        rows.append(("Nvidia Driver", find_library("nvidia-ml")))
        rows.append(("CUDA libs", find_library("cudart")))
        rows.append(("CUDNN libs", find_library("cudnn")))
        rows.extend(parse_TF_build_info().items())
        rows.append(("NCCL libs", find_library("nccl")))

        # List devices with NVML
        visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "Unspecified")
        rows.append(("CUDA_VISIBLE_DEVICES", visible_devices))
        try:
            indices_by_name = defaultdict(list)
            with NVMLContext() as ctx:
                for idx, dev in enumerate(ctx.devices()):
                    indices_by_name[dev.name()].append(str(idx))

            # One row per device name, listing every index that has it.
            for devname, devids in indices_by_name.items():
                rows.append(("GPU " + ",".join(devids), devname))
        except Exception:
            rows.append(("GPU", "Not found with NVML"))

    gib = 1024**3
    vram = psutil.virtual_memory()
    free_ram = "{:.2f}/{:.2f} GB".format(vram.available / gib, vram.total / gib)
    rows.append(("Free RAM", free_ram))
    rows.append(("CPU Count", psutil.cpu_count()))

    # Other important dependencies:
    try:
        import horovod
        rows.append(
            ("Horovod",
             horovod.__version__ + " @" + os.path.dirname(horovod.__file__)))
    except ImportError:
        pass

    try:
        import cv2
        rows.append(("cv2", cv2.__version__))
    except ImportError:
        pass

    import msgpack
    rows.append(("msgpack", ".".join(map(str, msgpack.version))))

    try:
        import prctl
        _ = prctl.set_pdeathsig  # noqa
        has_prctl = True
    except Exception:
        has_prctl = False
    rows.append(("python-prctl", has_prctl))

    return tabulate(rows)
def testArithmeticOptimizationActive(self):
    """Tests that tfdbg can dump the tensor from nodes created by Grappler."""
    with session.Session(
            config=_grappler_enabled_session_config()) as sess:
        u = variables.VariableV1(
            [[1, 2], [3, 4]], name="u", dtype=dtypes.float32)
        # The next two ops should be optimized by Grappler into a single op:
        # either an AddN op or a Mul op.
        doubled = math_ops.add(u, u)
        tripled = math_ops.add(doubled, u)
        y = math_ops.multiply(tripled, u)

        sess.run(variables.global_variables_initializer())

        run_options = config_pb2.RunOptions(output_partition_graphs=True)
        debug_utils.watch_graph(
            run_options,
            sess.graph,
            debug_ops=["DebugIdentity"],
            debug_urls=[self._debug_url])

        run_metadata = config_pb2.RunMetadata()
        run_result = sess.run(
            y, options=run_options, run_metadata=run_metadata)
        self.assertAllClose(run_result, [[3, 12], [27, 48]])

        dump_data = debug_data.DebugDumpDir(
            self._dump_root,
            partition_graphs=run_metadata.partition_graphs,
            validate=True)

        original_node_names = {
            op.name for op in sess.graph.get_operations()}
        dumped_node_names = set(dump_data.nodes())
        grappler_created_node_names = dumped_node_names - original_node_names
        grappler_removed_node_names = original_node_names - dumped_node_names

        # Assert that Grappler should have replaced some of the nodes from
        # the original graph with new nodes.
        self.assertTrue(grappler_created_node_names)
        self.assertTrue(grappler_removed_node_names)

        # Op types accepted as the product of the arithmetic optimization;
        # "_MklAddN" is accepted in addition when MKL is enabled.
        accepted_op_types = {"AddN", "Mul"}
        if test_util.IsMklEnabled():
            accepted_op_types.add("_MklAddN")

        # Iterate through the nodes created by Grappler. One of them should
        # be the result of replacing the original add ops with an AddN op
        # or a Mul op.
        found_optimized_node = False
        for grappler_node_name in grappler_created_node_names:
            node_op_type = dump_data.node_op_type(grappler_node_name)
            # Look for the node created by Grappler's arithmetic optimization.
            if node_op_type not in accepted_op_types:
                continue
            datum = dump_data.get_tensors(
                grappler_node_name, 0, "DebugIdentity")
            self.assertEqual(1, len(datum))
            self.assertAllClose(datum[0], [[3, 6], [9, 12]])
            found_optimized_node = True
            break

        self.assertTrue(
            found_optimized_node,
            "Failed to find optimized node created by Grappler's arithmetic "
            "optimization.")
def run(base_dir,
        ext="jpg",
        store_results='',
        smart=False,
        batch_size=8,
        num_parallel_calls=None,
        etl_only=False):
    """Time batched MobileNet-v1 inference over a directory via tf.data.

    Paths from ``recursive_glob(base_dir, "*.<ext>")`` are loaded, decoded
    and resized inside a `tf.data.Dataset` pipeline, batched, and fed to the
    network. A summary with the average milliseconds per image is logged at
    the end.

    Args:
        base_dir: Root directory handed to `recursive_glob`.
        ext: File extension (without the dot) of the images to process.
        store_results: Not used by this function.
        smart: Not supported; a truthy value raises `NotImplementedError`.
        batch_size: Batch size of the dataset. A trailing batch smaller than
            this is not counted in the statistics.
        num_parallel_calls: Parallelism of `Dataset.map`; falls back to
            `batch_size` when falsy.
        etl_only: When truthy, only the input batch is evaluated (load and
            resize), not the network output.

    Raises:
        NotImplementedError: If `smart` is truthy.
        AssertionError: If no GPU is available and TensorFlow lacks MKL.
    """
    # adjust default parameters
    if not num_parallel_calls:
        num_parallel_calls = batch_size

    # GPU or CPU?
    using_gpu = tf.test.is_gpu_available()
    if using_gpu:
        logger.info("Running on GPU")
    else:
        from tensorflow.python.framework import test_util as tftest_util
        # Raised explicitly instead of via `assert`, which is removed under
        # `python -O` and would let the run continue on an unsupported build.
        if not tftest_util.IsMklEnabled():
            raise AssertionError(
                "This tensorflow is not compiled with MKL. Abort.")
        # `warn` is a deprecated alias of `warning`.
        logger.warning("Running on CPU")

    # Download and uncompress model
    checkpoint_url = "http://download.tensorflow.org/models/mobilenet_v1_1.0_224_2017_06_14.tar.gz"
    checkpoints_dir = s3dexp.config.CKPT_DIR
    checkpoint_path = os.path.join(checkpoints_dir,
                                   'mobilenet_v1_1.0_224.ckpt')
    if not tf.gfile.Exists(checkpoints_dir):
        tf.gfile.MakeDirs(checkpoints_dir)
    # Keyed on the checkpoint files themselves rather than on the directory:
    # an existing but empty directory still triggers the download, and an
    # already-present checkpoint is never fetched again.
    if not tf.gfile.Glob(checkpoint_path + '*'):
        dataset_utils.download_and_uncompress_tarball(checkpoint_url,
                                                      checkpoints_dir)

    # Prepare the `load_and_preprocess_fn` function to be passed into
    # Dataset.map.
    # NOTE: in graph mode, this function takes in tensor and adds operators
    # to the graph. `image_size` is resolved when the function is called,
    # after it has been assigned below.
    if not smart:

        def load_and_preprocess_fn(path):  # path is tensor
            # 0. read from disk
            raw = tf.read_file(path)
            # 1. image decode
            # tf.image.decode_image() doesn't return shape, causing error
            # https://stackoverflow.com/questions/44942729/tensorflowvalueerror-images-contains-no-shape
            image = tf.image.decode_jpeg(raw, channels=3)
            # 2. resize
            image_resize = tf.image.resize_images(image,
                                                  (image_size, image_size))
            return image_resize  # Tensor
    else:
        # TODO use our smart storage here
        raise NotImplementedError

        # Unreachable sketch of the smart-storage loader, kept for the TODO
        # above.
        def load_and_preprocess_fn(path):

            def smart_fn(path):
                # this pure Python function will actually be called many
                # times, by multiple threads if num_parallel_calls>1
                logger.debug("Enter smart_fn. Path {}".format(path))
                # TODO replace with real smart storage logic
                fakeimage = np.random.randint(
                    0, 256, size=(image_size, image_size, 3), dtype=np.uint8)
                logger.debug("Exit smart_fn")
                return fakeimage

            out_op = tf.py_func(smart_fn, [path], tf.uint8)
            # must explicitly set shape to avoid error
            out_op.set_shape([image_size, image_size, 3])
            return out_op

    with tf.Graph().as_default():
        logger.info("Creating compute graph ...")

        ########################################
        # Select the model
        ########################################
        network_fn = nets_factory.get_network_fn(
            'mobilenet_v1', num_classes=1001, is_training=False)
        image_size = mobilenet_v1.mobilenet_v1.default_image_size

        ########################################
        # Create a tf.data.Dataset with batching
        ########################################
        all_paths = list(recursive_glob(base_dir, "*.{}".format(ext)))
        logger.info("Found {} paths".format(len(all_paths)))
        path_ds = tf.data.Dataset.from_tensor_slices(all_paths)
        image_ds = path_ds.map(
            load_and_preprocess_fn,
            num_parallel_calls=num_parallel_calls).batch(batch_size)
        # create iterator
        iterator = image_ds.make_initializable_iterator()
        batch_of_images = iterator.get_next()

        ########################################
        # Define input and preprocessing tensors
        ########################################
        preprocessing_fn = get_preprocessing('mobilenet_v1')
        processed_images = tf.map_fn(
            lambda x: preprocessing_fn(x, image_size, image_size),
            batch_of_images,
            dtype=tf.float32)

        ########################################
        # Create the compute graph
        ########################################
        logits, _ = network_fn(processed_images)
        probabilities = tf.nn.softmax(logits)

        config = tf.ConfigProto()
        with tf.Session(config=config) as sess:
            logger.info("Loading checkpoint from %s" % checkpoint_path)
            saver = tf.train.Saver()
            saver.restore(sess, checkpoint_path)

            # initialize Dataset iterator
            sess.run(iterator.initializer)

            logger.info("Warm up with a fake batch")
            fakeimages = np.random.randint(
                0, 256,
                size=(batch_size, image_size, image_size, 3)).astype(np.float32)
            _ = sess.run(probabilities,
                         feed_dict={processed_images: fakeimages})

            # Initialised before the `try` so the summary in `finally` can
            # always read them.
            count_image = 0
            count_batch = 0
            elapsed = 0.
            try:
                tic = time.time()
                while True:
                    if etl_only:
                        res = sess.run(batch_of_images)
                    else:
                        res = sess.run(probabilities)
                    toc = time.time()
                    logger.debug(
                        "Batch {}, batch size {}, elapsed {:.1f}".format(
                            count_batch, res.shape[0],
                            1000 * (toc - tic - elapsed)))
                    if res.shape[0] < batch_size:
                        # discard last batch
                        continue
                    elapsed = toc - tic
                    count_batch += 1
                    count_image += batch_size
            except tf.errors.OutOfRangeError:
                # The dataset is exhausted; this is the normal loop exit.
                pass
            finally:
                # With no full batch processed there is nothing to average.
                # Dividing by zero here would also replace any in-flight
                # exception with a ZeroDivisionError.
                if count_image:
                    avg_ms_per_image = elapsed * 1000 / count_image
                else:
                    avg_ms_per_image = float('nan')
                logger.info(
                    "Ran {} batches, {} images, batch size {}, avg ms/image {:.2f}"
                    .format(count_batch, count_image, batch_size,
                            avg_ms_per_image))
def collect_env_info():
    """
    Collect facts about the running environment (library versions,
    TensorFlow build flags, GPUs, optional dependencies).

    Returns:
        str - the collected (name, value) rows rendered by ``tabulate``
    """
    rows = []

    def add(name, value):
        rows.append((name, value))

    add("Python", sys.version.replace("\n", ""))
    add("Tensorpack", __git_version__)
    add("TensorFlow", tfv1.VERSION + "/" + tfv1.GIT_VERSION)
    add("TF Compiler Version", tfv1.COMPILER_VERSION)
    has_cuda = tf.test.is_built_with_cuda()
    add("TF CUDA support", has_cuda)

    # Best-effort probes: any failure just leaves the row out.
    try:
        from tensorflow.python.framework import test_util
        add("TF MKL support", test_util.IsMklEnabled())
    except Exception:
        pass

    try:
        from tensorflow.python.framework import test_util
        add("TF XLA support", test_util.is_xla_enabled())
    except Exception:
        pass

    if has_cuda:
        for label, library in (("Nvidia Driver", "nvidia-ml"),
                               ("CUDA", "cudart"),
                               ("CUDNN", "cudnn"),
                               ("NCCL", "nccl")):
            add(label, find_library(library))

        # List devices with NVML
        add("CUDA_VISIBLE_DEVICES",
            os.environ.get("CUDA_VISIBLE_DEVICES", str(None)))
        try:
            indices_by_name = defaultdict(list)
            with NVMLContext() as ctx:
                for idx, dev in enumerate(ctx.devices()):
                    indices_by_name[dev.name()].append(str(idx))

            # One row per device name, listing every index that has it.
            for devname, devids in indices_by_name.items():
                add("GPU " + ",".join(devids), devname)
        except Exception:
            add("GPU", "Not found with NVML")

    # Other important dependencies
    try:
        import horovod
        add("horovod", horovod.__version__)
    except ImportError:
        pass

    try:
        import cv2
        add("cv2", cv2.__version__)
    except ImportError:
        pass

    import msgpack
    add("msgpack", ".".join(map(str, msgpack.version)))

    try:
        import prctl
        _ = prctl.set_pdeathsig
        has_prctl = True
    except Exception:
        has_prctl = False
    add("python-prctl", has_prctl)

    return tabulate(rows)
def check_tf_mkl(self):
    """Fail unless this TensorFlow build reports MKL as enabled.

    Raises:
        AssertionError: If `test_util.IsMklEnabled()` is false.
    """
    # Raised explicitly instead of via `assert`: assert statements are
    # removed under `python -O`, which would turn this check into a no-op.
    if not test_util.IsMklEnabled():
        raise AssertionError("tensorflow doesn't have mkl enabled")
parser.add_argument( '--config', help="A list of KEY=VALUE to overwrite those defined in config.py", nargs='+') parser.add_argument('--compact', help='Save a model to .pb') parser.add_argument('--serving', help='Save a model to serving file') args = parser.parse_args() if args.config: cfg.update_args(args.config) register_coco(cfg.DATA.BASEDIR) # add COCO datasets to the registry MODEL = ResNetFPNModel() if cfg.MODE_FPN else ResNetC4Model() if not tf.test.is_gpu_available(): from tensorflow.python.framework import test_util assert get_tf_version_tuple() >= (1, 7) and test_util.IsMklEnabled(), \ "Inference requires either GPU support or MKL support!" assert args.load finalize_configs(is_training=False) if args.predict or args.visualize: cfg.TEST.RESULT_SCORE_THRESH = cfg.TEST.RESULT_SCORE_THRESH_VIS if args.visualize: do_visualize(MODEL, args.load) else: predcfg = PredictConfig( model=MODEL, session_init=get_model_loader(args.load), input_names=MODEL.get_inference_tensor_names()[0], output_names=MODEL.get_inference_tensor_names()[1])
def collect_env_info():
    """
    Collect facts about the running environment (library versions,
    TensorFlow build flags, CUDA libraries, GPUs, optional dependencies).

    Returns:
        str - the collected (name, value) rows rendered by ``tabulate``
    """
    data = [
        ("Python", sys.version.replace("\n", "")),
        ("Tensorpack", __git_version__),
        ("TensorFlow", tfv1.VERSION + "/" + tfv1.GIT_VERSION),
        ("TF Compiler Version", tfv1.COMPILER_VERSION),
        ("TF CUDA support", tf.test.is_built_with_cuda()),
    ]

    # Best-effort probes: any failure just leaves the row out.
    try:
        from tensorflow.python.framework import test_util
        data.append(("TF MKL support", test_util.IsMklEnabled()))
    except Exception:
        pass

    try:
        from tensorflow.python.framework import test_util
        data.append(("TF XLA support", test_util.is_xla_enabled()))
    except Exception:
        pass

    def find_library_with_ldconfig(ldconfig, lib):
        # Read sonames from ldconfig: may not be accurate
        # similar to from ctypes.util import find_library, but with full path
        pattern = r'\s+(lib%s\.[^\s]+)\s+\(.*=>\s+(.*)' % (re.escape(lib))
        match = re.search(pattern, ldconfig)
        if match:
            return os.path.realpath(match.group(2))
        return None

    try:
        with change_env('LC_ALL', 'C'), change_env('LANG', 'C'):
            ldconfig, ret = subproc_call("ldconfig -p")
        assert ret == 0
        ldconfig = ldconfig.decode('utf-8')

        def find_library(x):
            return find_library_with_ldconfig(ldconfig, x)

    except Exception:
        # Fall back to the ctypes lookup when ldconfig cannot be used.
        from ctypes.util import find_library

    for label, library in (("CUDA", "cudart"),
                           ("CUDNN", "cudnn"),
                           ("NCCL", "nccl")):
        data.append((label, find_library(library)))

    # List devices with NVML
    data.append(("CUDA_VISIBLE_DEVICES",
                 os.environ.get("CUDA_VISIBLE_DEVICES", str(None))))
    try:
        indices_by_name = defaultdict(list)
        with NVMLContext() as ctx:
            for idx, dev in enumerate(ctx.devices()):
                indices_by_name[dev.name()].append(str(idx))

        # One row per device name, listing every index that has it.
        for devname, devids in indices_by_name.items():
            data.append(("GPU " + ",".join(devids) + " Model", devname))
    except Exception:
        pass

    # Other important dependencies
    try:
        import horovod
        data.append(("horovod", horovod.__version__))
    except ImportError:
        pass

    try:
        import cv2
        data.append(("cv2", cv2.__version__))
    except ImportError:
        pass

    import msgpack
    data.append(("msgpack", ".".join(map(str, msgpack.version))))

    try:
        import prctl
        _ = prctl.set_pdeathsig
        has_prctl = True
    except Exception:
        has_prctl = False
    data.append(("python-prctl", has_prctl))

    return tabulate(data)
def _maybe_skip(self, mode):
    """Skip the current test when the backend named by `mode` is unavailable.

    Args:
        mode: 'cuda' requires a CUDA GPU, 'mkl' requires MKL to be enabled.
            Any other value skips nothing.
    """
    if mode == 'cuda':
        if not test.is_gpu_available(cuda_only=True):
            self.skipTest('No GPU is available')
    if mode == 'mkl':
        if not test_util.IsMklEnabled():
            self.skipTest('MKL is not enabled')
def _maybe_skip(self, mode):
    """Skip the current test for 'cuda', or for 'mkl' when MKL is disabled.

    Args:
        mode: 'cuda' is always skipped; 'mkl' is skipped unless MKL is
            enabled. Any other value skips nothing.
    """
    if mode == 'cuda':
        self.skipTest('This test does not pass on GPU.')
    if mode != 'mkl':
        return
    if not test_util.IsMklEnabled():
        self.skipTest('MKL is not enabled.')
logger.warn( "TF<1.6 has a bug which may lead to crash in FasterRCNN if you're unlucky." ) args = parser.parse_args() if args.config: cfg.update_args(args.config) MODEL = ResNetFPNModel() if cfg.MODE_FPN else ResNetC4Model() YCBVDetectionDataset( ) # initialize the config with information from our dataset if args.visualize or args.evaluate or args.predict: if not tf.test.is_gpu_available(): from tensorflow.python.framework import test_util assert test_util.IsMklEnabled( ), "Inference requires either GPU support or MKL support!" assert args.load finalize_configs(is_training=False) if args.predict or args.visualize: cfg.TEST.RESULT_SCORE_THRESH = cfg.TEST.RESULT_SCORE_THRESH_VIS if args.visualize: do_visualize(MODEL, args.load) else: predcfg = PredictConfig( model=MODEL, session_init=get_model_loader(args.load), input_names=MODEL.get_inference_tensor_names()[0], output_names=MODEL.get_inference_tensor_names()[1]) if args.predict: