def myinit(session_object, target='', graph=None, config=None):
    """Monkey-patch replacement for ``Session.__init__`` that forces a custom config.

    Deliberately discards the caller-supplied ``config`` and substitutes one
    with default optimizer options and GPU memory growth enabled.
    ``oldinit`` must hold the original ``Session.__init__``.
    """
    print("Intercepted!")
    forced_opts = tf.OptimizerOptions()
    forced_config = tf.ConfigProto(
        graph_options=tf.GraphOptions(optimizer_options=forced_opts))
    forced_config.gpu_options.allow_growth = True
    return oldinit(session_object, target, graph, forced_config)
def testDecoderSampleTargetSequences(self):
    """Samples target sequences and checks seed-dependent determinism.

    Verifies that dec.SampleTargetSequences:
      * produces ids of shape [batch_size, target_seq_len],
      * matches golden ids/lengths for a fixed random seed,
      * reproduces the same output for the same seed,
      * produces different sequences for a different seed.
    """
    p = self._DecoderParams(
        vn_config=py_utils.VariationalNoiseParams(None, False, False),
        num_classes=8)
    p.target_seq_len = 5
    p.random_seed = 1
    # Function inlining disabled for this session; presumably required for
    # reproducible sampling — NOTE(review): confirm.
    config = tf.ConfigProto(graph_options=tf.GraphOptions(
        optimizer_options=tf.OptimizerOptions(do_function_inlining=False)))
    with self.session(use_gpu=False, config=config) as sess:
        tf.set_random_seed(8372740)
        np.random.seed(35315)
        dec = p.cls(p)
        source_sequence_length = 5
        batch_size = 4
        # Encodings are time-major: [src_len, batch, source_dim].
        source_encodings = tf.constant(
            np.random.normal(
                size=[source_sequence_length, batch_size, p.source_dim]),
            dtype=tf.float32)
        # 1.0 marks padded (past-end) source positions per batch element.
        source_encoding_padding = tf.constant(
            [[0.0, 0.0, 0.0, 0.0],
             [0.0, 0.0, 0.0, 1.0],
             [0.0, 1.0, 1.0, 1.0],
             [0.0, 1.0, 1.0, 1.0],
             [0.0, 1.0, 1.0, 1.0]],
            dtype=tf.float32)
        encoder_outputs = py_utils.NestedMap(
            encoded=source_encodings, padding=source_encoding_padding)
        sampled_sequences = dec.SampleTargetSequences(
            dec.theta, encoder_outputs, random_seed=tf.to_int32(123))
        self.assertAllEqual([batch_size, p.target_seq_len],
                            sampled_sequences.ids.shape)
        tf.global_variables_initializer().run()
        decoder_output = sess.run(sampled_sequences)
        print('ids=%s' % np.array_repr(decoder_output.ids))
        # Length of each sampled sequence = number of non-padding steps.
        lens = np.sum(1 - decoder_output.paddings, axis=1)
        print('lens=%s' % lens)
        # pyformat: disable
        # pylint: disable=bad-whitespace,bad-continuation
        expected_ids = [[6, 2, 2, 2, 2],
                        [0, 0, 7, 5, 1],
                        [6, 1, 5, 2, 2],
                        [6, 7, 7, 4, 5]]
        # pylint: enable=bad-whitespace,bad-continuation
        # pyformat: enable
        expected_lens = [2, 5, 4, 5]
        self.assertAllEqual(expected_lens, lens)
        self.assertAllEqual(expected_ids, decoder_output.ids)
        # Sample again with the same random seed.
        decoder_output2 = sess.run(
            dec.SampleTargetSequences(
                dec.theta, encoder_outputs, random_seed=tf.to_int32(123)))
        # Get the same output.
        self.assertAllEqual(decoder_output.ids, decoder_output2.ids)
        self.assertAllEqual(decoder_output.paddings, decoder_output2.paddings)
        # Sample again with a different random seed.
        decoder_output3 = sess.run(
            dec.SampleTargetSequences(
                dec.theta, encoder_outputs, random_seed=tf.to_int32(123456)))
        # Get different sequences.
        self.assertNotAllClose(expected_ids, decoder_output3.ids)
def create_session_config(log_device_placement=True,
                          enable_graph_rewriter=False,
                          gpu_mem_fraction=0.95,
                          xla_jit_level=tf.OptimizerOptions.OFF,
                          inter_op_parallelism_threads=0,
                          intra_op_parallelism_threads=0):
    """Build a tf.ConfigProto with either grappler layout rewriting or classic
    L1 optimization (inlining off, optional XLA JIT level)."""
    if enable_graph_rewriter:
        rewriter = rewriter_config_pb2.RewriterConfig()
        rewriter.layout_optimizer = rewriter_config_pb2.RewriterConfig.ON
        graph_opts = tf.GraphOptions(rewrite_options=rewriter)
    else:
        graph_opts = tf.GraphOptions(
            optimizer_options=tf.OptimizerOptions(
                opt_level=tf.OptimizerOptions.L1,
                do_function_inlining=False,
                global_jit_level=xla_jit_level,
            ))
    return tf.ConfigProto(
        allow_soft_placement=True,
        graph_options=graph_opts,
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_mem_fraction),
        log_device_placement=log_device_placement,
        inter_op_parallelism_threads=inter_op_parallelism_threads,
        intra_op_parallelism_threads=intra_op_parallelism_threads,
        isolate_session_state=True)
def create_session_config(log_device_placement=False,
                          enable_graph_rewriter=False,
                          gpu_mem_fraction=0.95,
                          use_tpu=False):
    """The TensorFlow Session config to use."""
    if use_tpu:
        graph_opts = tf.GraphOptions()
    elif enable_graph_rewriter:
        rewriter = rewriter_config_pb2.RewriterConfig()
        rewriter.layout_optimizer = rewriter_config_pb2.RewriterConfig.ON
        graph_opts = tf.GraphOptions(rewrite_options=rewriter)
    else:
        graph_opts = tf.GraphOptions(
            optimizer_options=tf.OptimizerOptions(
                opt_level=tf.OptimizerOptions.L1,
                do_function_inlining=False))
    gpu_opts = tf.GPUOptions(
        per_process_gpu_memory_fraction=gpu_mem_fraction)
    # Horovod: grow GPU memory on demand and pin this process to the GPU
    # matching its local rank.
    gpu_opts.allow_growth = True
    gpu_opts.visible_device_list = str(hvd.local_rank())
    return tf.ConfigProto(
        allow_soft_placement=True,
        graph_options=graph_opts,
        gpu_options=gpu_opts,
        log_device_placement=log_device_placement)
def session_config():
    """Session config with L0 (no) graph optimization and a 10-second op timeout."""
    graph_opts = tf.GraphOptions(
        optimizer_options=tf.OptimizerOptions(opt_level=tf.OptimizerOptions.L0))
    config = tf.ConfigProto(graph_options=graph_opts)
    # Abort any single op that runs longer than 10 seconds.
    config.operation_timeout_in_ms = 10 * 1000
    return config
def create_session_config(log_device_placement=False,
                          enable_graph_rewriter=False,
                          gpu_mem_fraction=0.95,
                          use_tpu=False):
    """The TensorFlow Session config to use."""
    if use_tpu:
        graph_opts = tf.GraphOptions()
    elif enable_graph_rewriter:
        # Enable an explicit list of grappler passes.
        rewriter = rewriter_config_pb2.RewriterConfig()
        for pass_name in ("pruning", "constfold", "arithmetic", "layout"):
            rewriter.optimizers.append(pass_name)
        graph_opts = tf.GraphOptions(rewrite_options=rewriter)
    else:
        graph_opts = tf.GraphOptions(
            optimizer_options=tf.OptimizerOptions(
                opt_level=tf.OptimizerOptions.L1,
                do_function_inlining=False))
    return tf.ConfigProto(
        allow_soft_placement=True,
        graph_options=graph_opts,
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_mem_fraction),
        log_device_placement=log_device_placement)
def create_session_config(log_device_placement=False,
                          enable_graph_rewriter=False,
                          gpu_mem_fraction=0.95,
                          use_tpu=False,
                          inter_op_parallelism_threads=0,
                          intra_op_parallelism_threads=0):
    """The TensorFlow Session config to use."""
    if use_tpu:
        graph_opts = tf.GraphOptions()
    elif enable_graph_rewriter:
        rewriter = rewriter_config_pb2.RewriterConfig()
        rewriter.layout_optimizer = rewriter_config_pb2.RewriterConfig.ON
        graph_opts = tf.GraphOptions(rewrite_options=rewriter)
    else:
        graph_opts = tf.GraphOptions(
            optimizer_options=tf.OptimizerOptions(
                opt_level=tf.OptimizerOptions.L1,
                do_function_inlining=False))
    return tf.ConfigProto(
        allow_soft_placement=True,
        graph_options=graph_opts,
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_mem_fraction),
        log_device_placement=log_device_placement,
        inter_op_parallelism_threads=inter_op_parallelism_threads,
        intra_op_parallelism_threads=intra_op_parallelism_threads)
def default_config():
    """ConfigProto with optimizations disabled and strict device placement."""
    config = tf.ConfigProto(graph_options=tf.GraphOptions(
        optimizer_options=tf.OptimizerOptions(
            opt_level=tf.OptimizerOptions.L0)))
    config.log_device_placement = False
    config.allow_soft_placement = False
    return config
def test_resize_to_dynamic_shape(self):
    """Reshape to a shuffled permutation of the input shape must agree on CPU and nGraph."""
    # Test input with some arbitrary shape.
    test_input = np.random.rand(128, 10, 10, 20, 5)
    val = tf.placeholder(tf.float32, shape=(128, 10, 10, 20, 5))

    # Reshape to a random permutation of the input shape. A fixed seed makes
    # CPU and nGraph produce the same permutation, and shuffling tf.shape at
    # runtime keeps the reshape dynamic so constant folding can't remove it.
    seed = random.randint(0, 999999)
    out = tf.reshape(val, tf.random_shuffle(tf.shape(val), seed=seed))

    def run_test(sess):
        return sess.run(out, feed_dict={val: test_input})

    # Disable as much optimization as we can.
    config = tf.ConfigProto(
        graph_options=tf.GraphOptions(
            optimizer_options=tf.OptimizerOptions(
                opt_level=tf.OptimizerOptions.L0,
                do_common_subexpression_elimination=False,
                do_constant_folding=False,
                do_function_inlining=False,
            )))
    reference = self.without_ngraph(run_test, config)
    candidate = self.with_ngraph(run_test, config)
    assert (reference == candidate).all()
def GenerateTensorflowConfig(
        num_networks: int = 4,
        num_gpu: int = 1,
        growth: bool = False,
        gpu_memory_ratio: float = 0.95) -> tf.ConfigProto:
    """Build a ConfigProto with L1 graph optimization and XLA JIT disabled.

    Args:
      num_networks: unused — kept for interface compatibility. (The original
        body incremented it and then never read it; that dead statement is
        removed here.)
      num_gpu: number of GPU devices to expose via device_count.
      growth: whether GPU memory is allocated on demand.
      gpu_memory_ratio: NOTE(review): never applied anywhere in this body —
        confirm whether per_process_gpu_memory_fraction was intended.

    Returns:
      A tf.ConfigProto with thread pools sized to the CPU count.
    """
    num_cpu = cpu_count()
    optimizer_options = tf.OptimizerOptions(
        do_common_subexpression_elimination=True,
        do_constant_folding=True,
        do_function_inlining=True,
        opt_level=tf.OptimizerOptions.L1)
    config = tf.ConfigProto(
        device_count={'GPU': num_gpu},
        allow_soft_placement=True,
        intra_op_parallelism_threads=num_cpu,
        inter_op_parallelism_threads=num_cpu,
        graph_options=tf.GraphOptions(optimizer_options=optimizer_options),
        gpu_options=tf.GPUOptions(allow_growth=growth))
    # Explicitly keep XLA JIT compilation off.
    config.graph_options.optimizer_options.global_jit_level = (
        tf.OptimizerOptions.OFF)
    return config
def testFoo(self):
    """Checks Forward's value and gradient match goldens with and without inlining.

    Builds a 10-step chain of the Defun'd Cell and asserts the forward value
    and the gradient sum are the same for noinline in {False, True}.
    """
    dtype = tf.float32
    # All classic optimizations on, but at L0 opt level.
    cfg = tf.ConfigProto(graph_options=tf.GraphOptions(
        optimizer_options=tf.OptimizerOptions(
            opt_level=tf.OptimizerOptions.L0,
            do_common_subexpression_elimination=True,
            do_function_inlining=True,
            do_constant_folding=True)))
    for noinline in [False, True]:
        g = tf.Graph()
        with g.as_default():

            @function.Defun(dtype)
            def Cell(v):
                # If v is a vector [n, 1], x is a big square matrix.
                x = tf.tanh(v + tf.transpose(v, [1, 0]))
                return tf.reduce_sum(x, 1, keep_dims=True)

            @function.Defun(dtype)
            def Forward(x):
                # Apply Cell 10 times; noinline toggles per-call inlining.
                for _ in range(10):
                    x = Cell(x, noinline=noinline)
                return tf.reduce_sum(x, [0, 1])

            x = tf.placeholder(dtype)
            y = Forward(x)
            dx, = tf.gradients([y], [x])

            # Fixed seed so the golden values below hold.
            np.random.seed(321)
            inp = np.random.uniform(-1, 1, [16, 1]).astype(np.float32)
            with tf.Session(graph=g, config=cfg) as sess:
                ans = sess.run([y, dx], {x: inp})
                print(ans[0], np.sum(ans[1]))
                self.assertAllClose(ans[0], 255.971, rtol=1e-3)
                self.assertAllClose(np.sum(ans[1]), 13.0408, rtol=1e-3)
def _testDecoderFPropFloatHelper(self,
                                 func_inline=False,
                                 num_decoder_layers=1,
                                 target_seq_len=5,
                                 residual_start=0):
    """Computes decoder from params and computes loss with random inputs.

    Args:
      func_inline: whether function inlining is enabled in the session config.
      num_decoder_layers: number of decoder RNN layers.
      target_seq_len: length of the target sequence.
      residual_start: first layer index with residual connections.

    Returns:
      The scalar loss value from one FProp over fixed random inputs.
    """
    config = tf.ConfigProto(graph_options=tf.GraphOptions(
        optimizer_options=tf.OptimizerOptions(
            do_function_inlining=func_inline)))
    with self.session(graph=tf.Graph(), use_gpu=False, config=config) as sess:
        # Fixed seed so the random encoder output is reproducible.
        tf.set_random_seed(8372749040)
        vn_config = py_utils.VariationalNoiseParams(None, False, False)
        p = self._DecoderParams(vn_config)
        p.rnn_layers = num_decoder_layers
        p.residual_start = residual_start
        p.target_seq_len = target_seq_len
        dec = p.cls(p)
        src_seq_len = 5
        # Encoder output is time-major: [src_len, batch=2, dim=8].
        src_enc = tf.random_normal([src_seq_len, 2, 8], seed=9283748)
        src_enc_padding = tf.constant(
            [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]],
            dtype=tf.float32)
        encoder_outputs = py_utils.NestedMap(encoded=src_enc,
                                             padding=src_enc_padding)
        # Targets are transposed to time-major: [target_seq_len, batch=4].
        target_ids = tf.transpose(
            tf.constant([[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 15],
                         [5, 6, 7, 8], [10, 5, 2, 5]],
                        dtype=tf.int32))
        target_labels = tf.transpose(
            tf.constant([[0, 1, 2, 3], [1, 2, 3, 4], [10, 11, 12, 13],
                         [5, 7, 8, 10], [10, 5, 2, 4]],
                        dtype=tf.int32))
        target_paddings = tf.transpose(
            tf.constant([[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 0],
                         [0, 1, 0, 0], [1, 1, 1, 1]],
                        dtype=tf.float32))
        target_transcripts = tf.constant(
            ['abcd', 'bcde', 'klmp', 'fghi', 'kfcf'])
        # Weight every non-padded target position equally.
        target_weights = 1.0 - target_paddings
        targets = py_utils.NestedMap({
            'ids': target_ids,
            'labels': target_labels,
            'weights': target_weights,
            'paddings': target_paddings,
            'transcripts': target_transcripts,
        })
        metrics = dec.FPropDefaultTheta(encoder_outputs, targets)
        loss = metrics['loss'][0]
        correct_predicts = metrics['fraction_of_correct_next_step_preds'][0]
        summaries = tf.summary.merge(
            tf.get_collection(tf.GraphKeys.SUMMARIES))
        tf.global_variables_initializer().run()
        loss_v, _ = sess.run([loss, correct_predicts])
        # Evaluate summaries for their side effects only.
        summaries.eval()
        return loss_v
def run_config(params):
    """Build an Estimator RunConfig with mirrored multi-GPU distribution."""
    graph_opts = tf.GraphOptions(
        optimizer_options=tf.OptimizerOptions(
            opt_level=tf.OptimizerOptions.L1,
            do_function_inlining=True,
        ),
        place_pruned_graph=True,
        enable_bfloat16_sendrecv=False,
        build_cost_model=0)
    session_config = tf.ConfigProto(
        allow_soft_placement=True,
        graph_options=graph_opts,
        gpu_options=tf.GPUOptions(allow_growth=True))
    # Same mirrored strategy for both training and evaluation.
    strategy = tf.contrib.distribute.MirroredStrategy(num_gpus=params.gpu_num)
    return tf.estimator.RunConfig(
        model_dir=params.output,
        save_summary_steps=params.summary_steps,
        save_checkpoints_steps=params.eval_steps,
        keep_checkpoint_max=1,
        log_step_count_steps=params.log_steps,
        session_config=session_config,
        train_distribute=strategy,
        eval_distribute=strategy)
def run():
    """Run 10 x := a*x + y update steps, trace each step, and dump a Chrome trace.

    Fixes vs. the original:
      * the per-step result assignment had a stray trailing comma, building a
        throwaway 1-tuple (`(result_axpy) = sess.run(...),`);
      * the trace file was invalid JSON (trailing commas and an array that was
        opened with "[" but never closed).
    """
    # Setup TensorBoard
    graph_location = "/tmp/" + getpass.getuser() + "/tensorboard-logs/test"
    print('Saving graph to: %s' % graph_location)
    train_writer = tf.summary.FileWriter(graph_location)

    # Define the data: axpy = a*x + y, assigned back into x each step.
    a = tf.constant(np.full((4, 4), 1.5, dtype=np.float32), name='alpha')
    x = tf.get_variable('x', [4, 4], initializer=tf.zeros_initializer)
    y = tf.constant(np.full((4, 4), 1.0, dtype=np.float32), name='y')
    c = a * x
    axpy = c + y
    train_step = x.assign(axpy)
    with tf.control_dependencies([train_step]):
        train_op = tf.no_op('train_op')

    # Configure the session with all classic graph optimizations disabled.
    config = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False,
        inter_op_parallelism_threads=1,
        graph_options=tf.GraphOptions(optimizer_options=tf.OptimizerOptions(
            opt_level=tf.OptimizerOptions.L0,
            do_common_subexpression_elimination=False,
            do_constant_folding=False,
            do_function_inlining=False,
        )))
    # config_ngraph_enabled = ngraph_bridge.update_config(config)
    config_ngraph_enabled = config

    # Create session and run
    with tf.Session(config=config_ngraph_enabled) as sess:
        print("Python: Running with Session")
        options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        event_times = []
        sess.run(tf.global_variables_initializer())
        for i in range(10):
            # BUGFIX: was `(result_axpy) = sess.run(...),` — the trailing
            # comma created a 1-tuple and the value was never used.
            sess.run(train_op, options=options, run_metadata=run_metadata)
            print(i)
            event_times.append(timeline.Timeline(run_metadata.step_stats))
        print("Final value: ", x.eval())

        print("Writing event trace")
        # BUGFIX: emit one well-formed JSON array (no trailing commas,
        # properly terminated) instead of an unterminated dump.
        all_events = []
        for event in event_times:
            chrome_trace = event.generate_chrome_trace_format(
                show_dataflow=False)
            all_events.extend(json.loads(chrome_trace)['traceEvents'])
        with open('tf_event_trace.json', 'w') as f:
            json.dump(all_events, f, indent=1)
        train_writer.add_graph(tf.get_default_graph())
def default_config():
    """Create default config for running session."""
    graph_opts = tf.GraphOptions(
        optimizer_options=tf.OptimizerOptions(
            opt_level=tf.OptimizerOptions.L0))
    config = tf.ConfigProto(graph_options=graph_opts)
    # Be strict: no device logging, no soft placement fallback.
    config.log_device_placement = False
    config.allow_soft_placement = False
    return config
def create_session():
    """Session with a 150s op timeout, L0 optimization, and pruned-graph placement."""
    config = tf.ConfigProto(
        operation_timeout_in_ms=150000,
        graph_options=tf.GraphOptions(
            optimizer_options=tf.OptimizerOptions(
                opt_level=tf.OptimizerOptions.L0)))
    # Only place the subgraph actually needed by each run call.
    config.graph_options.place_pruned_graph = True
    return tf.Session(config=config)
def create_session():
    """Create the module-level `sess` Session with graph optimizations off."""
    global sess
    opts = tf.OptimizerOptions(opt_level=tf.OptimizerOptions.L0)
    sess = tf.Session(config=tf.ConfigProto(
        operation_timeout_in_ms=150000,
        graph_options=tf.GraphOptions(optimizer_options=opts)))
    return sess
def session_config(gpu_mem_fraction=0.95):
    """The TensorFlow Session config to use."""
    optimizer_opts = tf.OptimizerOptions(
        opt_level=tf.OptimizerOptions.L1, do_function_inlining=False)
    return tf.ConfigProto(
        allow_soft_placement=True,
        graph_options=tf.GraphOptions(optimizer_options=optimizer_opts),
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_mem_fraction))
def default_config():
    """Config filtered to the PS job and this worker task, optimizations off."""
    opts = tf.OptimizerOptions(opt_level=tf.OptimizerOptions.L0)
    # Only see the parameter servers and our own worker task.
    filters = ["/job:ps", "/job:worker/task:%d" % (FLAGS.task)]
    config = tf.ConfigProto(
        device_filters=filters,
        graph_options=tf.GraphOptions(optimizer_options=opts))
    config.log_device_placement = False
    config.allow_soft_placement = False
    return config
def session_config():
    """Horovod session config: L1 optimization with inlining, one GPU per rank."""
    graph_opts = tf.GraphOptions(optimizer_options=tf.OptimizerOptions(
        opt_level=tf.OptimizerOptions.L1, do_function_inlining=True))
    config = tf.ConfigProto(allow_soft_placement=True,
                            graph_options=graph_opts)
    # Horovod: pin each process to the GPU matching its local rank.
    config.gpu_options.visible_device_list = str(hvd.local_rank())
    return config
def session_config():
    """Config: optimization off, GPU memory growth, 10-thread pools, device logging."""
    return tf.ConfigProto(
        graph_options=tf.GraphOptions(
            optimizer_options=tf.OptimizerOptions(
                opt_level=tf.OptimizerOptions.L0)),
        gpu_options=tf.GPUOptions(allow_growth=True),
        intra_op_parallelism_threads=10,
        inter_op_parallelism_threads=10,
        log_device_placement=True)
def _OptimizerOptions():
    """Yield session configs for all 8 combinations of CSE, inlining and folding."""
    toggles = [False, True]
    for cse in toggles:
        for inline in toggles:
            for cfold in toggles:
                opts = tf.OptimizerOptions(
                    opt_level=tf.OptimizerOptions.L0,
                    do_common_subexpression_elimination=cse,
                    do_function_inlining=inline,
                    do_constant_folding=cfold)
                yield tf.ConfigProto(
                    graph_options=tf.GraphOptions(optimizer_options=opts))
def get_session(disable_optimizer):
    """Return a Session; with disable_optimizer, turn off grappler constant
    folding and classic graph optimization."""
    if not disable_optimizer:
        return tf.Session()
    config = tf.ConfigProto(
        operation_timeout_in_ms=150000,
        graph_options=tf.GraphOptions(
            optimizer_options=tf.OptimizerOptions(
                opt_level=tf.OptimizerOptions.L0)))
    # Grappler constant folding is controlled separately from opt_level.
    config.graph_options.rewrite_options.constant_folding = (
        rewriter_config_pb2.RewriterConfig.OFF)
    config.graph_options.place_pruned_graph = True
    return tf.Session(config=config)
def create_session():
    """Create session with optimizations disabled."""
    from tensorflow.core.protobuf import rewriter_config_pb2
    config = tf.ConfigProto(
        operation_timeout_in_ms=150000,
        graph_options=tf.GraphOptions(
            optimizer_options=tf.OptimizerOptions(
                opt_level=tf.OptimizerOptions.L0)))
    # Grappler constant folding must be switched off separately.
    config.graph_options.rewrite_options.constant_folding = (
        rewriter_config_pb2.RewriterConfig.OFF)
    config.graph_options.place_pruned_graph = True
    return tf.Session(config=config)
def session_config(params):
    """Session config; restricts visible GPUs to params.device_list when set."""
    graph_opts = tf.GraphOptions(optimizer_options=tf.OptimizerOptions(
        opt_level=tf.OptimizerOptions.L1, do_function_inlining=False))
    config = tf.ConfigProto(allow_soft_placement=True,
                            graph_options=graph_opts)
    if params.device_list:
        config.gpu_options.visible_device_list = ",".join(
            str(i) for i in params.device_list)
    return config
def get_session(self):
    """Return a new Session with graph optimization off and GPU memory growth."""
    config = tf.ConfigProto(graph_options=tf.GraphOptions(
        optimizer_options=tf.OptimizerOptions(
            opt_level=tf.OptimizerOptions.L0)))
    config.gpu_options.allow_growth = True
    return tf.Session(config=config)
def session_config(params):
    """Session config pinned to params.gpu with memory growth and inlining on."""
    opts = tf.OptimizerOptions(opt_level=tf.OptimizerOptions.L1,
                               do_function_inlining=True)
    return tf.ConfigProto(
        allow_soft_placement=True,
        graph_options=tf.GraphOptions(optimizer_options=opts),
        gpu_options=tf.GPUOptions(allow_growth=True,
                                  visible_device_list=str(params.gpu)))
def _OptimizerOptions(self):
    """Return configs for all 8 combinations of CSE / inlining / constant folding."""
    configs = []
    for cse in (False, True):
        for inline in (False, True):
            for cfold in (False, True):
                opts = tf.OptimizerOptions(
                    opt_level=tf.OptimizerOptions.L0,
                    do_common_subexpression_elimination=cse,
                    do_function_inlining=inline,
                    do_constant_folding=cfold)
                configs.append(tf.ConfigProto(
                    graph_options=tf.GraphOptions(optimizer_options=opts)))
    return configs
def create_session():
    """Create the global `sess` InteractiveSession with optimizations disabled."""
    global sess
    from tensorflow.core.protobuf import rewriter_config_pb2
    config = tf.ConfigProto(
        operation_timeout_in_ms=150000,
        graph_options=tf.GraphOptions(
            optimizer_options=tf.OptimizerOptions(
                opt_level=tf.OptimizerOptions.L0)))
    config.graph_options.rewrite_options.constant_folding = (
        rewriter_config_pb2.RewriterConfig.OFF)
    config.graph_options.place_pruned_graph = True
    # TODO: replace with a regular Session.
    sess = tf.InteractiveSession(config=config)
    return sess
def benchmark_matmul(n, iterations, logFLOPs, num_gpu, devlist, precision,
                     logfile):
    """Benchmark an n x n matmul on each device and append results to logfile.

    Args:
      n: matrix dimension.
      iterations: benchmark iterations (overridden when logFLOPs > 0).
      logFLOPs: if > 0, choose iterations so total work is ~10**logFLOPs FLOPs.
      num_gpu: number of GPUs; 0 means benchmark on CPU.
      devlist: comma-separated device list; empty means derive from num_gpu.
      precision: float bit width (e.g. 16/32/64), mapped to tf.float<precision>.
      logfile: path appended with one CSV line per run.

    Returns:
      Average seconds per matmul for the last device benchmarked.

    Fixes vs. the original: the dtype lookup used eval() (replaced by the
    equivalent, injection-safe getattr), and the log file was opened without
    a context manager (leaked on exception).
    """
    # Generate the list of devices if devlist is empty.
    if devlist == '':
        if num_gpu == 0:
            devlist = ['/cpu:0']
        else:
            devlist = ['/gpu:%d' % i for i in range(num_gpu)]
    else:
        devlist = devlist.split(',')

    # FLOPs of one n x n matmul: n^3 multiplies + (n-1)*n^2 adds.
    ops = n**3 + (n - 1) * n**2
    if logFLOPs > 0:
        iterations = int(np.ceil(10**logFLOPs / ops))
        print("Running %d iterations" % iterations)

    # BUGFIX: was eval('tf.float%d' % precision); getattr is equivalent and
    # does not evaluate arbitrary strings.
    datatype = getattr(tf, 'float%d' % precision)

    for dev in devlist:
        with tf.device(dev):
            matA = tf.Variable(tf.ones([n, n], dtype=datatype))
            matB = tf.Variable(tf.ones([n, n], dtype=datatype))
            prod = tf.matmul(matA, matB)

        # Creates the session
        config = tf.ConfigProto(
            graph_options=tf.GraphOptions(
                optimizer_options=tf.OptimizerOptions(
                    opt_level=tf.OptimizerOptions.L0)),
            log_device_placement=False)
        with tf.Session(config=config) as sess:
            sess.run(tf.global_variables_initializer())
            # Warm-up run
            sess.run(prod.op)
            # Benchmark run
            t = time.time()
            for _ in range(iterations):
                sess.run(prod.op)
            timeUsed = (time.time() - t) / iterations
            if num_gpu >= 1:
                mem = sess.run(tf.contrib.memory_stats.MaxBytesInUse())
                logtext = ('matrix multiplication, %d, %d, %.3f, %.3f\n' %
                           (n, precision, ops * 1e-9 / timeUsed, mem / 1e6))
            else:
                logtext = ('matrix multiplication, %d, %d, %.3f, 0\n' %
                           (n, precision, ops * 1e-9 / timeUsed))
        # BUGFIX: context manager guarantees the log file is closed.
        with open(logfile, 'a+') as f:
            f.write(logtext)
    return timeUsed