def testMultiDevices(self):
  """Adds two handle-backed constants built under different device scopes.

  One constant is created under `test.gpu_device_name()` and the other under
  "/cpu:0". Each is stored as a session handle, read back through
  `get_session_tensor`, and the sum of the two is expected to be 3.0.
  """
  with self.cached_session() as sess:
    with ops.device(test.gpu_device_name()):
      lhs = constant_op.constant(1.0)
      lhs_handle = self.evaluate(session_ops.get_session_handle(lhs))
    with ops.device("/cpu:0"):
      rhs = constant_op.constant(2.0)
      rhs_handle = self.evaluate(session_ops.get_session_handle(rhs))

    # Placeholder/tensor pairs that read the stored values back.
    lhs_holder, lhs_tensor = session_ops.get_session_tensor(
        lhs_handle.handle, dtypes.float32)
    rhs_holder, rhs_tensor = session_ops.get_session_tensor(
        rhs_handle.handle, dtypes.float32)

    total = math_ops.add(lhs_tensor, rhs_tensor)
    feeds = {lhs_holder: lhs_handle.handle, rhs_holder: rhs_handle.handle}
    total_handle = sess.run(
        session_ops.get_session_handle(total), feed_dict=feeds)
    self.assertEqual(3.0, total_handle.eval())
def testMultiDevices(self):
  """Adds two handle-backed constants built under different device scopes.

  One constant is created under `test.gpu_device_name()` and the other under
  "/cpu:0". Each is stored as a session handle, read back through
  `get_session_tensor`, and the sum of the two is expected to be 3.0.
  """
  # `test_session()` is deprecated in favor of `cached_session()`.
  with self.cached_session() as sess:
    with ops.device(test.gpu_device_name()):
      a = constant_op.constant(1.0)
      a_handle = sess.run(session_ops.get_session_handle(a))
    with ops.device("/cpu:0"):
      b = constant_op.constant(2.0)
      b_handle = sess.run(session_ops.get_session_handle(b))

    # Placeholder/tensor pairs that read the stored values back.
    a_p, a_t = session_ops.get_session_tensor(a_handle.handle, dtypes.float32)
    b_p, b_t = session_ops.get_session_tensor(b_handle.handle, dtypes.float32)
    c = math_ops.add(a_t, b_t)
    c_handle = sess.run(
        session_ops.get_session_handle(c),
        feed_dict={a_p: a_handle.handle,
                   b_p: b_handle.handle})
    self.assertEqual(3.0, c_handle.eval())
def testHandlePlacement(self):
  """Adds two handle-backed constants created without explicit device scopes.

  Both handle ops are defined first and only then run; the resulting handles
  are fed back through `get_session_tensor` and their sum is expected to be
  3.0.
  """
  # `test_session()` is deprecated in favor of `cached_session()`.
  with self.cached_session() as sess:
    a = constant_op.constant(1.0)
    a_handle_op = session_ops.get_session_handle(a)
    b = constant_op.constant(2.0)
    b_handle_op = session_ops.get_session_handle(b)

    a_handle = sess.run(a_handle_op)
    b_handle = sess.run(b_handle_op)

    # Placeholder/tensor pairs that read the stored values back.
    a_p, a_t = session_ops.get_session_tensor(a_handle.handle, dtypes.float32)
    b_p, b_t = session_ops.get_session_tensor(b_handle.handle, dtypes.float32)

    c = math_ops.add(a_t, b_t)
    c_handle = sess.run(
        session_ops.get_session_handle(c),
        feed_dict={a_p: a_handle.handle,
                   b_p: b_handle.handle})
    self.assertEqual(3.0, c_handle.eval())
def testHandlePlacement(self):
  """Adds two handle-backed constants created without explicit device scopes.

  Both handle ops are defined first and only then evaluated; the resulting
  handles are fed back through `get_session_tensor` and their sum is expected
  to be 3.0.
  """
  with self.cached_session() as sess:
    first = constant_op.constant(1.0)
    first_handle_op = session_ops.get_session_handle(first)
    second = constant_op.constant(2.0)
    second_handle_op = session_ops.get_session_handle(second)

    first_handle = self.evaluate(first_handle_op)
    second_handle = self.evaluate(second_handle_op)

    # Placeholder/tensor pairs that read the stored values back.
    first_holder, first_tensor = session_ops.get_session_tensor(
        first_handle.handle, dtypes.float32)
    second_holder, second_tensor = session_ops.get_session_tensor(
        second_handle.handle, dtypes.float32)

    total = math_ops.add(first_tensor, second_tensor)
    feeds = {
        first_holder: first_handle.handle,
        second_holder: second_handle.handle,
    }
    total_handle = sess.run(
        session_ops.get_session_handle(total), feed_dict=feeds)
    self.assertEqual(3.0, total_handle.eval())
def testHandleBasic(self):
  """Stores 10 * 5 behind a handle, then feeds the handle into another op.

  The stored value is multiplied by 10 through a `get_session_tensor`
  placeholder, and the result is expected to be 500.
  """
  with self.cached_session() as sess:
    # Return a handle.
    product = math_ops.multiply(
        constant_op.constant(10), constant_op.constant(5))
    handle = self.evaluate(session_ops.get_session_handle(product))

    # Feed a tensor handle.
    holder, stored = session_ops.get_session_tensor(
        handle.handle, dtypes.int32)
    scaled = math_ops.multiply(stored, 10)
    result = sess.run(scaled, feed_dict={holder: handle.handle})
    self.assertEqual(500, result)
def testHandleBasic(self):
  """Stores 10 * 5 behind a handle, then feeds the handle into another op.

  The stored value is multiplied by 10 through a `get_session_tensor`
  placeholder, and the result is expected to be 500.
  """
  # `test_session()` is deprecated in favor of `cached_session()`.
  with self.cached_session() as sess:
    # Return a handle.
    a = constant_op.constant(10)
    b = constant_op.constant(5)
    c = math_ops.multiply(a, b)
    h = session_ops.get_session_handle(c)
    h = sess.run(h)

    # Feed a tensor handle.
    f, x = session_ops.get_session_tensor(h.handle, dtypes.int32)
    y = math_ops.multiply(x, 10)
    self.assertEqual(500, sess.run(y, feed_dict={f: h.handle}))
def testHandleGC(self):
  """Repeatedly replaces a handle with the result of adding 1 to it.

  The initial handles are created under "/cpu:0" and the addition graph under
  `test.gpu_device_name()`. The test makes no assertion on the final value;
  it only requires the 20 runs to complete.
  """
  # `test_session()` is deprecated in favor of `cached_session()`.
  with self.cached_session() as sess:
    # initial values live on CPU
    with ops.device("/cpu:0"):
      one = constant_op.constant(1, dtype=dtypes.float32)
      one_handle = sess.run(session_ops.get_session_handle(one))
      x_handle = sess.run(session_ops.get_session_handle(one))

    # addition lives on GPU
    with ops.device(test.gpu_device_name()):
      add_h1, add_t1 = session_ops.get_session_tensor(one_handle.handle,
                                                      dtypes.float32)
      add_h2, add_t2 = session_ops.get_session_tensor(x_handle.handle,
                                                      dtypes.float32)
      add_op = math_ops.add(add_t1, add_t2)
      add_output = session_ops.get_session_handle(add_op)

    # add 1 to tensor 20 times
    for _ in range(20):
      # Rebinding x_handle drops the reference to the previous handle.
      x_handle = sess.run(
          add_output,
          feed_dict={add_h1: one_handle.handle,
                     add_h2: x_handle.handle})
def testHandleGC(self):
  """Repeatedly replaces a handle with the result of adding 1 to it.

  The initial handles are created under "/cpu:0" and the addition graph under
  `test.gpu_device_name()`. The test makes no assertion on the final value;
  it only requires the 20 runs to complete.
  """
  with self.cached_session() as sess:
    # initial values live on CPU
    with ops.device("/cpu:0"):
      unit = constant_op.constant(1, dtype=dtypes.float32)
      unit_handle = self.evaluate(session_ops.get_session_handle(unit))
      running_handle = self.evaluate(session_ops.get_session_handle(unit))

    # addition lives on GPU
    with ops.device(test.gpu_device_name()):
      unit_holder, unit_tensor = session_ops.get_session_tensor(
          unit_handle.handle, dtypes.float32)
      running_holder, running_tensor = session_ops.get_session_tensor(
          running_handle.handle, dtypes.float32)
      incremented = math_ops.add(unit_tensor, running_tensor)
      incremented_handle_op = session_ops.get_session_handle(incremented)

    # add 1 to tensor 20 times
    for _ in range(20):
      feeds = {
          unit_holder: unit_handle.handle,
          running_holder: running_handle.handle,
      }
      # Rebinding running_handle drops the reference to the previous handle.
      running_handle = sess.run(incremented_handle_op, feed_dict=feeds)
def testHandleForLoop(self):
  """Increments a handle-backed counter 100 times from a Python for loop.

  The counter starts at 0; after 100 runs of the "add 1" handle op the value
  behind the final handle is expected to be 100.
  """
  with self.cached_session() as sess:
    # Initialize a handle.
    counter = self.evaluate(
        session_ops.get_session_handle(constant_op.constant(0)))

    # Do some computation.
    holder, current = session_ops.get_session_tensor(
        counter.handle, dtypes.int32)
    # Must define the loop body outside the loop.
    next_handle_op = session_ops.get_session_handle(math_ops.add(current, 1))
    for _ in range(100):
      # This exercises garbage collection.
      counter = sess.run(next_handle_op, feed_dict={holder: counter.handle})
    self.assertEqual(100, counter.eval())
def testHandleForLoop(self):
  """Increments a handle-backed counter 100 times from a Python for loop.

  The counter starts at 0; after 100 runs of the "add 1" handle op the value
  behind the final handle is expected to be 100.
  """
  # `test_session()` is deprecated in favor of `cached_session()`.
  with self.cached_session() as sess:
    # Initialize a handle.
    a = constant_op.constant(0)
    h = session_ops.get_session_handle(a)
    h = sess.run(h)

    # Do some computation.
    f, x = session_ops.get_session_tensor(h.handle, dtypes.int32)
    # Must define the loop body outside the loop.
    h_x = session_ops.get_session_handle(math_ops.add(x, 1))
    for _ in range(100):
      # This exercises garbage collection.
      h = sess.run(h_x, feed_dict={f: h.handle})
    self.assertEqual(100, h.eval())
def testHandleWhileLoop(self):
  """Increments a handle-backed counter until a fetched predicate is false.

  Each run fetches both `x < 100` and a handle to `x + 1` for the value fed
  in. The loop stops on the run where the predicate is false, so the final
  handle is expected to hold 101.
  """
  with self.cached_session() as sess:
    # Initialize a handle.
    counter = self.evaluate(
        session_ops.get_session_handle(constant_op.constant(0)))

    # Do some computation.
    holder, current = session_ops.get_session_tensor(
        counter.handle, dtypes.int32)
    limit = constant_op.constant(100)
    below_limit_op = math_ops.less(current, limit)
    # Must define the loop body outside the loop.
    next_handle_op = session_ops.get_session_handle(math_ops.add(current, 1))

    # The predicate and the incremented handle are produced by the same run,
    # so the counter is advanced once more on the run that ends the loop.
    below_limit = True
    while below_limit:
      below_limit, counter = sess.run(
          [below_limit_op, next_handle_op],
          feed_dict={holder: counter.handle})
    self.assertEqual(101, counter.eval())
def testHandleCond(self):
  """Chooses the follow-up op in Python based on a fetched predicate.

  `10 < 5` and a handle to `10 * 5` are fetched in one run. The Python-side
  branch then multiplies the stored value by 10 or 100; the expected result
  of 5000 corresponds to the `else` branch.
  """
  # `test_session()` is deprecated in favor of `cached_session()`.
  with self.cached_session() as sess:
    # Return a handle and a value
    a = constant_op.constant(10)
    b = constant_op.constant(5)
    p = math_ops.less(a, b)
    c = math_ops.multiply(a, b)
    h = session_ops.get_session_handle(c)
    p, h = sess.run([p, h])

    # Run by feeding a tensor handle.
    f, x = session_ops.get_session_tensor(h.handle, dtypes.int32)
    if p:
      y = math_ops.multiply(x, 10)
    else:
      y = math_ops.multiply(x, 100)
    result = sess.run(y, feed_dict={f: h.handle})

    self.assertEqual(5000, result)
def testHandleCond(self):
  """Chooses the follow-up op in Python based on a fetched predicate.

  `10 < 5` and a handle to `10 * 5` are fetched in one run. The stored value
  is then multiplied by 10 if the predicate was true and by 100 otherwise;
  the expected result of 5000 corresponds to the false case.
  """
  with self.cached_session() as sess:
    # Return a handle and a value
    ten = constant_op.constant(10)
    five = constant_op.constant(5)
    is_less_op = math_ops.less(ten, five)
    product_handle_op = session_ops.get_session_handle(
        math_ops.multiply(ten, five))
    is_less, product_handle = sess.run([is_less_op, product_handle_op])

    # Run by feeding a tensor handle.
    holder, stored = session_ops.get_session_tensor(
        product_handle.handle, dtypes.int32)
    factor = 10 if is_less else 100
    scaled = math_ops.multiply(stored, factor)
    result = sess.run(scaled, feed_dict={holder: product_handle.handle})

    self.assertEqual(5000, result)
def testHandleWhileLoop(self):
  """Increments a handle-backed counter until a fetched predicate is false.

  Each run fetches both `x < 100` and a handle to `x + 1` for the value fed
  in. The loop stops on the run where the predicate is false, so the final
  handle is expected to hold 101.
  """
  # `test_session()` is deprecated in favor of `cached_session()`.
  with self.cached_session() as sess:
    # Initialize a handle.
    a = constant_op.constant(0)
    h = session_ops.get_session_handle(a)
    h = sess.run(h)

    # Do some computation.
    f, x = session_ops.get_session_tensor(h.handle, dtypes.int32)
    b = constant_op.constant(100)
    p = math_ops.less(x, b)
    # Must define the loop body outside the loop.
    h_x = session_ops.get_session_handle(math_ops.add(x, 1))
    while True:
      # The predicate and the incremented handle come from the same run, so
      # the counter is advanced once more on the run that ends the loop.
      rp, h = sess.run([p, h_x], feed_dict={f: h.handle})
      if not rp:
        break
    self.assertEqual(101, h.eval())
def testHandleMover(self):
  """Feeds handles created under different device scopes into the same op.

  A handle to `10 * 5` is fed into a "multiply by 10" op (expected 500).
  Then a handle to the constant 10, created under `test.gpu_device_name()`,
  is fed into that same op (expected 100).
  """
  with self.cached_session() as sess:
    # Return a handle.
    product = math_ops.multiply(
        constant_op.constant(10), constant_op.constant(5))
    handle = self.evaluate(session_ops.get_session_handle(product))

    # Feed a tensor handle.
    holder, stored = session_ops.get_session_tensor(
        handle.handle, dtypes.int32)
    scaled = math_ops.multiply(stored, 10)
    first_result = sess.run(scaled, feed_dict={holder: handle.handle})
    self.assertEqual(500, first_result)

    # Feed another tensor handle.
    with ops.device(test.gpu_device_name()):
      ten = constant_op.constant(10)
      handle = self.evaluate(session_ops.get_session_handle(ten))
      second_result = sess.run(scaled, feed_dict={holder: handle.handle})
      self.assertEqual(100, second_result)
def testHandleMover(self):
  """Feeds handles created under different device scopes into the same op.

  A handle to `10 * 5` is fed into a "multiply by 10" op (expected 500).
  Then a handle to the constant 10, created under `test.gpu_device_name()`,
  is fed into that same op (expected 100).
  """
  # `test_session()` is deprecated in favor of `cached_session()`.
  with self.cached_session() as sess:
    # Return a handle.
    a = constant_op.constant(10)
    b = constant_op.constant(5)
    c = math_ops.multiply(a, b)
    h = session_ops.get_session_handle(c)
    h = sess.run(h)

    # Feed a tensor handle.
    f, x = session_ops.get_session_tensor(h.handle, dtypes.int32)
    y = math_ops.multiply(x, 10)
    self.assertEqual(500, sess.run(y, feed_dict={f: h.handle}))

    # Feed another tensor handle.
    with ops.device(test.gpu_device_name()):
      a = constant_op.constant(10)
      h = session_ops.get_session_handle(a)
      h = sess.run(h)
      self.assertEqual(100, sess.run(y, feed_dict={f: h.handle}))
def make_input(self, x, name=""):
  """Create a graph input Tensor backed by the session handle of `x`.

  The returned Tensor has the same type/device as `x`, can be used as input
  to native TensorFlow ops, and can be substituted later with an ITensor
  using a callable created with env.make_function(). The Tensor and its
  handle placeholder are recorded in `self.input_dict` (Tensor -> placeholder).

  The user must ensure that the future ITensor is on the same device as `x`,
  otherwise you will see memcpy/CUDA sync errors.

  Args:
    x: ITensor used to initialize the input tensor. It is used only to
      determine dtype and device placement.
    name: suffix appended to "custom_input_" to form the op name.

  Returns:
    A Tensor that can be used in TensorFlow ops.
  """
  holder, tensor = session_ops.get_session_tensor(
      x.tf_handle, x.dtype, name="custom_input_%s" % name)
  self.input_dict[tensor] = holder
  return tensor
def handle_to_numpy(self, tensor_handle):
  """Download contents of TensorHandle and return corresponding numpy array.

  The handle->tensor converter op is cached in `self.op_cache`, keyed by
  dtype name, the handle's device and the current device, so it is only
  added to the graph once per combination.

  Args:
    tensor_handle: session_ops.TensorHandle object

  Returns:
    numpy array with a copy of data from tensor_handle
  """
  dtype = tensor_handle._dtype
  current_device = get_current_device_string(self.g)
  handle_device = shorten_device_string(
      session_ops.TensorHandle._get_device_name(tensor_handle.handle))

  key = ("handle_to_numpy", dtype.name, handle_device, current_device)

  if key not in self.op_cache:
    if self.PRINT_CACHE_MISSES:
      print("Imperative cache miss for %s"%(str(key)))

    # Node names use the device strings with ":" removed.
    op_prefix = "handle_to_numpy.%s.%s.%s" % (
        dtype.name,
        handle_device.replace(":", ""),
        current_device.replace(":", ""))
    with self.g.as_default():
      holder, tensor = session_ops.get_session_tensor(
          tensor_handle.handle, dtype, name=op_prefix)
    self.op_cache[key] = (holder, tensor)
  else:
    holder, tensor = self.op_cache[key]

  return self.run(tensor, feed_dict={holder: tensor_handle.handle})
def sum1(self, input_itensor):
  """Create a specialized op that sums over 1 dimensional vector.

  This avoids having to create Rank/Range ops that initialize indices in the
  default tf.reduce_sum. The constructed Op is cached per (op type, dtype
  name, current device) and applied to `input_itensor`.

  Args:
    input_itensor: input whose `dtype` and `tf_handle` are used to build the
      op and which is passed to the op as `input`.

  Returns:
    Result of calling the cached or newly built Op with `input_itensor`.
  """
  op_type_name = "sum1"
  dtype = input_itensor.dtype
  device_tag = get_current_device_string(self.g).replace(":", "")
  key = (op_type_name, dtype.name, device_tag)

  # Cache hit: reuse the previously built op.
  if key in self.op_cache:
    if self.PRINT_CACHE_HITS:
      print("Imperative cache hit for %s"%(str(key)))
    cached_op = self.op_cache[key]
    return cached_op(input=input_itensor)

  if self.PRINT_CACHE_MISSES:
    print("Imperative cache miss for %s"%(str(key)))

  with self.g.as_default():
    op_prefix = op_type_name + "." + dtype.name
    holder, tensor = session_ops.get_session_tensor(
        input_itensor.tf_handle, dtype, name=op_prefix+".0")
    # Reduction axis is fixed to [0], so no Rank/Range ops are needed.
    reduction_indices = constant_op.constant(
        [0], dtype=dtypes.int32, name=op_prefix+".1")
    output = gen_math_ops._sum(input=tensor,
                               reduction_indices=reduction_indices,
                               keep_dims=False,
                               name=op_prefix+".op")
    output_handle = session_ops.get_session_handle(
        output, op_prefix+".out"+".handle")

  new_op = Op(self, {"input": holder}, output_handle)
  self.cache_add(key, new_op)
  return new_op(input=input_itensor)
def apply_op(self, op_type_name, name=None, **keywords):
  """Wrapper for op_def_library apply_op with caching.

  This method aims to be semantically identical to "apply_op" of OpDefLibrary
  but work with ITensor instead of Tensor objects.

  Brief overview

  1. Extract input arguments from keywords and convert Python types into
     corresponding itensors using type constraints of the corresponding OpDef
  2. Figure out OpDef that would've been constructed for this op if original
     op_def_library were called by looking at inferred/explicit attributes,
     argument device locations, and current device context
  3. Fetch corresponding OpDef from cache if such OpDef was already
     constructed
  4. Otherwise construct OpDef and wrap it in Op object
  5. Save Op object in cache, and run it to produce itensor result

  Args:
    op_type_name: name of the op type, passed to the OpDef lookup and to the
      original apply_op.
    name: accepted for signature compatibility; not used by this
      implementation (node names are derived from op type and dtype).
    **keywords: op inputs and attributes. Python inputs are converted to
      ITensors in place before the cache key is computed.

  Returns:
    Result of calling the cached or newly constructed Op with `keywords`.

  Raises:
    AssertionError: if the underlying op produces an Operation rather than
      Tensor(s), or if a non-ITensor input/output is not a list or tuple.
  """
  op_def = self._lookup_opdef_for_type(op_type_name)

  # names of input arguments, ie "x", "y" for Add op
  input_names = [arg.name for arg in op_def.input_arg]

  # convert any python inputs in keywords into ITensors
  convert_to_itensors_with_type_inference(op_def, keywords,
                                          self.env.numpy_to_itensor)

  current_device = get_current_device_string(self.env.g)
  key = create_opdef_key(op_def, keywords, current_device)
  op = self.env.cache_lookup(key)

  # Found op in cache, run it and return the results
  if op:
    return op(**keywords)

  # Couldn't find op in graph cache, create it and add to cache
  if self.env.PRINT_CACHE_MISSES:
    print("Imperative cache miss for %s" %(str(key)))

  # Graph construction overview:
  # The new operation must reproduce old operation, except that inputs
  # and outputs must be string tensor handles instead of Tensors
  # 1. Convert input string tensor handles into Tensors
  # 2. Run the op
  # 3. Convert output tensors into string tensor handles

  # prefix to use for node names in graph, like "Add.float32"
  if len(input_names) > 0 and isinstance(keywords[input_names[0]], ITensor):
    op_prefix = op_type_name + "."+keywords[input_names[0]].dtype.name
  else:
    op_prefix = op_type_name + ".no_dtype"

  # keywords for original apply_op, ITensor entries will be replaced with
  # Tensors
  opdeflib_keywords = dict(keywords)

  # Graph construction 1/3: inputs
  # replace ITensor inputs with tensorhandle->tensor converters
  with self.env.g.as_default():
    input_holders = {}  # placeholders for string tensor handle feeding

    for input_num, input_name in enumerate(sorted(input_names)):
      op_name = op_prefix + "." + str(input_num)
      itensor_input = keywords[input_name]

      # single tensor input
      if isinstance(itensor_input, ITensor):
        holder, tensor = session_ops.get_session_tensor(
            itensor_input.tf_handle, itensor_input.dtype, name=op_name)
        input_holders[input_name] = holder
        opdeflib_keywords[input_name] = tensor

      # list input, such as for tf.concat, add converter for each element
      else:
        assert is_list_or_tuple(itensor_input)
        holder_list = []
        tensor_list = []
        for subinput_num, subinput in enumerate(itensor_input):
          # NOTE: op_name is rebound each iteration, so the "_<n>" suffixes
          # accumulate (".0_0", ".0_0_1", ...). Kept as-is so node names
          # do not change.
          op_name = op_name + "_" + str(subinput_num)
          holder, tensor = session_ops.get_session_tensor(subinput.tf_handle,
                                                          subinput.dtype,
                                                          name=op_name)
          holder_list.append(holder)
          tensor_list.append(tensor)
          opdeflib_keywords[input_name] = tensor_list
        input_holders[input_name] = holder_list

    # Graph construction 2/3: op
    # call original apply_op to create the op
    output = self.original_op_def_library.apply_op(op_type_name,
                                                   name=op_prefix+".op",
                                                   **opdeflib_keywords)

    # Graph construction 3: outputs
    # attach tensor->tensorhandle conversion to outputs
    op_name = op_prefix+".out"

    # single Tensor output
    if isinstance(output, ops_lib.Tensor):
      output_handle = session_ops.get_session_handle(output,
                                                     op_name+".handle")

    # operation output like with.control_dependencies
    elif isinstance(output, ops_lib.Operation):
      # Raised explicitly rather than via `assert False`: asserts are removed
      # under `python -O`, which would leave `output_handle` unbound below.
      raise AssertionError(
          "Imperative mode only supports ops that produce tensors")

    else:  # list of Tensors, such as for tf.split
      assert is_list_or_tuple(output)
      output_handle = []
      for output_num, output_tensor in enumerate(output):
        # NOTE: suffixes accumulate here too; kept to preserve node names.
        op_name = op_name + "_" + str(output_num)
        output_single_handle = session_ops.get_session_handle(
            output_tensor, (op_name+".handle"))
        output_handle.append(output_single_handle)

  # save our newly created op in cache
  op = Op(self.env, input_holders, output_handle)
  self.env.cache_add(key, op)

  # execute the op
  return op(**keywords)