def testScatterAdd(self):
  with self.test_session():
    handle = resource_variable_ops.var_handle_op(
        dtype=dtypes.int32, shape=[1, 1])
    resource_variable_ops.assign_variable_op(
        handle, constant_op.constant([[1]], dtype=dtypes.int32)).run()
    resource_variable_ops.resource_scatter_add(
        handle, [0], constant_op.constant([[2]], dtype=dtypes.int32)).run()
    read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)
    self.assertEqual(read.eval(), [[3]])

def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
  var_device, var_dtype = var.device, var.dtype.base_dtype
  coefficients = ((apply_state or {}).get((var_device, var_dtype))
                  or self._fallback_apply_state(var_device, var_dtype))

  acc = self.get_slot(var, 'accumulator')
  with ops.control_dependencies([
      resource_variable_ops.resource_scatter_add(
          acc.handle, indices, math_ops.square(grad))
  ]):
    acc_t_slice = acc.sparse_read(indices)
  var_update = resource_variable_ops.resource_scatter_add(
      var.handle, indices,
      coefficients['neg_lr_t'] * grad /
      (math_ops.sqrt(acc_t_slice) + coefficients['epsilon']))
  return var_update

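# For reference (my annotation, not from the source): the snippet above is the
# standard sparse Adagrad step restricted to the touched rows, assuming
# neg_lr_t holds the negated learning rate -lr:
#   acc[i] <- acc[i] + grad[i]^2
#   var[i] <- var[i] - lr * grad[i] / (sqrt(acc[i]) + epsilon)
# The control dependency guarantees acc.sparse_read sees the updated
# accumulator before the variable update is computed.
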
def _resource_scatter_add(self, x, i, v):
  # Daqi - handles incompatibility between the old Variable and new
  # ResourceVariable. For now they refer to different C++ implementations.
  # Future releases should see a merge of scatter_add and resource_scatter_add.
  with tf.control_dependencies(
      [resource_variable_ops.resource_scatter_add(x.handle, i, v)]):
    return x.value()

def _resource_apply_sparse_duplicate_indices(self, grad, var, indices):
  if self._momentum:
    return super(SGD, self)._resource_apply_sparse_duplicate_indices(
        grad, var, indices)
  else:
    return resource_variable_ops.resource_scatter_add(
        var.handle, indices,
        -grad * math_ops.cast(
            self._get_hyper("learning_rate"), grad.dtype.base_dtype))

def _resource_scatter_add(self, x, i, v):
  # We use x.handle for ResourceVariables;
  # resource_scatter_add and scatter_add refer to different ops in C++.
  with tf.control_dependencies(
      [resource_variable_ops.resource_scatter_add(x.handle, i, v)]):
    return x.value()

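# A minimal standalone sketch of what the comment above describes (my own
# illustration, not part of the original code; assumes TF 2.x):
# resource_scatter_add mutates the variable through its resource handle and
# returns an op rather than the updated tensor, which is why the helpers here
# wrap it in a control dependency and then read x.value().
import tensorflow as tf
from tensorflow.python.ops import resource_variable_ops


def demo_resource_scatter_add():
  x = tf.Variable([[1.0], [10.0]])  # a ResourceVariable in TF 2.x
  # Add [[2.0]] to row 0 in place; the op's return value is not the updated
  # tensor, so we read the variable afterwards.
  resource_variable_ops.resource_scatter_add(
      x.handle, [0], tf.constant([[2.0]]))
  return x.value()  # [[3.0], [10.0]]
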
def _resource_apply_sparse_duplicate_indices(self, grad, var, indices, state):
  if self._use_momentum:
    return super(SGD, self)._resource_apply_sparse_duplicate_indices(
        grad, var, indices, state)
  else:
    lr = state.get_hyper("learning_rate", grad.dtype.base_dtype)
    return resource_variable_ops.resource_scatter_add(
        var.handle, indices, -grad * lr)

def _resource_apply_sparse_duplicate_indices(self, grad, var, indices):
  if self._momentum:
    return super(SGD, self)._resource_apply_sparse_duplicate_indices(
        grad, var, indices)
  else:
    var_dtype = var.dtype.base_dtype
    lr_t = self._decayed_lr(var_dtype)
    return resource_variable_ops.resource_scatter_add(
        var.handle, indices, -grad * lr_t)

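# Why _resource_apply_sparse_duplicate_indices can call resource_scatter_add
# directly for momentum-free SGD: the op accumulates updates for repeated
# indices instead of overwriting them, so duplicate gradient rows are summed
# exactly as a dense update would sum them. A hedged standalone demo (assumes
# TF 2.x; demo_duplicate_indices is illustrative, not from the source):
import tensorflow as tf
from tensorflow.python.ops import resource_variable_ops


def demo_duplicate_indices():
  var = tf.Variable([[0.0], [0.0]])
  grad = tf.constant([[1.0], [2.0], [4.0]])
  indices = tf.constant([0, 0, 1])  # index 0 appears twice
  lr = 0.5
  resource_variable_ops.resource_scatter_add(var.handle, indices, -grad * lr)
  return var.value()  # [[-1.5], [-2.0]]: both row-0 updates accumulated
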
def testScatterAdd(self):
  handle = resource_variable_ops.var_handle_op(
      dtype=dtypes.int32, shape=[1, 1])
  self.evaluate(resource_variable_ops.assign_variable_op(
      handle, constant_op.constant([[1]], dtype=dtypes.int32)))
  self.evaluate(resource_variable_ops.resource_scatter_add(
      handle, [0], constant_op.constant([[2]], dtype=dtypes.int32)))
  read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)
  self.assertEqual(self.evaluate(read), [[3]])

def _resource_apply_sparse_duplicate_indices(self, grad, var, indices,
                                             **kwargs):
  if self._momentum:
    return super(CustomSGD, self)._resource_apply_sparse_duplicate_indices(
        grad, var, indices, **kwargs)
  else:
    var_device, var_dtype = var.device, var.dtype.base_dtype
    # apply_state may be passed explicitly as None, so guard before lookup.
    coefficients = ((kwargs.get("apply_state") or {}).get(
        (var_device, var_dtype))
                    or self._fallback_apply_state(var_device, var_dtype))
    return resource_variable_ops.resource_scatter_add(
        var.handle, indices, -grad * coefficients["lr_t"])

def _assign_sub(self, ref, updates, indices=None):
  if indices is not None:
    if isinstance(ref, tf.Variable):
      return tf.scatter_sub(ref, indices, updates,
                            use_locking=self._use_locking)
    elif isinstance(ref, resource_variable_ops.ResourceVariable):
      with tf.control_dependencies(
          [resource_variable_ops.resource_scatter_add(
              ref.handle, indices, -updates)]):
        return ref.value()
    else:
      raise TypeError("did not expect type %r" % type(ref))
  else:
    return tf.assign_sub(ref, updates, use_locking=self._use_locking)

def testScatterAddScalar(self):
  with self.test_session() as sess, self.test_scope():
    handle = resource_variable_ops.var_handle_op(
        dtype=dtypes.int32, shape=[1, 1])
    sess.run(
        resource_variable_ops.assign_variable_op(
            handle, constant_op.constant([[1]], dtype=dtypes.int32)))
    sess.run(
        resource_variable_ops.resource_scatter_add(
            handle, [0], constant_op.constant(2, dtype=dtypes.int32)))
    read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)
    self.assertEqual(self.evaluate(read), [[3]])

def testScatterAddScalar(self):
  with self.session() as sess, self.test_scope():
    handle = resource_variable_ops.var_handle_op(
        dtype=dtypes.int32, shape=[1, 1])
    sess.run(
        resource_variable_ops.assign_variable_op(
            handle, constant_op.constant([[1]], dtype=dtypes.int32)))
    sess.run(
        resource_variable_ops.resource_scatter_add(
            handle, [0], constant_op.constant(2, dtype=dtypes.int32)))
    read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32)
    self.assertEqual(self.evaluate(read), [[3]])

def _resource_apply_sparse_duplicate_indices(self, grad, handle, indices):
  if isinstance(handle, embedding_variable_ops.EmbeddingVariable):
    global_step = training_util.get_or_create_global_step()
    return gen_ev_ops.ev_sparse_apply_gradient_descent(
        handle.handle,
        math_ops.cast(self._learning_rate_tensor, grad.dtype.base_dtype),
        grad, indices, global_step, use_locking=self._use_locking)
  else:
    return resource_variable_ops.resource_scatter_add(
        handle.handle, indices, -grad * self._learning_rate)

def _resource_apply_sparse(self, grad, var, indices):
  momentum_buffer = self.get_slot(var, "momentum")
  learning_rate = math_ops.cast(self._learning_rate_tensor,
                                var.dtype.base_dtype)
  momentum = math_ops.cast(self._momentum_tensor, var.dtype.base_dtype)
  nu = math_ops.cast(self._nu_tensor, var.dtype.base_dtype)

  momentum_op = training_ops.resource_sparse_apply_momentum(
      var.handle,
      momentum_buffer.handle,
      nu * (1.0 - momentum) * learning_rate,
      grad,
      indices,
      momentum,
      use_locking=self._use_locking,
      use_nesterov=False,
  )
  with ops.control_dependencies([momentum_op]):
    delta = (nu - 1.0) * learning_rate * grad
    gd_op = resource_variable_ops.resource_scatter_add(
        var.handle, indices, delta)
  return control_flow_ops.group(momentum_op, gd_op)

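# A hedged reading of the snippet above (the source does not name the
# optimizer): with use_nesterov=False, resource_sparse_apply_momentum
# performs, for the selected rows,
#   accum <- momentum * accum + grad
#   var   <- var - nu * (1 - momentum) * lr * accum
# and the follow-up resource_scatter_add of (nu - 1) * lr * grad applies
#   var <- var - (1 - nu) * lr * grad.
# Combined, the rows receive
#   var <- var - lr * (nu * (1 - momentum) * accum + (1 - nu) * grad),
# i.e. a quasi-hyperbolic-momentum-style interpolation between the momentum
# buffer and the raw gradient, controlled by nu.
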
def _resource_scatter_add(self, x, i, v):
  with ops.control_dependencies(
      [resource_variable_ops.resource_scatter_add(x.handle, i, v)]):
    return x.value()

def _resource_scatter_add(self, x, i, v, _=None):
  # The last argument allows for one overflow argument, to have the same
  # function signature as state_ops.scatter_add.
  with ops.control_dependencies(
      [resource_variable_ops.resource_scatter_add(x.handle, i, v)]):
    return x.value()

def _resource_apply_sparse_duplicate_indices(self, grad, handle, indices,
                                             state):
  lr = state.get_hyper("learning_rate", grad.dtype.base_dtype)
  return resource_variable_ops.resource_scatter_add(
      handle.handle, indices, -grad * lr)

def _resource_apply_sparse_duplicate_indices(self, grad, handle, indices):
  return resource_variable_ops.resource_scatter_add(
      handle.handle, indices, -grad * self._get_hyper("learning_rate"))

def _wrapWOAccu(accuGrads, grad, var, indices, apply_state):
  # Adds `grad * 0.0` (all zeros), so `var` is left unchanged.
  return resource_variable_ops.resource_scatter_add(
      var.handle, indices, grad * 0.0)

def _resource_apply_sparse_duplicate_indices(self, grad, handle, indices):
  return resource_variable_ops.resource_scatter_add(
      handle.handle, indices,
      -grad * math_ops.cast(self._learning_rate_tensor,
                            grad.dtype.base_dtype))

def _resource_scatter_add(x, i, v):
  dependencies = [
      resource_variable_ops.resource_scatter_add(x.handle, i, v)
  ]
  with ops.control_dependencies(dependencies):
    return x.value()

def _resource_apply_sparse_duplicate_indices(self, grad, handle, indices):
  return resource_variable_ops.resource_scatter_add(
      handle.handle, indices, -grad * self._learning_rate)

def _resource_scatter_add(self, x, i, v, _=None):
  with ops.control_dependencies(
      [resource_variable_ops.resource_scatter_add(x.handle, i, v)]):
    return tf.convert_to_tensor(x)

def _resource_apply_sparse_duplicate_indices(self, grad, var, indices):
  return resource_variable_ops.resource_scatter_add(
      var.handle, indices,
      -grad * math_ops.cast(
          self._get_hyper("learning_rate"), var.dtype.base_dtype))

def _resource_apply_sparse(self, grad, handle, indices):
  return resource_variable_ops.resource_scatter_add(
      handle, indices, -grad * self._learning_rate)