def __init__(self, inputx, indices, updates):
    """Set up the dynamic-shape ScatterAdd test cell.

    Args:
        inputx: tensor scattered into; wrapped as Parameter ``inputx``.
        indices: index tensor; wrapped as Parameter ``indices``.
        updates: values to add; wrapped as Parameter ``updates``.
    """
    super().__init__()
    # Ops under test: ScatterAdd fed through the GPU dynamic-shape converter.
    self.scatter_add = P.ScatterAdd()
    self.test_dynamic = inner.GpuConvertToDynamicShape()
    # Wrap every input as a named Parameter so the op can mutate it in place.
    for attr, tensor in (("inputx", inputx), ("indices", indices), ("updates", updates)):
        setattr(self, attr, Parameter(tensor, name=attr))
def _run_opt_with_sparse(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power,
                         beta2_power, beta1, beta2, eps, lr, gradient, param, m, v, ps_parameter, cache_enable):
    """Apply sparse adam optimizer to the weight parameter when the gradient is sparse."""
    success = True
    # A sparse gradient carries row indices plus the dense values for those rows.
    indices = gradient.indices
    values = gradient.values
    if ps_parameter and not cache_enable:
        # Parameter-server path: ship hyper-parameters, values and indices
        # (with their shapes) to the server and pull the updated param back.
        op_shape = P.Shape()
        shapes = (op_shape(param), op_shape(m), op_shape(v),
                  op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
                  op_shape(beta2), op_shape(eps), op_shape(values), op_shape(indices))
        success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2, eps,
                                               values, indices), shapes), param))
        return success
    if not target:
        # Device-side fused sparse Adam kernel handles the whole update.
        success = F.depend(success, sparse_opt(param, m, v, beta1_power, beta2_power, lr, beta1, beta2,
                                               eps, values, indices))
    else:
        # Host-side composite implementation of the same update.
        op_mul = P.Mul()
        op_square = P.Square()
        op_sqrt = P.Sqrt()
        scatter_add = P.ScatterAdd(use_locking)
        # Decay the full moment buffers first: m *= beta1, v *= beta2.
        success = F.depend(success, F.assign(m, op_mul(beta1, m)))
        success = F.depend(success, F.assign(v, op_mul(beta2, v)))
        grad_indices = gradient.indices
        grad_value = gradient.values
        # Add the gradient contribution only at the touched rows:
        # m = beta1*m + (1-beta1)*g ; v = beta2*v + (1-beta2)*g^2.
        next_m = scatter_add(m, grad_indices,
                             op_mul(F.tuple_to_array((1.0,)) - beta1, grad_value))
        next_v = scatter_add(v, grad_indices,
                             op_mul(F.tuple_to_array((1.0,)) - beta2, op_square(grad_value)))
        if use_nesterov:
            # Nesterov lookahead: temporarily fold an extra beta1*m + (1-beta1)*g
            # into the m buffer, then restore it from m_temp afterwards.
            # NOTE(review): _scaler_ten / _scaler_one are presumably module-level
            # constant tensors (10 and 1) used to stash/restore m — confirm upstream.
            m_temp = next_m * _scaler_ten
            F.assign(m, op_mul(beta1, next_m))
            div_value = scatter_add(m, op_mul(grad_indices, _scaler_one),
                                    op_mul(F.tuple_to_array((1.0,)) - beta1, grad_value))
            param_update = div_value / (op_sqrt(next_v) + eps)
            F.assign(m, m_temp / _scaler_ten)
        else:
            param_update = next_m / (op_sqrt(next_v) + eps)
        # Bias-corrected learning rate, then the dense parameter step.
        lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
        next_param = param - lr_t * param_update
        success = F.depend(success, F.assign(param, next_param))
        success = F.depend(success, F.assign(m, next_m))
        success = F.depend(success, F.assign(v, next_v))
    return success
def _run_opt_with_sparse(opt, sparse_opt, push, pull, use_locking, use_nesterov, target, beta1_power,
                         beta2_power, beta1, beta2, eps, lr, gradient, params, m, v, ps_parameter, cache_enable):
    """Apply sparse lazy adam optimizer to the weight parameter when the gradient is sparse."""
    success = True
    # A sparse gradient carries row indices plus the dense values for those rows.
    indices = gradient.indices
    values = gradient.values
    if ps_parameter and not cache_enable:
        # Parameter-server path: ship hyper-parameters, values and indices
        # (with their shapes) to the server and pull the updated params back.
        op_shape = P.Shape()
        shapes = (op_shape(params), op_shape(m), op_shape(v),
                  op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
                  op_shape(beta2), op_shape(eps), op_shape(values), op_shape(indices))
        success = F.depend(
            success,
            pull(
                push((beta1_power, beta2_power, lr, beta1, beta2, eps, values, indices), shapes), params))
        return success
    if not target:
        # Device-side fused sparse LazyAdam kernel handles the whole update.
        success = F.depend(
            success,
            sparse_opt(params, m, v, beta1_power, beta2_power, lr, beta1, beta2, eps, values, indices))
    else:
        # Host-side composite implementation. "Lazy": only the rows named by
        # `indices` are read, updated and written back; untouched rows keep
        # their stale moments.
        op_gather = P.Gather()
        op_sqrt = P.Sqrt()
        scatter_add = P.ScatterAdd(use_locking)
        scatter_update = P.ScatterUpdate(use_locking)
        # Gather just the moment rows that this sparse gradient touches.
        m_slice = op_gather(m, indices, 0)
        v_slice = op_gather(v, indices, 0)
        # Standard Adam moment updates on the gathered rows.
        next_m = m_slice * beta1 + values * (1 - beta1)
        next_v = v_slice * beta2 + values * values * (1 - beta2)
        # Bias-corrected learning rate.
        lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)
        if use_nesterov:
            # Nesterov variant applies one extra lookahead momentum step.
            m_temp = beta1 * next_m + values * (1 - beta1)
            param_update = m_temp / (op_sqrt(next_v) + eps)
        else:
            param_update = next_m / (op_sqrt(next_v) + eps)
        # Apply the step to the touched parameter rows, then write the new
        # moment rows back in place.
        success = F.depend(success, scatter_add(params, indices, -lr_t * param_update))
        success = F.depend(success, scatter_update(m, indices, next_m))
        success = F.depend(success, scatter_update(v, indices, next_v))
    return success
def __init__(self, lock, inputx, indices, updates):
    """Set up the ScatterAdd test cell with a configurable locking mode.

    Args:
        lock: forwarded to ScatterAdd as ``use_locking``.
        inputx: tensor scattered into; wrapped as Parameter ``inputx``.
        indices: index tensor; wrapped as Parameter ``indices``.
        updates: values to add; wrapped as Parameter ``updates``.
    """
    super().__init__()
    self.scatter_add = P.ScatterAdd(use_locking=lock)
    # Wrap every input as a named Parameter so the op can mutate it in place.
    for attr, tensor in (("inputx", inputx), ("indices", indices), ("updates", updates)):
        setattr(self, attr, Parameter(tensor, name=attr))
def __init__(self):
    """Set up the dynamic-shape ScatterAdd test cell (inputs supplied at call time)."""
    super().__init__()
    # Ops under test: the GPU dynamic-shape converter feeding ScatterAdd.
    self.test_dynamic = inner.GpuConvertToDynamicShape()
    self.scatter_add = P.ScatterAdd()
def __init__(self, input_x):
    """Set up the ScatterAdd test cell.

    Args:
        input_x: tensor scattered into; wrapped as Parameter ``para``.
    """
    super().__init__()
    self.scatter_add = P.ScatterAdd()
    # Wrapped as a Parameter so ScatterAdd can mutate it in place.
    self.input_x = Parameter(input_x, name="para")
def __init__(self, inputx):
    """Set up the dynamic-shape ScatterAdd test cell with a fixed target tensor.

    Args:
        inputx: tensor scattered into; wrapped as Parameter ``inputx``.
    """
    super().__init__()
    # Ops under test: the GPU dynamic-shape converter feeding ScatterAdd.
    self.test_dynamic = inner.GpuConvertToDynamicShape()
    self.scatter_add = P.ScatterAdd()
    # Wrapped as a Parameter so ScatterAdd can mutate it in place.
    self.inputx = Parameter(inputx, name="inputx")