import numpy as np

import mindspore.nn as nn
from mindspore import Parameter, Tensor
from mindspore.ops import functional as F
from mindspore.ops import operations as P
from mindspore.ops.operations import _inner_ops as inner


class TestScatterUpdateDynamicNet(nn.Cell):
    """ScatterUpdate test net that routes its input through a dynamic-shape conversion."""

    def __init__(self, inputx, indices, updates):
        super(TestScatterUpdateDynamicNet, self).__init__()
        self.scatter_update = P.ScatterUpdate()
        self.test_dynamic = inner.GpuConvertToDynamicShape()
        self.inputx = Parameter(inputx, name="inputx")
        self.indices = Parameter(indices, name="indices")
        self.updates = Parameter(updates, name="updates")
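
    # Minimal sketch, not part of the original excerpt: a plausible construct for this
    # test converts the stored Parameter to a dynamic-shape tensor first, then scatters
    # the stored updates into the indexed rows.
    def construct(self):
        out = self.test_dynamic(self.inputx)
        return self.scatter_update(out, self.indices, self.updates)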
class Net(nn.Cell):
    """ScatterUpdate net with shard strategies for auto-parallel tests."""

    def __init__(self, strategy1=None, strategy2=None):
        super(Net, self).__init__()
        self.inputs = Parameter(Tensor(np.ones([32, 64, 128]).astype(np.float32)), "input")
        self.indices = Tensor(np.ones([4, 8]).astype(np.int32))
        self.updates = Tensor(np.ones([4, 8, 64, 128]).astype(np.float32))
        self.scatter_update = P.ScatterUpdate().shard(strategy1)
        self.add = P.TensorAdd().shard(strategy2)
        self.relu = P.ReLU()
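
    # Hedged sketch, assumed rather than taken from the original excerpt: a construct
    # that wires the sharded ops together in the order their strategies suggest.
    def construct(self, x):
        out = self.scatter_update(self.inputs, self.indices, self.updates)
        out = self.add(out, x)
        return self.relu(out)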
def _run_opt_with_sparse(opt, sparse_opt, push, pull, use_locking, use_nesterov, target,
                         beta1_power, beta2_power, beta1, beta2, eps, lr, gradient, params,
                         m, v, ps_parameter, cache_enable):
    """Apply sparse lazy adam optimizer to the weight parameter when the gradient is sparse."""
    success = True
    indices = gradient.indices
    values = gradient.values
    if ps_parameter and not cache_enable:
        # Parameter-server path: push the hyper-parameters and the sparse gradient to
        # the server, then pull the updated weight back.
        op_shape = P.Shape()
        shapes = (op_shape(params), op_shape(m), op_shape(v),
                  op_shape(beta1_power), op_shape(beta2_power), op_shape(lr), op_shape(beta1),
                  op_shape(beta2), op_shape(eps), op_shape(values), op_shape(indices))
        success = F.depend(success, pull(push((beta1_power, beta2_power, lr, beta1, beta2,
                                               eps, values, indices), shapes), params))
        return success

    if not target:
        # Device path: delegate to the fused sparse optimizer kernel.
        success = F.depend(success, sparse_opt(params, m, v, beta1_power, beta2_power, lr,
                                               beta1, beta2, eps, values, indices))
    else:
        # Host path: lazily update only the rows addressed by the sparse gradient.
        op_gather = P.Gather()
        op_sqrt = P.Sqrt()
        scatter_add = P.ScatterAdd(use_locking)
        scatter_update = P.ScatterUpdate(use_locking)

        m_slice = op_gather(m, indices, 0)
        v_slice = op_gather(v, indices, 0)

        next_m = m_slice * beta1 + values * (1 - beta1)
        next_v = v_slice * beta2 + values * values * (1 - beta2)

        lr_t = lr * op_sqrt(1 - beta2_power) / (1 - beta1_power)

        if use_nesterov:
            m_temp = beta1 * next_m + values * (1 - beta1)
            param_update = m_temp / (op_sqrt(next_v) + eps)
        else:
            param_update = next_m / (op_sqrt(next_v) + eps)

        success = F.depend(success, scatter_add(params, indices, -lr_t * param_update))
        success = F.depend(success, scatter_update(m, indices, next_m))
        success = F.depend(success, scatter_update(v, indices, next_v))

    return success
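
# Illustration only (NumPy, not MindSpore; all names here are hypothetical): the host
# path above touches just the rows selected by `indices`. The same row-wise lazy adam
# step, re-stated densely, looks like this:
def _lazy_adam_rows_numpy(params, m, v, indices, values, lr, beta1, beta2, eps,
                          beta1_power, beta2_power):
    """Apply one lazy adam step to the rows of params/m/v named by indices."""
    m_slice = m[indices]                      # mirrors Gather(m, indices, 0)
    v_slice = v[indices]                      # mirrors Gather(v, indices, 0)
    next_m = m_slice * beta1 + values * (1 - beta1)
    next_v = v_slice * beta2 + values * values * (1 - beta2)
    lr_t = lr * np.sqrt(1 - beta2_power) / (1 - beta1_power)
    update = next_m / (np.sqrt(next_v) + eps)
    # With duplicate indices, np.add.at(params, indices, ...) would be needed to match
    # ScatterAdd's accumulate semantics; plain fancy indexing suffices for unique rows.
    params[indices] -= lr_t * update          # mirrors ScatterAdd(params, indices, -lr_t * update)
    m[indices] = next_m                       # mirrors ScatterUpdate(m, indices, next_m)
    v[indices] = next_v                       # mirrors ScatterUpdate(v, indices, next_v)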
class TestScatterUpdateNet(nn.Cell):
    def __init__(self, inputx, indices, updates):
        super(TestScatterUpdateNet, self).__init__()
        self.scatter_update = P.ScatterUpdate()
        self.inputx = Parameter(inputx, name="inputx")
        self.indices = Parameter(indices, name="indices")
        self.updates = Parameter(updates, name="updates")
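
    # Minimal sketch (assumed, not from the original snippet): the static-shape variant
    # presumably just scatters the stored updates into inputx.
    def construct(self):
        return self.scatter_update(self.inputx, self.indices, self.updates)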
class ScatterUpdateNet(nn.Cell):
    def __init__(self, input_x):
        super(ScatterUpdateNet, self).__init__()
        self.input_x = Parameter(input_x, name="para")
        self.scatter_update = P.ScatterUpdate()
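
    # Hedged sketch with an assumed signature: since only input_x is stored, indices and
    # updates are presumably supplied at call time.
    def construct(self, indices, updates):
        return self.scatter_update(self.input_x, indices, updates)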
class TestScatterUpdateDynamicNet2(nn.Cell):
    def __init__(self):
        super(TestScatterUpdateDynamicNet2, self).__init__()
        self.scatter_update = P.ScatterUpdate()
        self.test_dynamic = inner.GpuConvertToDynamicShape()
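
    # Hedged sketch with an assumed signature: with no stored tensors, all three operands
    # are presumably passed at call time and routed through the dynamic-shape converter.
    def construct(self, inputx, indices, updates):
        inputx = self.test_dynamic(inputx)
        indices = self.test_dynamic(indices)
        updates = self.test_dynamic(updates)
        return self.scatter_update(inputx, indices, updates)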