def backward(self, flag, grad):
    '''Backward gradients through the RNN.

    Args:
        flag, for future use.
        grad, <dy1, dy2, ... dyn, dhy, dcy>, where dyi is the gradient for the
            i-th output, its shape is (batch_size, hidden_size*bidirection?2:1);
            dhy is the gradient for the final hidden state, its shape is
            (num_stacks * bidirection?2:1, batch_size, hidden_size). dcy is the
            gradient for the final cell state, which is valid only for lstm;
            for other RNNs there is no dcy. Both dhy and dcy could be dummy
            tensors without shape and data.

    Returns:
        <dx1, dx2, ... dxn, dhx, dcx>, where dxi is the gradient tensor for the
            i-th input, its shape is (batch_size, input_feature_length). dhx is
            the gradient for the initial hidden state. dcx is the gradient for
            the initial cell state, which is valid only for lstm.
    '''
    if type(flag) is bool:
        if flag:
            flag = model_pb2.kTrain
        else:
            flag = model_pb2.kEval
    tensors = []
    for t in grad:
        assert isinstance(t, tensor.Tensor), 'grad must be py Tensor'
        tensors.append(t.singa_tensor)
    ret = self.layer.BackwardWithMultInputs(flag, tensors)
    return tensor.from_raw_tensors(ret[0]), tensor.from_raw_tensors(ret[1])
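# Hedged usage sketch for RNN backward, mirroring the docstring above. The
# `rnn`, seq_len, batch and hidden arguments refer to the LSTM set up in the
# RNN forward sketch further below; that layer.LSTM constructor is an
# assumption, not a confirmed signature.
def _example_rnn_backward(rnn, seq_len, batch, hidden):
    from singa import tensor
    from singa.proto import model_pb2
    # one gradient tensor per output position, shaped like the yi outputs
    dys = [tensor.Tensor((batch, hidden)) for _ in range(seq_len)]
    for g in dys:
        g.set_value(1.0)          # pretend dL/dyi is all ones
    dhy = tensor.Tensor()         # dummy gradient for the final hidden state
    dcy = tensor.Tensor()         # dummy gradient for the final cell state
    # returns (<dx1, ..., dxn, dhx, dcx>, <parameter gradients>)
    dxs, dparams = rnn.backward(model_pb2.kTrain, dys + [dhy, dcy])
    return dxs, dparams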
def backward(self, flag, dy):
    '''Backward propagate gradients through this layer.

    Args:
        flag (int): for future use.
        dy (Tensor or list<Tensor>): the gradient tensor(s) of y w.r.t. the
            objective loss.

    Return:
        <dx, <dp1, dp2, ...>>, where dx is a (set of) tensor(s) for the
            gradient of x, and dpi is the gradient of the i-th parameter.
    '''
    if type(flag) is bool:
        if flag:
            flag = model_pb2.kTrain
        else:
            flag = model_pb2.kEval
    if type(dy) == list:
        dys = [t.singa_tensor for t in dy]
        ret = self.layer.BackwardWithMultInputs(flag, dys)
    else:
        assert isinstance(dy, tensor.Tensor), \
            'input of %s (type:%s) must be a Tensor or Tensor list'\
            % (self.name, type(dy).__name__)
        dys = dy.singa_tensor
        ret = self.layer.Backward(flag, dys)
    if type(ret[0]) is tuple:
        dxs = tensor.from_raw_tensors(ret[0])
    else:
        dxs = tensor.from_raw_tensor(ret[0])
    return dxs, tensor.from_raw_tensors(ret[1])
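# Hedged sketch of backward through a single layer: reuse the Dense layer
# from the forward sketch further below (its layer.Dense constructor
# arguments are assumptions). The loss gradient dy is faked as all ones.
def _example_layer_backward(fc, batch_size=8, num_output=3):
    from singa import tensor
    from singa.proto import model_pb2
    dy = tensor.Tensor((batch_size, num_output))
    dy.set_value(1.0)                        # pretend dL/dy is all ones
    # dx matches the shape of the forward input x; dparams[i] pairs with
    # fc.param_values()[i]
    dx, dparams = fc.backward(model_pb2.kTrain, dy)
    return dx, dparams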
def backward(self, flag, dy):
    '''Backward propagate gradients through this layer.

    Args:
        flag (int): for future use.
        dy (Tensor or list<Tensor>): the gradient tensor(s) of y w.r.t. the
            objective loss.

    Return:
        <dx, <dp1, dp2, ...>>, where dx is a (set of) tensor(s) for the
            gradient of x, and dpi is the gradient of the i-th parameter.
    '''
    if type(dy) == list:
        dys = []
        for t in dy:
            dys.append(t.singa_tensor)
    else:
        assert isinstance(dy, tensor.Tensor), \
            'the input must be a Tensor or a set of Tensor'
        dys = dy.singa_tensor
    ret = self.layer.Backward(flag, dys)
    if type(ret[0]) == list:
        dxs = tensor.from_raw_tensors(ret[0])
    else:
        dxs = tensor.from_raw_tensor(ret[0])
    return dxs, tensor.from_raw_tensors(ret[1])
def forward(self, flag, x):
    '''Forward propagate through this layer.

    Args:
        flag: True (kTrain) for training; False (kEval) for evaluating;
            other values for future use.
        x (Tensor or list<Tensor>): an input tensor if the layer is connected
            from a single layer; a list of tensors if the layer is connected
            from multiple layers.

    Return:
        a tensor if the layer is connected to a single layer; a list of
        tensors if the layer is connected to multiple layers;
    '''
    assert self.has_setup, 'Must call setup() before forward()'
    if type(flag) is bool:
        if flag:
            flag = model_pb2.kTrain
        else:
            flag = model_pb2.kEval
    if type(x) is list:
        xs = [t.singa_tensor for t in x]
        y = self.layer.ForwardWithMultInputs(flag, xs)
    else:
        assert isinstance(x, tensor.Tensor), \
            'input of %s (type:%s) must be a Tensor or Tensor list'\
            % (self.name, type(x).__name__)
        y = self.layer.Forward(flag, x.singa_tensor)
    if type(y) is tuple:
        return tensor.from_raw_tensors(y)
    else:
        return tensor.from_raw_tensor(y)
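# Hedged usage sketch for forward through a single layer. The layer.Dense
# name and constructor arguments ('fc', num_output, input_sample_shape) are
# assumptions about the surrounding layer module, not confirmed signatures.
def _example_layer_forward():
    from singa import layer, tensor
    fc = layer.Dense('fc', 3, input_sample_shape=(4,))  # assumed constructor
    x = tensor.Tensor((8, 4))     # a batch of 8 samples with 4 features each
    x.gaussian(0, 1)
    y = fc.forward(True, x)       # True is mapped to model_pb2.kTrain above
    return fc, x, y               # y.shape should be (8, 3)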
def forward(self, flag, inputs):
    '''Forward inputs through the RNN.

    Args:
        flag, kTrain or kEval.
        inputs, <x1, x2, ... xn, hx, cx>, where xi is the input tensor for the
            i-th position, its shape is (batch_size, input_feature_length);
            the batch_size of xi must be >= that of xi+1; hx is the initial
            hidden state of shape (num_stacks * bidirection?2:1, batch_size,
            hidden_size). cx is the initial cell state tensor of the same
            shape as hx. cx is valid only for lstm; for other RNNs there is
            no cx. Both hx and cx could be dummy tensors without shape and
            data.

    Returns:
        <y1, y2, ... yn, hy, cy>, where yi is the output tensor for the i-th
            position, its shape is (batch_size, hidden_size * bidirection?2:1).
            hy is the final hidden state tensor. cy is the final cell state
            tensor, which is used only for lstm.
    '''
    assert self.has_setup, 'Must call setup() before forward()'
    assert len(inputs) > 1, 'The input to RNN must include at '\
        'least one input tensor '\
        'and one hidden state tensor (could be a dummy tensor)'
    tensors = []
    for t in inputs:
        assert isinstance(t, tensor.Tensor), \
            'input must be py Tensor %s' % (type(t))
        tensors.append(t.singa_tensor)
    y = self.layer.Forward(flag, tensors)
    return tensor.from_raw_tensors(y)
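# Hedged usage sketch for RNN forward: run a small LSTM over a 3-step
# sequence. layer.LSTM and its constructor arguments are assumptions; adjust
# them to the RNN class you actually use.
def _example_rnn_forward():
    from singa import layer, tensor
    from singa.proto import model_pb2
    seq_len, batch, in_dim, hidden = 3, 2, 4, 5
    rnn = layer.LSTM(name='lstm', hidden_size=hidden,
                     input_sample_shape=(in_dim,))       # assumed signature
    xs = [tensor.Tensor((batch, in_dim)) for _ in range(seq_len)]
    for x in xs:
        x.gaussian(0, 1)
    hx = tensor.Tensor()   # dummy initial hidden state (allowed per docstring)
    cx = tensor.Tensor()   # dummy initial cell state, lstm only
    # outputs = <y1, ..., yn, hy, cy>; each yi has shape (batch, hidden)
    outputs = rnn.forward(model_pb2.kTrain, xs + [hx, cx])
    return rnn, outputs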
def forward(self, flag, x):
    '''Forward propagate through this layer.

    Args:
        flag (int): kTrain or kEval
        x (Tensor or list<Tensor>): an input tensor if the layer is connected
            from a single layer; a list of tensors if the layer is connected
            from multiple layers.

    Return:
        a tensor if the layer is connected to a single layer; a list of
        tensors if the layer is connected to multiple layers;
    '''
    assert self.has_setup, 'Must call setup() before forward()'
    if type(x) == list:
        xs = []
        for t in x:
            xs.append(t.singa_tensor)
    else:
        assert isinstance(x, tensor.Tensor), \
            'input must be a Tensor or a list of Tensor'
        xs = x.singa_tensor
    y = self.layer.Forward(flag, xs)
    if type(y) == list:
        return tensor.from_raw_tensors(y)
    else:
        return tensor.from_raw_tensor(y)
def param_values(self):
    '''Return param value tensors.

    Parameter tensors are not stored as layer members because the cpp Tensor
    could be moved onto different devices due to the change of layer device,
    which would result in inconsistency.

    Returns:
        a list of tensors, one for each parameter
    '''
    if self.layer is None:
        return []
    else:
        return tensor.from_raw_tensors(self.layer.param_values())
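# Hedged sketch: initialize every parameter tensor returned by param_values().
# param_names() and the layer.Dense constructor arguments are assumptions
# about the surrounding module, not confirmed signatures.
def _example_init_params():
    from singa import layer
    fc = layer.Dense('fc', 3, input_sample_shape=(4,))   # assumed constructor
    for name, p in zip(fc.param_names(), fc.param_values()):
        if 'weight' in name:
            p.gaussian(0, 0.1)   # random init for weight matrices
        else:
            p.set_value(0.0)     # zero init for bias vectors
    return fc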