def forward(self, x: tensor, h: tensor=None, c: tensor=None):
    # sequence-major input keeps the batch in dim 1; batch_first keeps it in dim 0
    max_batch_size = x.shape[0] if self.batch_first else x.shape[1]
    self.y = self.register_output_shape(
        [self.num_layers * self.num_directions, max_batch_size, self.hidden_size])
    # default-initialize the hidden (and, for LSTM, cell) state to zeros
    if h is None:
        h = zeros([self.num_layers * self.num_directions, max_batch_size, self.hidden_size])
    if c is None and self.mode == 'LSTM':
        c = zeros([self.num_layers * self.num_directions, max_batch_size, self.hidden_size])
    return super(rnnbase, self).forward(x, h, c)
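# Hedged shape note (example values assumed, not taken from the library): for a
# 2-layer bidirectional LSTM with hidden_size 8 and a batch of 4, the default
# states built above would both have shape
#   (num_layers * num_directions, batch, hidden_size) = (2 * 2, 4, 8).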
def forward_cpu(self, x: tensor) -> tensor:
    # lazily compute the output spatial size and build the vol2col kernel and
    # the bias on the first call
    if self._col == [] or self._vol == []:
        self._col = [1 for _ in range(self.dims)]
        self._vol = [1 for _ in range(self.dims)]
        for i in range(self.dims - 1, -1, -1):
            self._col[i] = int((x.shape[i + 2] + 2 * self.padding[i]
                                - self.dilation[i] * (self.kernel_size[i] - 1) - 1)
                               // self.stride[i]) + 1
            self._vol[i] = x.shape[i + 2]
        self.batch_size = x.shape[0]
        self.kernel = vol2col(self.batch_size, self.in_channels, self._vol, self._col,
                              self.kernel_size, self.stride, self.padding, self.dilation)
        if self._use_bias and self.bias is not None:
            self.bias = Parameter(zeros, [self.out_channels, *self._col])
    y = zeros([self.batch_size, self.out_channels, *self._col])
    # unfold the input and express the convolution as a single matmul
    self.col = self.kernel.forward_cpu(x)
    self.weight.param.reshape([self.weight.param.shape[0], -1])
    y.host_data = np.matmul(self.weight.param.host_data, self.col.host_data)
    y.reshape([self.out_channels, self.batch_size, self._col[0], self._col[1], self._col[2]])
    y.transpose([1, 0, 2, 3, 4])
    if self._use_bias and self.bias is not None:
        y.host_data += self.bias.param.host_data
    self.cache = [x, y]
    return y
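# Hedged sketch (not part of the library): the forward pass above lowers the
# convolution to one matmul, y = W_flat @ col, where col is the vol2col/im2col
# expansion of the input. The minimal NumPy example below checks that idea
# against a naive sliding-window convolution for one assumed configuration
# (2D, stride 1, no padding, no dilation).
import numpy as np

def im2col2d(x, kh, kw):
    # x: (C, H, W) -> (C*kh*kw, out_h*out_w) for stride 1, no padding
    c, h, w = x.shape
    out_h, out_w = h - kh + 1, w - kw + 1
    cols = np.empty((c * kh * kw, out_h * out_w), dtype=x.dtype)
    row = 0
    for ci in range(c):
        for i in range(kh):
            for j in range(kw):
                cols[row] = x[ci, i:i + out_h, j:j + out_w].ravel()
                row += 1
    return cols, out_h, out_w

x = np.random.randn(3, 5, 5)
w = np.random.randn(4, 3, 2, 2)                     # (out_channels, in_channels, kh, kw)
col, oh, ow = im2col2d(x, 2, 2)
y_matmul = (w.reshape(4, -1) @ col).reshape(4, oh, ow)

y_naive = np.zeros((4, oh, ow))
for o in range(4):
    for i in range(oh):
        for j in range(ow):
            y_naive[o, i, j] = np.sum(w[o] * x[:, i:i + 2, j:j + 2])
assert np.allclose(y_matmul, y_naive)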
def forward_cpu(self, x: tensor) -> tensor:
    assert len(x.shape) == 2
    y = zeros([x.shape[0], self.out_features])
    # y = x @ W (+ b)
    y.host_data = np.matmul(x.host_data, self.weight.param.host_data)
    if self.bias is not None:
        y.host_data += self.bias.param.host_data
    self.cache = [x, y]
    return y
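# Hedged usage sketch (shapes assumed, implied by the matmul order above):
# x is (batch, in_features), the weight is (in_features, out_features), and
# the bias broadcasts over the batch dimension.
import numpy as np

x = np.random.randn(8, 16)      # (batch, in_features)
W = np.random.randn(16, 32)     # (in_features, out_features)
b = np.random.randn(32)
y = x @ W + b
assert y.shape == (8, 32)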
def backward_cpu(self):
    x = self.cache[0]
    tmp = x.gradient.host_data.ravel()
    col = self.col.gradient.host_data.ravel()
    # scatter the column gradient back into the input volume (col2vol),
    # accumulating the contributions of overlapping windows
    _col2vol(tmp, col, self.batch_size, self.in_channels, self.n_output_plane,
             self.index_length, nbt.List(self._vol), nbt.List(self._col),
             nbt.List(self.kernel_size), nbt.List(self.stride),
             nbt.List(self.padding), nbt.List(self.dilation))
    x.gradient.host_data = tmp.reshape(x.shape)
    self.col = zeros([self.n_output_plane, self.output_length])
    return x
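# Hedged sketch (not the library's _col2vol kernel): the backward above folds
# the column gradient back into the input volume, summing wherever windows
# overlap. Minimal 1D illustration, assuming kernel size 2 and stride 1.
import numpy as np

grad_col = np.ones((2, 3))           # (kernel_size, n_windows) gradient of the unfolded view
grad_x = np.zeros(4)                 # input of length 4 -> 3 windows of size 2
for w in range(3):                   # window index
    for k in range(2):               # offset inside the window
        grad_x[w + k] += grad_col[k, w]
# interior elements belong to two windows, so their gradients accumulate to 2.0
assert np.array_equal(grad_x, np.array([1.0, 2.0, 2.0, 1.0]))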
def forward_cpu(self, x: tensor) -> tensor:
    assert len(x.shape) == len(self.axes)
    # lazily record the input/output shapes and the stride tables on the first call
    if self.old_shape == [] or self.new_shape == []:
        self.old_shape = [s for s in x.shape]
        self.new_shape = [self.old_shape[self.axes[i]] for i in range(len(self.axes))]
        self.prepare_stride(self.old_shape, self.new_shape)
    y = zeros(self.new_shape)
    for i in range(x.size):
        # decompose the flat output index with the output strides, then
        # re-compose it with the permuted input strides
        old_pos = 0
        new_pos = i
        for j in range(len(x.shape)):
            order = self.stride[j]
            old_pos += (new_pos // self.stride[len(x.shape) + j]
                        * self.stride[len(x.shape) * 2 + order])
            new_pos %= self.stride[len(x.shape) + j]
        y.host_data[i] = x.host_data[old_pos]
    self.cache.append(x)
    self.cache.append(y)
    return y
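# Hedged sketch of the index remapping above: prepare_stride appears to pack
# three tables into self.stride (the axis order, the output strides, and the
# input strides); each flat output index is decomposed with the output strides
# and re-composed with the permuted input strides. The NumPy check below
# reproduces that arithmetic for an assumed permutation.
import numpy as np

def permute_flat(x, axes):
    # element-count strides for row-major layouts of the old and new shapes
    old_shape = x.shape
    new_shape = tuple(old_shape[a] for a in axes)
    old_strides = [int(np.prod(old_shape[j + 1:])) for j in range(len(old_shape))]
    new_strides = [int(np.prod(new_shape[j + 1:])) for j in range(len(new_shape))]
    flat = x.ravel()
    out = np.empty(new_shape, dtype=x.dtype).ravel()
    for i in range(flat.size):
        old_pos, rem = 0, i
        for j, a in enumerate(axes):
            old_pos += (rem // new_strides[j]) * old_strides[a]
            rem %= new_strides[j]
        out[i] = flat[old_pos]
    return out.reshape(new_shape)

x = np.arange(24).reshape(2, 3, 4)
assert np.array_equal(permute_flat(x, (2, 0, 1)), np.transpose(x, (2, 0, 1)))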
def __init__(self, batch_size: int, in_channels: int, _vol: List, _col: List,
             kernel_size: List, stride: List, padding: List, dilation: List):
    super(vol2col, self).__init__()
    self.batch_size = batch_size
    self.in_channels = in_channels
    self._vol = _vol
    self._col = _col
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    self.dilation = dilation
    # rows of the unfolded matrix: in_channels * prod(kernel_size)
    self.n_output_plane = self.in_channels
    # columns of the unfolded matrix: batch_size * prod(_col)
    self.output_length = self.batch_size
    self.index_length = self.in_channels
    self._c = 1
    for k in self.kernel_size:
        self.n_output_plane *= k
    for c in self._col:
        self.output_length *= c
        self.index_length *= c
        self._c *= c
    self.col = zeros([self.n_output_plane, self.output_length])
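# Hedged worked example of the size bookkeeping above (values assumed, not
# taken from the library): batch 2, 3 input channels, kernel (1, 2, 2),
# output volume _col = (1, 4, 4).
import math

batch_size, in_channels = 2, 3
kernel_size, _col = [1, 2, 2], [1, 4, 4]
n_output_plane = in_channels * math.prod(kernel_size)   # 3 * 4  = 12 rows
output_length = batch_size * math.prod(_col)             # 2 * 16 = 32 columns
index_length = in_channels * math.prod(_col)              # 3 * 16 = 48
# self.col would therefore be allocated as a 12 x 32 matrix.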
def forward_cpu(self, x: tensor) -> tensor:
    # lazily compute the output spatial size and build the vol2col kernel on
    # the first call; the loop covers every spatial dimension
    if self._col == [] or self._vol == []:
        self._col = [1 for _ in range(self.dims)]
        self._vol = [1 for _ in range(self.dims)]
        for i in range(self.dims - 1, -1, -1):
            self._col[i] = int((x.shape[i + 2] + 2 * self.padding[i]
                                - self.dilation[i] * (self.kernel_size[i] - 1) - 1)
                               // self.stride[i]) + 1
            self._vol[i] = x.shape[i + 2]
            self.channel_offset *= self.kernel_size[i]
        self.batch_size = x.shape[0]
        self.in_channels = x.shape[1]
        self.kernel = vol2col(self.batch_size, self.in_channels, self._vol, self._col,
                              self.kernel_size, self.stride, self.padding, self.dilation)
    y = zeros([x.shape[0], x.shape[1], *self._col])
    y.reshape([self.in_channels * self.batch_size, -1])
    self.col = self.kernel.forward_cpu(x)
    self.col.reshape([self.in_channels * self.batch_size, self.channel_offset, -1])
    # for each (batch, channel) slice, keep the maximum of every pooling window
    # and remember its index for the backward pass
    max_idx = []
    for i in range(self.in_channels * self.batch_size):
        tmp = self.col.host_data[i]
        m_idx = np.argmax(tmp, axis=0)
        max_idx.append(m_idx)
        y.host_data[i] = self.col.host_data[i][m_idx, range(m_idx.size)]
    y.reshape([self.batch_size, self.in_channels, self._col[0], self._col[1], self._col[2]])
    x.reset_shape()
    self.cache = [x, y, max_idx]
    return y
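# Hedged sketch (not part of the library): the pooling forward above reuses the
# vol2col expansion and reduces each column with argmax, keeping the winning
# indices for the backward pass. The NumPy example below reproduces that idea
# for a single-channel 2D input with an assumed 2x2 window and stride 2.
import numpy as np

x = np.random.randn(4, 4)
# unfold the non-overlapping 2x2 windows into columns: (window_size, n_windows)
cols = np.stack([x[i:i + 2, j:j + 2].ravel()
                 for i in range(0, 4, 2)
                 for j in range(0, 4, 2)], axis=1)
max_idx = np.argmax(cols, axis=0)                      # winner per window, kept for backward
pooled = cols[max_idx, np.arange(cols.shape[1])].reshape(2, 2)
assert np.allclose(pooled, x.reshape(2, 2, 2, 2).max(axis=(1, 3)))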
def register_output_shape(self, shape: list, name: str='y') -> tensor:
    self.outputs[name] = zeros(shape)
    return self.outputs[name]