def test_tanh(x, dtype_str, tensor_fn, dev_str, call):
    # smoke test
    x = tensor_fn(x, dtype_str, dev_str)
    ret = ivy.tanh(x)
    # type test
    assert ivy.is_array(ret)
    # cardinality test
    assert ret.shape == x.shape
    # value test
    assert np.allclose(call(ivy.tanh, x), ivy.numpy.tanh(ivy.to_numpy(x)))
    # compilation test
    helpers.assert_compilable(ivy.tanh)
def get_start_state(self, _=None, batch_size=None, dtype_str=None, v=None):
    if v is None:
        v = self.v
    else:
        v = Container(v)
    read_vector_list = [_expand(ivy.tanh(var), dim=0, N=batch_size)
                        for _, var in v.read_weights.to_iterator()]
    w_list = [_expand(ivy.softmax(var), dim=0, N=batch_size)
              for _, var in v.write_weights.to_iterator()]
    usage_indicator = _expand(self._usage, dim=0, N=batch_size)
    M = _expand(v.memory, dim=0, N=batch_size)
    return NTMControllerState(
        controller_state=self._controller.get_initial_state(batch_shape=(batch_size,)),
        read_vector_list=read_vector_list,
        w_list=w_list,
        usage_indicator=usage_indicator,
        M=M)
def lstm_update(x, init_h, init_c, kernel, recurrent_kernel, bias=None, recurrent_bias=None):
    """
    Perform long short-term memory update by unrolling the time dimension of the input array.

    :param x: input tensor of LSTM layer *[batch_shape, t, in]*.
    :type x: array
    :param init_h: initial state tensor for the cell output *[batch_shape, out]*.
    :type init_h: array
    :param init_c: initial state tensor for the cell hidden state *[batch_shape, out]*.
    :type init_c: array
    :param kernel: weights for cell kernel *[in, 4 x out]*.
    :type kernel: array
    :param recurrent_kernel: weights for cell recurrent kernel *[out, 4 x out]*.
    :type recurrent_kernel: array
    :param bias: bias for cell kernel *[4 x out]*.
    :type bias: array
    :param recurrent_bias: bias for cell recurrent kernel *[4 x out]*.
    :type recurrent_bias: array
    :return: hidden state for all timesteps *[batch_shape, t, out]* and cell state for last timestep *[batch_shape, out]*
    """
    # get shapes
    x_shape = list(x.shape)
    batch_shape = x_shape[:-2]
    timesteps = x_shape[-2]
    input_channels = x_shape[-1]
    x_flat = ivy.reshape(x, (-1, input_channels))

    # input kernel
    Wi = kernel
    Wi_x = ivy.reshape(ivy.matmul(x_flat, Wi) + (bias if bias is not None else 0),
                       batch_shape + [timesteps, -1])
    Wii_x, Wif_x, Wig_x, Wio_x = ivy.split(Wi_x, 4, -1)

    # recurrent kernel
    Wh = recurrent_kernel

    # lstm states
    ht = init_h
    ct = init_c

    # lstm outputs
    ot = x
    hts_list = list()

    # unrolled time dimension with lstm steps
    for Wii_xt, Wif_xt, Wig_xt, Wio_xt in zip(ivy.unstack(Wii_x, axis=-2), ivy.unstack(Wif_x, axis=-2),
                                              ivy.unstack(Wig_x, axis=-2), ivy.unstack(Wio_x, axis=-2)):
        htm1 = ht
        ctm1 = ct

        Wh_htm1 = ivy.matmul(htm1, Wh) + (recurrent_bias if recurrent_bias is not None else 0)
        Whi_htm1, Whf_htm1, Whg_htm1, Who_htm1 = ivy.split(Wh_htm1, num_sections=4, axis=-1)

        it = ivy.sigmoid(Wii_xt + Whi_htm1)
        ft = ivy.sigmoid(Wif_xt + Whf_htm1)
        gt = ivy.tanh(Wig_xt + Whg_htm1)
        ot = ivy.sigmoid(Wio_xt + Who_htm1)
        ct = ft * ctm1 + it * gt
        ht = ot * ivy.tanh(ct)

        hts_list.append(ivy.expand_dims(ht, -2))

    return ivy.concatenate(hts_list, -2), ct
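# The following is a minimal usage sketch for lstm_update, not part of the source: it only
# checks output shapes against the docstring above. It assumes an ivy backend (e.g. NumPy)
# has already been selected in whatever way this ivy version supports; all sizes and array
# contents below are arbitrary, illustrative values.
import numpy as np
import ivy

_batch_shape, _t, _in_ch, _out_ch = [2], 5, 8, 16
_x = ivy.array(np.random.uniform(size=_batch_shape + [_t, _in_ch]).astype('float32'))
_init_h = ivy.array(np.zeros(_batch_shape + [_out_ch], dtype='float32'))
_init_c = ivy.array(np.zeros(_batch_shape + [_out_ch], dtype='float32'))
_kernel = ivy.array(np.random.uniform(size=(_in_ch, 4 * _out_ch)).astype('float32'))
_rec_kernel = ivy.array(np.random.uniform(size=(_out_ch, 4 * _out_ch)).astype('float32'))
_hts, _ct = lstm_update(_x, _init_h, _init_c, _kernel, _rec_kernel)
assert list(_hts.shape) == _batch_shape + [_t, _out_ch]   # hidden states for all timesteps
assert list(_ct.shape) == _batch_shape + [_out_ch]        # cell state for the last timestep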
def _forward(self, x, prev_state):
    prev_read_vector_list = prev_state[1]

    controller_input = ivy.concatenate([x] + prev_read_vector_list, axis=1)
    controller_output, controller_state = self._controller(
        ivy.expand_dims(controller_input, -2), initial_state=prev_state[0])
    controller_output = controller_output[..., -1, :]

    parameters = self._controller_proj(controller_output)
    parameters = ivy.clip(parameters, -self._clip_value, self._clip_value)
    head_parameter_list = \
        ivy.split(parameters[:, :self._num_parameters_per_head * self._num_heads], self._num_heads, axis=1)
    erase_add_list = ivy.split(parameters[:, self._num_parameters_per_head * self._num_heads:],
                               2 * self._write_head_num, axis=1)

    prev_w_list = prev_state[2]
    prev_M = prev_state[4]
    w_list = []
    for i, head_parameter in enumerate(head_parameter_list):
        k = ivy.tanh(head_parameter[:, 0:self._memory_vector_dim])
        beta = ivy.softplus(head_parameter[:, self._memory_vector_dim])
        g = ivy.sigmoid(head_parameter[:, self._memory_vector_dim + 1])
        s = ivy.softmax(
            head_parameter[:, self._memory_vector_dim + 2:self._memory_vector_dim + 2 + (self._shift_range * 2 + 1)])
        gamma = ivy.softplus(head_parameter[:, -1]) + 1
        w = self._addressing(k, beta, g, s, gamma, prev_M, prev_w_list[i])
        w_list.append(w)

    # Reading (Sec 3.1)
    read_w_list = w_list[:self._read_head_num]
    if self._step == 0:
        usage_indicator = ivy.zeros_like(w_list[0])
    else:
        usage_indicator = prev_state[3] + ivy.reduce_sum(ivy.concatenate(read_w_list, 0))
    read_vector_list = []
    for i in range(self._read_head_num):
        read_vector = ivy.reduce_sum(ivy.expand_dims(read_w_list[i], axis=2) * prev_M, axis=1)
        read_vector_list.append(read_vector)

    # Writing (Sec 3.2)
    prev_write_w_list = prev_w_list[self._read_head_num:]
    w_wr_size = math.ceil(self._memory_size / 2) if self._retroactive_updates else self._memory_size
    if self._sequential_writing:
        batch_size = ivy.shape(x)[0]
        if self._step < w_wr_size:
            w_wr_list = [ivy.tile(ivy.cast(ivy.one_hot(
                ivy.array([self._step]), w_wr_size), 'float32'), (batch_size, 1))] * self._write_head_num
        else:
            batch_idxs = ivy.expand_dims(ivy.arange(batch_size, 0), -1)
            mem_idxs = ivy.expand_dims(ivy.argmax(usage_indicator[..., :w_wr_size], -1), -1)
            total_idxs = ivy.concatenate((batch_idxs, mem_idxs), -1)
            w_wr_list = [ivy.scatter_nd(total_idxs, ivy.ones((batch_size,)),
                                        (batch_size, w_wr_size))] * self._write_head_num
    else:
        w_wr_list = w_list[self._read_head_num:]
    if self._retroactive_updates:
        w_ret_list = [self._retroactive_discount * prev_write_w[..., w_wr_size:] +
                      (1 - self._retroactive_discount) * prev_write_w[..., :w_wr_size]
                      for prev_write_w in prev_write_w_list]
        w_write_list = [ivy.concatenate((w_wr, w_ret), -1) for w_wr, w_ret in zip(w_wr_list, w_ret_list)]
    else:
        w_write_list = w_wr_list
    M = prev_M
    for i in range(self._write_head_num):
        w = ivy.expand_dims(w_write_list[i], axis=2)
        if self._with_erase:
            erase_vector = ivy.expand_dims(ivy.sigmoid(erase_add_list[i * 2]), axis=1)
            # erase step: M(i) <- M(i) * (1 - w(i) e), as in NTM Sec 3.2
            M = M * (ivy.ones(ivy.shape(M)) - ivy.matmul(w, erase_vector))
        add_vector = ivy.expand_dims(ivy.tanh(erase_add_list[i * 2 + 1]), axis=1)
        M = M + ivy.matmul(w, add_vector)

    NTM_output = self._output_proj(ivy.concatenate([controller_output] + read_vector_list, axis=1))
    NTM_output = ivy.clip(NTM_output, -self._clip_value, self._clip_value)

    self._step += 1
    return NTM_output, NTMControllerState(
        controller_state=controller_state, read_vector_list=read_vector_list, w_list=w_list,
        usage_indicator=usage_indicator, M=M)
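# A small standalone NumPy sketch, illustrative and not part of the source, of the erase/add
# memory update performed in the write loop above (NTM paper, Sec 3.2) for a single write
# head; all names and sizes here are arbitrary.
import numpy as np

_batch_size, _memory_size, _memory_vector_dim = 2, 8, 4
_M = np.random.uniform(size=(_batch_size, _memory_size, _memory_vector_dim))    # memory M
_w = np.random.uniform(size=(_batch_size, _memory_size, 1))                     # write weights w_t
_e = np.random.uniform(size=(_batch_size, 1, _memory_vector_dim))               # erase vector e_t in [0, 1]
_a = np.random.uniform(size=(_batch_size, 1, _memory_vector_dim))               # add vector a_t
_M = _M * (1 - _w @ _e)   # erase: M(i) <- M(i) * (1 - w(i) e)
_M = _M + _w @ _a         # add:   M(i) <- M(i) + w(i) a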
def _forward(self, x):
    x = ivy.expand_dims(x, 0)
    x = ivy.tanh(self._linear0(x, v=self.v.linear0))
    x = ivy.tanh(self._linear1(x, v=self.v.linear1))
    return ivy.tanh(self._linear2(x, v=self.v.linear2))[0]
def _forward(self, x):
    x = ivy.expand_dims(x, 0)
    x = ivy.tanh(self._layers[0](x))
    x = ivy.tanh(self._layers[1](x))
    return ivy.tanh(self._layers[2](x))[0]