Example #1
import numpy as np

import ivy
import ivy.numpy  # backend namespace used in the value test below
from ivy_tests import helpers  # assumed import path for ivy's test helpers


def test_tanh(x, dtype_str, tensor_fn, dev_str, call):
    # smoke test
    x = tensor_fn(x, dtype_str, dev_str)
    ret = ivy.tanh(x)
    # type test
    assert ivy.is_array(ret)
    # cardinality test
    assert ret.shape == x.shape
    # value test
    assert np.allclose(call(ivy.tanh, x), ivy.numpy.tanh(ivy.to_numpy(x)))
    # compilation test
    helpers.assert_compilable(ivy.tanh)
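
Outside the test harness, the call under test is a one-liner. A minimal sketch, assuming the NumPy backend is selected via ivy.set_framework (the backend-selection entry point of this ivy generation):

import numpy as np
import ivy

ivy.set_framework('numpy')  # assumption: backends can be named by string
x = ivy.array([0., 1., 2.])
y = ivy.tanh(x)
assert np.allclose(ivy.to_numpy(y), np.tanh(np.array([0., 1., 2.])))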
Example #2
# Method of an NTM module; Container, NTMControllerState and the _expand
# helper are defined in the surrounding module.
def get_start_state(self, _=None, batch_size=None, dtype_str=None, v=None):
    if v is None:
        v = self.v
    else:
        v = Container(v)
    # expand each learned per-cell initial state across the batch dimension
    read_vector_list = [_expand(ivy.tanh(var), dim=0, N=batch_size)
                        for _, var in v.read_weights.to_iterator()]
    w_list = [_expand(ivy.softmax(var), dim=0, N=batch_size)
              for _, var in v.write_weights.to_iterator()]
    usage_indicator = _expand(self._usage, dim=0, N=batch_size)
    M = _expand(v.memory, dim=0, N=batch_size)
    return NTMControllerState(
        controller_state=self._controller.get_initial_state(batch_shape=(batch_size,)),
        read_vector_list=read_vector_list,
        w_list=w_list,
        usage_indicator=usage_indicator,
        M=M)
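
_expand itself is not shown here; from its call sites it evidently inserts a new axis at dim and repeats the tensor N times along it, turning each learned per-cell initial state into a per-batch copy. A NumPy sketch of that presumed behaviour:

import numpy as np

def _expand(x, dim=0, N=1):
    # insert a new axis at `dim`, then tile it N times (presumed behaviour)
    return np.repeat(np.expand_dims(x, dim), N, axis=dim)

var = np.zeros((4, 3))            # a learned per-cell initial state
batch = _expand(var, dim=0, N=8)  # one copy per batch item
assert batch.shape == (8, 4, 3)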
Example #3
import ivy


def lstm_update(x, init_h, init_c, kernel, recurrent_kernel, bias=None, recurrent_bias=None):
    """
    Perform a long short-term memory (LSTM) update by unrolling the time dimension of the input array.

    :param x: input tensor of LSTM layer *[batch_shape, t, in]*.
    :type x: array
    :param init_h: initial state tensor for the cell output *[batch_shape, out]*.
    :type init_h: array
    :param init_c: initial state tensor for the cell hidden state *[batch_shape, out]*.
    :type init_c: array
    :param kernel: weights for cell kernel *[in, 4 x out]*.
    :type kernel: array
    :param recurrent_kernel: weights for cell recurrent kernel *[out, 4 x out]*.
    :type recurrent_kernel: array
    :param bias: bias for cell kernel *[4 x out]*.
    :type bias: array
    :param recurrent_bias: bias for cell recurrent kernel *[4 x out]*.
    :type recurrent_bias: array
    :return: hidden state for all timesteps *[batch_shape, t, out]* and cell state for the last timestep *[batch_shape, out]*.
    """

    # get shapes
    x_shape = list(x.shape)
    batch_shape = x_shape[:-2]
    timesteps = x_shape[-2]
    input_channels = x_shape[-1]
    x_flat = ivy.reshape(x, (-1, input_channels))

    # input kernel
    Wi = kernel
    Wi_x = ivy.reshape(ivy.matmul(x_flat, Wi) + (bias if bias is not None else 0),
                       batch_shape + [timesteps, -1])
    Wii_x, Wif_x, Wig_x, Wio_x = ivy.split(Wi_x, 4, -1)

    # recurrent kernel
    Wh = recurrent_kernel

    # lstm states
    ht = init_h
    ct = init_c

    # lstm outputs
    hts_list = list()

    # unrolled time dimension with lstm steps
    for Wii_xt, Wif_xt, Wig_xt, Wio_xt in zip(ivy.unstack(Wii_x, axis=-2), ivy.unstack(Wif_x, axis=-2),
                                              ivy.unstack(Wig_x, axis=-2), ivy.unstack(Wio_x, axis=-2)):
        htm1 = ht
        ctm1 = ct

        Wh_htm1 = ivy.matmul(htm1, Wh) + (recurrent_bias if recurrent_bias is not None else 0)
        Whi_htm1, Whf_htm1, Whg_htm1, Who_htm1 = ivy.split(Wh_htm1, num_sections=4, axis=-1)

        it = ivy.sigmoid(Wii_xt + Whi_htm1)  # input gate
        ft = ivy.sigmoid(Wif_xt + Whf_htm1)  # forget gate
        gt = ivy.tanh(Wig_xt + Whg_htm1)     # cell candidate
        ot = ivy.sigmoid(Wio_xt + Who_htm1)  # output gate
        ct = ft * ctm1 + it * gt             # new cell state
        ht = ot * ivy.tanh(ct)               # new hidden state

        hts_list.append(ivy.expand_dims(ht, -2))

    return ivy.concatenate(hts_list, -2), ct
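
The docstring pins down every shape, so a smoke test is short. A minimal sketch, assuming ivy.random_uniform and ivy.zeros with the signatures this ivy generation used, and hypothetical sizes chosen for illustration:

import ivy

batch_size, timesteps, in_ch, out_ch = 2, 5, 8, 16
x = ivy.random_uniform(shape=(batch_size, timesteps, in_ch))
init_h = ivy.zeros((batch_size, out_ch))
init_c = ivy.zeros((batch_size, out_ch))
kernel = ivy.random_uniform(shape=(in_ch, 4 * out_ch))
recurrent_kernel = ivy.random_uniform(shape=(out_ch, 4 * out_ch))

hts, ct = lstm_update(x, init_h, init_c, kernel, recurrent_kernel)
assert list(hts.shape) == [batch_size, timesteps, out_ch]  # hidden states, all steps
assert list(ct.shape) == [batch_size, out_ch]              # cell state, last step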
Example #4
    # Method of the NTM module; the surrounding module imports math and ivy and
    # defines NTMControllerState and the self._addressing helper used below.
    def _forward(self, x, prev_state):
        prev_read_vector_list = prev_state[1]

        controller_input = ivy.concatenate([x] + prev_read_vector_list, axis=1)
        controller_output, controller_state = self._controller(ivy.expand_dims(controller_input, -2),
                                                               initial_state=prev_state[0])
        controller_output = controller_output[..., -1, :]

        parameters = self._controller_proj(controller_output)
        parameters = ivy.clip(parameters, -self._clip_value, self._clip_value)
        head_parameter_list = ivy.split(
            parameters[:, :self._num_parameters_per_head * self._num_heads],
            self._num_heads, axis=1)
        erase_add_list = ivy.split(
            parameters[:, self._num_parameters_per_head * self._num_heads:],
            2 * self._write_head_num, axis=1)

        prev_w_list = prev_state[2]
        prev_M = prev_state[4]
        w_list = []
        for i, head_parameter in enumerate(head_parameter_list):
            k = ivy.tanh(head_parameter[:, 0:self._memory_vector_dim])  # key vector
            beta = ivy.softplus(head_parameter[:, self._memory_vector_dim])  # key strength
            g = ivy.sigmoid(head_parameter[:, self._memory_vector_dim + 1])  # interpolation gate
            # shift weighting over the allowed shift range
            s = ivy.softmax(
                head_parameter[:, self._memory_vector_dim + 2:self._memory_vector_dim +
                                                              2 + (self._shift_range * 2 + 1)])
            gamma = ivy.softplus(head_parameter[:, -1]) + 1  # sharpening factor (>= 1)
            w = self._addressing(k, beta, g, s, gamma, prev_M, prev_w_list[i])
            w_list.append(w)

        # Reading (Sec 3.1)

        read_w_list = w_list[:self._read_head_num]
        if self._step == 0:
            usage_indicator = ivy.zeros_like(w_list[0])
        else:
            usage_indicator = prev_state[3] + ivy.reduce_sum(ivy.concatenate(read_w_list, 0))
        read_vector_list = []
        for i in range(self._read_head_num):
            read_vector = ivy.reduce_sum(ivy.expand_dims(read_w_list[i], axis=2) * prev_M, axis=1)
            read_vector_list.append(read_vector)

        # Writing (Sec 3.2)

        prev_write_w_list = prev_w_list[self._read_head_num:]
        w_wr_size = math.ceil(self._memory_size / 2) if self._retroactive_updates else self._memory_size
        if self._sequential_writing:
            batch_size = ivy.shape(x)[0]
            if self._step < w_wr_size:
                w_wr_list = [ivy.tile(ivy.cast(ivy.one_hot(
                    ivy.array([self._step]), w_wr_size), 'float32'),
                    (batch_size, 1))] * self._write_head_num
            else:
                batch_idxs = ivy.expand_dims(ivy.arange(batch_size, 0), -1)
                mem_idxs = ivy.expand_dims(ivy.argmax(usage_indicator[..., :w_wr_size], -1), -1)
                total_idxs = ivy.concatenate((batch_idxs, mem_idxs), -1)
                w_wr_list = [ivy.scatter_nd(total_idxs, ivy.ones((batch_size,)),
                                            (batch_size, w_wr_size))] * self._write_head_num
        else:
            w_wr_list = w_list[self._read_head_num:]
        if self._retroactive_updates:
            w_ret_list = [self._retroactive_discount * prev_write_w[..., w_wr_size:] +
                          (1 - self._retroactive_discount) * prev_write_w[..., :w_wr_size]
                          for prev_write_w in prev_write_w_list]
            w_write_list = [ivy.concatenate((w_wr, w_ret), -1) for w_wr, w_ret in zip(w_wr_list, w_ret_list)]
        else:
            w_write_list = w_wr_list
        M = prev_M
        for i in range(self._write_head_num):
            w = ivy.expand_dims(w_write_list[i], axis=2)
            if self._with_erase:
                # erase: subtract the weighted erase vector from memory
                erase_vector = ivy.expand_dims(ivy.sigmoid(erase_add_list[i * 2]), axis=1)
                M = M * ivy.ones(ivy.shape(M)) - ivy.matmul(w, erase_vector)
            # add: write the weighted add vector into memory
            add_vector = ivy.expand_dims(ivy.tanh(erase_add_list[i * 2 + 1]), axis=1)
            M = M + ivy.matmul(w, add_vector)

        NTM_output = self._output_proj(ivy.concatenate([controller_output] + read_vector_list, axis=1))
        NTM_output = ivy.clip(NTM_output, -self._clip_value, self._clip_value)

        self._step += 1
        return NTM_output, NTMControllerState(
            controller_state=controller_state, read_vector_list=read_vector_list, w_list=w_list,
            usage_indicator=usage_indicator, M=M)
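
The read in Sec 3.1 is just a weighted sum over memory rows: each read vector is sum_i w(i) * M(i). The same contraction as the ivy line above, sketched in plain NumPy with assumed shapes (w: [batch, mem_size], M: [batch, mem_size, mem_dim]):

import numpy as np

batch, mem_size, mem_dim = 2, 4, 3
w = np.random.rand(batch, mem_size)
w /= w.sum(-1, keepdims=True)  # normalised read weights
M = np.random.rand(batch, mem_size, mem_dim)

# equivalent to ivy.reduce_sum(ivy.expand_dims(w, axis=2) * prev_M, axis=1)
read_vector = (w[..., None] * M).sum(axis=1)
assert read_vector.shape == (batch, mem_dim)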
Example #5
def _forward(self, x):
    x = ivy.expand_dims(x, 0)  # add batch dimension
    x = ivy.tanh(self._linear0(x, v=self.v.linear0))
    x = ivy.tanh(self._linear1(x, v=self.v.linear1))
    return ivy.tanh(self._linear2(x, v=self.v.linear2))[0]  # strip batch dimension
Example #6
def _forward(self, x):
    x = ivy.expand_dims(x, 0)  # add batch dimension
    x = ivy.tanh(self._layers[0](x))
    x = ivy.tanh(self._layers[1](x))
    return ivy.tanh(self._layers[2](x))[0]  # strip batch dimension
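
Examples #5 and #6 are the same pattern: a small multilayer perceptron whose layers are squashed with ivy.tanh, with a batch dimension added on the way in and stripped on the way out. A minimal self-contained version, assuming the stateful ivy.Module / ivy.Linear API of the same ivy generation and hypothetical layer sizes:

import ivy

class TanhMLP(ivy.Module):

    def __init__(self, in_size=4, hidden_size=64, out_size=1):
        self._linear0 = ivy.Linear(in_size, hidden_size)
        self._linear1 = ivy.Linear(hidden_size, hidden_size)
        self._linear2 = ivy.Linear(hidden_size, out_size)
        ivy.Module.__init__(self)

    def _forward(self, x):
        x = ivy.expand_dims(x, 0)  # add batch dimension
        x = ivy.tanh(self._linear0(x))
        x = ivy.tanh(self._linear1(x))
        return ivy.tanh(self._linear2(x))[0]  # strip batch dimension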