Example 1
class random_normal(RandomDistribution):
    r"""
    Returns a tensor with the specified shape, with random values from a normal
    distribution.
    
    Parameters
    ----------
    shape: <K, i32> (Required)
        * Target output tensor shape.
        * ``K`` is the rank of the output tensor.
          ``shape[k] > 0`` for ``k = 0,..., K-1``.
    mean: const<f32> (Optional)
        * The mean (center) of the normal distribution. Defaults to ``0.0``.
    stddev: const<f32> (Optional)
        * The standard deviation (width) of the normal distribution. Defaults to ``1.0``.
    seed: const<i32> (Optional)
        * Seed to create a reproducible sequence of values across multiple invocations.
    
    Returns
    -------
    <\*, T>
        * A tensor of the given target output shape filled with random values.

    Attributes
    ----------
    T: fp16, fp32

    See Also
    --------
    random_categorical, random_bernoulli, random_uniform
    """
    
    input_spec = (
        InputSpec(
            shape=IntTensorInputType(),
            mean=FloatInputType(const=True, optional=True),
            stddev=FloatInputType(const=True, optional=True),
            seed=IntInputType(const=True, optional=True),
        )
        + RandomDistribution.input_spec
    )

    def default_inputs(self):
        return super().default_inputs() + \
            DefaultInputs(
                mean=0.,
                stddev=1.,
                seed=-1,
            )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def type_inference(self):
        if self.mean.dtype != self.stddev.dtype:
            raise ValueError("Incompatible primitive types in random_normal operation")
        self.out_dtype = self.mean.dtype
        return super().type_inference()
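
A minimal NumPy sketch of the sampling semantics documented above. The helper name and the treatment of ``seed == -1`` as "unseeded" are assumptions of this sketch, not the backend implementation:

import numpy as np

def random_normal_reference(shape, mean=0.0, stddev=1.0, seed=-1):
    # seed == -1 is taken to mean "no fixed seed" in this sketch.
    rng = np.random.default_rng(None if seed == -1 else seed)
    return rng.normal(loc=mean, scale=stddev, size=tuple(shape)).astype(np.float32)

samples = random_normal_reference([2, 3], mean=0.0, stddev=1.0, seed=42)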
Example 2
class thresholded_relu(Operation):
    """
    Return ``x`` if ``x >= alpha``, otherwise return ``0``.

    Parameters
    ----------
    x: tensor<\*?, T> (Required)
    alpha: const fp32 (Optional)
        * Default is ``1``.

    Returns
    -------
    tensor<\*, T>
        * A tensor of the same shape and type as ``x``.

    Attributes
    ----------
    T: fp32
    """

    input_spec = InputSpec(
        x=ScalarOrTensorInputType(),
        alpha=FloatInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(alpha=1., )

    def __init__(self, **kwargs):
        super(thresholded_relu, self).__init__(**kwargs)

    def type_inference(self):
        return self.x.sym_type

    @precondition(allow=VALUE)
    def value_inference(self):
        # Copy so we do not mutate the cached input value in place.
        y = np.copy(self.x.val)
        y[y < self.alpha.val] = 0
        return y
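
A quick worked example of the piecewise definition (plain NumPy, illustrative values):

import numpy as np

x = np.array([-1.0, 0.5, 1.0, 2.0], dtype=np.float32)
alpha = 1.0
y = np.where(x >= alpha, x, 0.0)  # -> [0., 0., 1., 2.]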
Example 3
class threshold(Operation):
    """
    Set a lower bound ``alpha`` to the values in the input ``x``, element-wise.
    Any values less than ``alpha`` are set to ``alpha``.

    Parameters
    ----------
    x: tensor<[\*d], T> (Required)
    alpha: const fp32 (Required)

    Returns
    -------
    tensor<[\*d], T>
        * A tensor of the same shape as ``x``.

    Attributes
    ----------
    T: fp16, fp32, i32
    """

    input_spec = InputSpec(
        x=ScalarOrTensorInputType(),
        alpha=FloatInputType(const=True),
    )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def type_inference(self):
        return self.x.sym_type

    @precondition(allow=VALUE)
    def value_inference(self):
        return np.maximum(self.x.val, self.alpha.val)
Example 4
class random_bernoulli(RandomDistribution):
    r"""
    Returns a tensor with the specified shape, with random values from a Bernoulli
    distribution.
    
    .. math::
       f(k) = \begin{cases}
                 1 - p & \text{if } k = 0 \\
                 p     & \text{if } k = 1
              \end{cases}

    for :math:`k` in :math:`\{0, 1\}`.
    
    Parameters
    ----------
    shape: <K, i32> (Required)
        * Target output tensor shape.
        * ``K`` is the rank of the output tensor.
          ``shape[k] > 0`` for ``k = 0,..., K-1``.
    prob: const<f32> (Optional)
        * The probability of sampling ``1``. Defaults to ``0.5``.
    seed: const<i32> (Optional)
        * Seed to create a reproducible sequence of values across multiple invocations.
    
    Returns
    -------
    <\*, T>
        * A tensor of the given target output shape filled with random values.

    Attributes
    ----------
    T: fp16, fp32

    See Also
    --------
    random_categorical, random_normal, random_uniform
    """
    
    input_spec = (
        InputSpec(
            shape=IntTensorInputType(),
            prob=FloatInputType(const=True, optional=True),
            seed=IntInputType(const=True, optional=True),
        )
        + RandomDistribution.input_spec
    )

    def default_inputs(self):
        return super().default_inputs() + \
            DefaultInputs(
                seed=-1,
                prob=0.5,
            )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def type_inference(self):
        self.out_dtype = self.prob.dtype
        return super().type_inference()
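
The sampling semantics can be sketched with NumPy as follows; the helper and its seed convention are illustrative assumptions:

import numpy as np

def random_bernoulli_reference(shape, prob=0.5, seed=-1):
    # seed == -1 is taken to mean "no fixed seed" in this sketch.
    rng = np.random.default_rng(None if seed == -1 else seed)
    # Each element is 1 with probability `prob`, else 0.
    return (rng.random(tuple(shape)) < prob).astype(np.float32)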
Example 5
class sigmoid_hard(Operation):
    """
    Return ``min( max( alpha * x + beta, 0 ), 1 )`` elementwise.

    Parameters
    ----------
    x: tensor<\*?, T> (Required)
    alpha: const fp32 (Optional)
        * Default is ``0.2``.
    beta: const fp32 (Optional)
        * Default is ``0.5``.

    Returns
    -------
    tensor<\*?, T>
        * A tensor of the same shape and type as ``x``.

    Attributes
    ----------
    T: fp32
    """

    input_spec = InputSpec(
        x=ScalarOrTensorInputType(),
        alpha=FloatInputType(const=True, optional=True),
        beta=FloatInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(
            alpha=0.2,
            beta=0.5,
            )

    def __init__(self, **kwargs):
        super(sigmoid_hard, self).__init__(**kwargs)

    @precondition(allow=VALUE)
    def value_inference(self):
        return np.minimum(
            np.maximum((self.alpha.val * self.x.val) + self.beta.val, 0), 1
        )

    def type_inference(self):
        return self.x.sym_type
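
A worked example of the hard-sigmoid formula with the default ``alpha`` and ``beta``:

import numpy as np

x = np.array([-5.0, 0.0, 5.0], dtype=np.float32)
alpha, beta = 0.2, 0.5
y = np.minimum(np.maximum(alpha * x + beta, 0.0), 1.0)  # -> [0., 0.5, 1.]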
Example 6
class scaled_tanh(Operation):
    """
    Return ``alpha * tanh(beta * x)`` elementwise.

    Parameters
    ----------
    x: tensor<\*?, T> (Required)
        * Input range is ``(-inf, inf)``.
    alpha: const fp32 (Optional)
        * Default is ``1``.
    beta: const fp32 (Optional)
        * Default is ``1``.

    Returns
    -------
    tensor<\*?, T>
        * A tensor of the same shape and type as ``x``.

    Attributes
    ----------
    T: fp32
    """

    input_spec = InputSpec(
        x=ScalarOrTensorInputType(),
        alpha=FloatInputType(const=True, optional=True),
        beta=FloatInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(
            alpha=1.,
            beta=1.,
        )

    def __init__(self, **kwargs):
        super(scaled_tanh, self).__init__(**kwargs)

    @precondition(allow=VALUE)
    def value_inference(self):
        return self.alpha.val * np.tanh(self.x.val * self.beta.val)

    def type_inference(self):
        return self.x.sym_type
Example 7
class linear_activation(Operation):
    """
    Apply elementwise ``x * alpha + beta``.

    Parameters
    ----------
    x: tensor<\*?, T> (Required)
    alpha: const fp32 (Required)
    beta: const fp32 (Optional)
        * Default is ``0``.

    Returns
    -------
    tensor<\*?, T>
        * A tensor of the same shape and type as ``x``.

    Attributes
    ----------
    T: fp32
    """

    input_spec = InputSpec(
        x=ScalarOrTensorInputType(),
        alpha=FloatInputType(const=True),
        beta=FloatInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(
            beta=0.,
            )

    def __init__(self, **kwargs):
        super(linear_activation, self).__init__(**kwargs)

    @precondition(allow=VALUE)
    def value_inference(self):
        return self.alpha.val * self.x.val + self.beta.val

    def type_inference(self):
        return self.x.sym_type
Example 8
class clamped_relu(Operation):
    """
    If ``x >= 0`` return elementwise ``min(beta, x)``, otherwise return
    ``min(beta, alpha * x)``.

    Parameters
    ----------
    x: tensor<\*?, T> (Required)
    alpha: const fp32 (Required)
    beta: const fp32 (Required)

    Returns
    -------
    tensor<\*?, T>
        * A tensor of the same type and shape as ``x``.

    Attributes
    ----------
    T: fp32
    """

    input_spec = InputSpec(
        x=ScalarOrTensorInputType(),
        alpha=FloatInputType(const=True),
        beta=FloatInputType(const=True),
    )

    def __init__(self, **kwargs):
        super(clamped_relu, self).__init__(**kwargs)

    @precondition(allow=VALUE)
    def value_inference(self):
        # Split into the x >= 0 branch and the x < 0 branch; assuming
        # beta >= 0, at most one of the two terms is nonzero per element.
        x = np.minimum(np.maximum(self.x.val, 0), self.beta.val)
        y = np.minimum(
            np.minimum(self.x.val, 0) * self.alpha.val, self.beta.val)
        return x + y

    def type_inference(self):
        return self.x.sym_type
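
The two-term decomposition used in value_inference can be checked against the direct piecewise definition. It agrees elementwise when ``beta >= 0``, an assumption of this sketch:

import numpy as np

x = np.array([-2.0, -0.5, 0.0, 3.0, 10.0], dtype=np.float32)
alpha, beta = 0.5, 4.0
direct = np.where(x >= 0, np.minimum(beta, x), np.minimum(beta, alpha * x))
decomposed = (np.minimum(np.maximum(x, 0), beta)
              + np.minimum(np.minimum(x, 0) * alpha, beta))
assert np.allclose(direct, decomposed)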
Example 9
class clip(Operation):
    """
    Clip the values in the input ``x`` to ``[alpha, beta]``, element-wise.
    Any values less than ``alpha`` are set to ``alpha``, and any values greater
    than ``beta`` are set to ``beta``.
    
    Parameters
    ----------
    x: tensor<[\*d], T> (Required)
    alpha: const fp32 (Required)
    beta: const fp32 (Required)
    
    Returns
    -------
    tensor<[\*d], T>
        * A tensor of the same shape as ``x``.
    
    Attributes
    ----------
    T: fp32
    """

    input_spec = InputSpec(
        x=ScalarOrTensorInputType(),
        alpha=FloatInputType(const=True),
        beta=FloatInputType(const=True),
    )

    def __init__(self, **kwargs):
        super(clip, self).__init__(**kwargs)

    def type_inference(self):
        return self.x.sym_type

    @precondition(allow=VALUE)
    def value_inference(self):
        return np.minimum(np.maximum(self.x.val, self.alpha.val),
                          self.beta.val)
Example 10
class TfLSTMBase(Operation):
    """
    Common LSTM inputs for BlockLSTMCell and BlockLSTM.
    """

    input_spec = InputSpec(
        c_prev=TensorInputType(),  # [batch, hidden_dim]
        h_prev=TensorInputType(),  # [batch, hidden_dim]
        # weight: [input_dim + hidden_dim, 4*hidden_dim] (icfo layout)
        weight=TensorInputType(const=True),
        forget_bias=FloatInputType(const=True, optional=True),
        # cell_clip == None implies not using cell clip
        cell_clip=FloatInputType(const=True, optional=True),
        # If use_peephole == False, weight_peep_* is ignored
        use_peephole=BoolInputType(const=True, optional=True),
        weight_peep_i=TensorInputType(const=True,
                                      optional=True),  # [hidden_dim,]
        weight_peep_f=TensorInputType(const=True,
                                      optional=True),  # [hidden_dim,]
        weight_peep_o=TensorInputType(const=True,
                                      optional=True),  # [hidden_dim,]
        bias=TensorInputType(const=True),  # [4*hidden_dim] (icfo layout)
    )

    def default_inputs(self):
        return DefaultInputs(
            forget_bias=1.,
            use_peephole=False,
        )

    def _check_peephole_weights(self):
        # Check weight_peep_*
        if self.use_peephole.val:
            if (self.weight_peep_i is None or self.weight_peep_f is None
                    or self.weight_peep_o is None):
                raise ValueError(
                    "weight_peep_* cannot be None when use_peephole is True")
Example 11
class resample(_resample_iOS15):
    """
    iOS16 version of resample supports float16 coordinates
    """
    input_spec = InputSpec(
        x=TensorInputType(),
        coordinates=ScalarOrTensorInputType(type_domain=(np.int32, np.float32, np.float16)),
        sampling_mode=StringInputType(const=True),
        padding_mode=StringInputType(const=True),
        padding_value=FloatInputType(const=True),
        coordinates_mode=StringInputType(const=True),
        align_corners=BoolInputType(const=True),
    )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def type_inference(self):
        return super().type_inference()
Example 12
class elu(Operation):
    """
    If ``x > 0`` return elementwise ``x``, otherwise return ``alpha * (e^x - 1)``.

    Parameters
    ----------
    x: tensor<\*?, T> (Required)
    alpha: const fp32 (Optional)
        * Default is ``1``.

    Returns
    -------
    tensor<\*?, T>
        * A tensor of the same shape and type as ``x``.

    Attributes
    ----------
    T: fp32
    """

    input_spec = InputSpec(
        x=ScalarOrTensorInputType(),
        alpha=FloatInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(
            alpha=1.,
            )

    def __init__(self, **kwargs):
        super(elu, self).__init__(**kwargs)

    @precondition(allow=VALUE)
    def value_inference(self):
        b = np.copy(self.x.val)
        b[b < 0] = self.alpha.val * (np.exp(b[b < 0]) - 1)
        return b

    def type_inference(self):
        return self.x.sym_type
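
A short numeric check of the ELU definition with the default ``alpha``:

import numpy as np

x = np.array([-1.0, 0.0, 2.0], dtype=np.float32)
alpha = 1.0
y = np.where(x > 0, x, alpha * (np.exp(x) - 1))  # -> [-0.6321..., 0., 2.]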
Example 13
class leaky_relu(Operation):
    """
    If ``x >= 0`` apply ``x`` elementwise, otherwise apply ``alpha * x`` elementwise.

    Parameters
    ----------
    x: tensor<\*?, T> (Required)
    alpha: const fp32 (Optional)
        * Default is ``0.01``.

    Returns
    -------
    tensor<\*?, T>
        * A tensor of the same shape and type as ``x``.

    Attributes
    ----------
    T: fp32
    """

    input_spec = InputSpec(
        x=ScalarOrTensorInputType(),
        alpha=FloatInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(
            alpha=0.01,
            )

    def __init__(self, **kwargs):
        super(leaky_relu, self).__init__(**kwargs)

    @precondition(allow=VALUE)
    def value_inference(self):
        b = np.copy(self.x.val)
        b[b < 0] *= self.alpha.val
        return b

    def type_inference(self):
        return self.x.sym_type
Example 14
class rsqrt(Operation):
    """
    Return the reciprocal value of the square root of the input ``x``, element-wise.

    Parameters
    ----------
    x: tensor<[\*d], T> (Required)
    epsilon: const fp32 (Optional, default=1e-12)
        * This is a small constant that is added to the input, before applying the
          ``rsqrt`` function, for stability.
        * ``y = 1 / sqrt(x + epsilon)``.

    Returns
    -------
    tensor<[\*d], T>
        * A tensor of the same shape as ``x``.

    Attributes
    ----------
    T: fp16, fp32
    """

    input_spec = InputSpec(
        x=ScalarOrTensorInputType(),
        epsilon=FloatInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(epsilon=1e-12, )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def type_inference(self):
        return self.x.sym_type

    @precondition(allow=VALUE)
    def value_inference(self):
        result = 1.0 / np.sqrt(self.x.val + self.epsilon.val)
        return _maintain_shape(self.x.val, result)
Example 15
class inverse(Operation):
    """
    Return the reciprocal value of the input ``x``, element-wise.

    Parameters
    ----------
    x: tensor<[\*d], T> (Required)
    epsilon: const fp32 (Optional, default=1e-4)
        * This is a small constant that is added to the input, before taking its
          inverse, for stability.
        * ``y = 1 / (x + epsilon)``.

    Returns
    -------
    tensor<[\*d], T>
        * A tensor of the same shape as ``x``.

    Attributes
    ----------
    T: fp32
    """

    input_spec = InputSpec(
        x=ScalarOrTensorInputType(),
        epsilon=FloatInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(epsilon=1e-4, )

    def __init__(self, **kwargs):
        super(inverse, self).__init__(**kwargs)

    def type_inference(self):
        return self.x.sym_type

    @precondition(allow=VALUE)
    def value_inference(self):
        return np.reciprocal(self.x.val + self.epsilon.val)
Example 16
class random_uniform(RandomDistribution):
    r"""
    Returns a tensor with the specified shape with random values from a uniform
    distribution. Samples are uniformly distributed over the half-open interval
    ``[low, high)`` (includes low, but excludes high).
    
    .. math::
       p(x) = \frac{1}{high - low}
    
    For a real number :math:`x`.
    
    When ``high == low``, values of ``low`` will be returned. If ``high < low``,
    the results are officially undefined and may eventually raise an error.
    
    Parameters
    ----------
    shape: <K, i32> (Required)
        * Target output tensor shape.
        * ``K`` is the rank of the output tensor.
          ``shape[k] > 0`` for ``k = 0,..., K-1``.
    low: const<f32> (Optional)
        * Lower boundary of the output interval (inclusive). Defaults to ``0.0``.
    high: const<f32> (Optional)
        * Upper boundary of the output interval (exclusive). Defaults to ``1.0``.
    seed: const<i32> (Optional)
        * Seed to create a reproducible sequence of values across multiple invocations.
    
    Returns
    -------
    <\*, T>
        * A tensor of the given target output shape filled with random values.

    Attributes
    ----------
    T: fp16, fp32

    See Also
    --------
    random_categorical, random_bernoulli, random_normal
    """
    
    input_spec = (
        InputSpec(
            shape=IntTensorInputType(),
            low=FloatInputType(const=True, optional=True),
            high=FloatInputType(const=True, optional=True),
            seed=IntInputType(const=True, optional=True),
        )
        + RandomDistribution.input_spec
    )

    def default_inputs(self):
        return super().default_inputs() + \
            DefaultInputs(
                low=0.,
                high=1.,
                seed=-1,
            )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def type_inference(self):
        if self.low.dtype != self.high.dtype:
            raise ValueError("Incompatible primitive types in random_uniform operation")
        self.out_dtype = self.low.dtype
        return super().type_inference()
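
A NumPy sketch of the documented ``[low, high)`` sampling semantics; the helper and its seed convention are illustrative assumptions:

import numpy as np

def random_uniform_reference(shape, low=0.0, high=1.0, seed=-1):
    # seed == -1 is taken to mean "no fixed seed" in this sketch.
    rng = np.random.default_rng(None if seed == -1 else seed)
    # Samples fall in the half-open interval [low, high).
    return rng.uniform(low, high, size=tuple(shape)).astype(np.float32)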
Example 17
class non_maximum_suppression(Operation):
    """
    Applies non-maximum suppression (NMS) on the input box coordinates according
    to their intersection-over-union (IoU).

    NMS selects a subset of bounding boxes in descending order of score, and
    removes boxes that have high IoU overlap with previously selected boxes.


    Parameters
    ----------

    boxes: tensor<[n, B, 4], T> (Required)
        * Box coordinates on which to perform NMS.
    scores: tensor<[n, B, K], T> (Required)
        * Scores for each one of the boxes.
    iou_threshold: const<T> (Required)
        * The intersection over union (``IoU``) threshold over which boxes are
          suppressed. NMS removes all overlapping boxes with ``IoU > iou_threshold``.
    score_threshold: const<T> (Required)
        * Before IoU suppression is performed, boxes with class scores below this
          threshold are rejected.
    max_boxes: const<i32> (Required)
        * Maximum number of boxes to select. If fewer boxes survive, the output
          is padded up to this number.
    per_class_suppression: const<bool> (Optional)
        * Defaults to ``False``.
        * If ``True``, suppression is performed independently within boxes of each class.

    Returns
    -------
    tensor<[n, max_boxes, 4], T>
        * Coordinates of selected boxes.
    tensor<[n, max_boxes, K], T>
        * Scores of selected boxes.
    tensor<[n, max_boxes], i32>
        * Indices of selected boxes.
    tensor<[n], i32>
        * Number of boxes selected for each batch.

    Attributes
    ----------
    T: fp32
    """

    input_spec = InputSpec(
        boxes=TensorInputType(),
        scores=TensorInputType(),
        iou_threshold=FloatInputType(const=True),
        score_threshold=FloatInputType(const=True),
        max_boxes=IntInputType(const=True),
        per_class_suppression=BoolInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(
            per_class_suppression=False)

    def __init__(self, **kwargs):
        super(non_maximum_suppression, self).__init__(**kwargs)

    def type_inference(self):
        boxes_dtype = self.boxes.dtype
        scores_dtype = self.scores.dtype
        n_batch, _, n_score = self.scores.shape
        max_boxes = self.max_boxes.val

        return (
            types.tensor(boxes_dtype, (n_batch, max_boxes, 4)),
            types.tensor(scores_dtype, (n_batch, max_boxes, n_score)),
            types.tensor(types.int32, (n_batch, max_boxes)),
            types.tensor(types.int32, (n_batch,)),
        )
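
The selection logic can be sketched for a single batch and a single class in plain NumPy. Corner-format boxes ``(x1, y1, x2, y2)`` are an assumption of this sketch; the docstring above does not pin down the box layout:

import numpy as np

def nms_reference(boxes, scores, iou_threshold, score_threshold, max_boxes):
    # boxes: (B, 4), scores: (B,). Greedy NMS: repeatedly keep the highest
    # scoring box and drop remaining boxes with IoU above the threshold.
    order = np.argsort(-scores)
    order = order[scores[order] > score_threshold]
    keep = []
    while order.size > 0 and len(keep) < max_boxes:
        i = order[0]
        keep.append(i)
        rest = order[1:]
        # Intersection of box i with every remaining candidate.
        x1 = np.maximum(boxes[i, 0], boxes[rest, 0])
        y1 = np.maximum(boxes[i, 1], boxes[rest, 1])
        x2 = np.minimum(boxes[i, 2], boxes[rest, 2])
        y2 = np.minimum(boxes[i, 3], boxes[rest, 3])
        inter = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        area_rest = ((boxes[rest, 2] - boxes[rest, 0])
                     * (boxes[rest, 3] - boxes[rest, 1]))
        iou = inter / (area_i + area_rest - inter)
        order = rest[iou <= iou_threshold]
    return np.array(keep, dtype=np.int32)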
Example 18
class pad(Operation):
    """
    Pad a tensor.

    Parameters
    ----------
    
    x: tensor<[\*D_in],T>  (Required)

    pad: tensor<[2\*N],i32> (Required)
        ``N <= D_in``. Last ``N`` dimensions of ``x`` are padded as follows:
        
        * For each dimension ``i`` of ``x``, if ``i >= D_in - N``, with
          ``j = i - (D_in - N)``:
            * pad ``pad[2*j]`` elements before ``x[..,i,..]``
            * pad ``pad[2*j+1]`` elements after ``x[..,i,..]``
        * If mode is "reflect" then ``pad[2*i]`` and ``pad[2*i+1]`` can be at
          most ``D[i]-1``.
        * If mode is "replicate" then ``pad[2*i]`` and ``pad[2*i+1]`` can be
          at most ``D[i]``.

    mode: const<str> (Optional)
        * Defaults to ``constant``.
        * Must be one of the following values:
          ``constant``, ``reflect``, or ``replicate``.

    constant_val: const<T> (Optional)
        * Defaults to ``0``.
        * Constant value to pad. Ignored if ``mode != constant``.

    Returns
    -------
    tensor<[\*D_out],T>
        * Tensor with same type as the input.

    Attributes
    ----------
    T: fp32
    """

    input_spec = InputSpec(
        x=TensorInputType(),
        pad=IntTensorInputType(),
        mode=StringInputType(const=True, optional=True),
        constant_val=FloatInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(
            mode="constant",
            constant_val=0.,
            )

    def __init__(self, **kwargs):
        super(pad, self).__init__(**kwargs)

    def type_inference(self):
        in_shape = self.x.shape
        ret_shape = list(in_shape)
        pad = self.pad
        if len(pad.shape) != 1:
            raise ValueError("Pad should be a 1D tensor!")
        if self.mode and self.mode.val not in {'constant', 'reflect', 'replicate'}:
            raise ValueError("Pad mode should be one of {'constant', 'reflect', 'replicate'}")

        if pad.val is None:
            for i in range(self.pad.shape[0]//2):
                ret_shape[-self.pad.shape[0]//2+i] = get_new_symbol()
        else:
            pad = pad.val
            pad = pad.copy()

            if len(pad) % 2 != 0:
                raise ValueError("Number of elements in the argument Pad must be divisible by 2.")

            pad = pad.reshape(-1, 2)

            if pad.shape[0] > len(ret_shape):
                raise ValueError("Number of dimensions specified through pad must less than or equal to rank of input x")

            for i in range(len(pad)):
                ret_shape[-len(pad) + i] = ret_shape[-len(pad) + i] + pad[i][0] + pad[i][1]

        return types.tensor(self.x.dtype, tuple(ret_shape))

    @precondition(allow=VALUE)
    def value_inference(self):
        # NumPy `edge` mode is equivalent to `replicate` mode of PyTorch and CoreML
        mode = "edge" if self.mode.val == "replicate" else self.mode.val
        pad_val = self.pad.val

        if pad_val is None:
            return None

        if len(self.x.val.shape) > (pad_val.shape[0] // 2):
            updated_pad = np.zeros(len(self.x.val.shape) * 2)
            updated_pad[-pad_val.shape[0] :] = pad_val
            pad_val = updated_pad
        pad_val = pad_val.reshape(-1, 2).astype(np.int32)
        if mode == "constant":
            return np.pad(
                self.x.val, pad_val, mode, constant_values=self.constant_val.val
            )
        # NumPy does not support non-constant mode and constant_values argument
        return np.pad(self.x.val, pad_val, mode)
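
A small example of the padding semantics for the common case ``N < D_in``, where ``pad`` applies to the trailing dimensions only (this mirrors the zero-extension done in value_inference above):

import numpy as np

x = np.arange(6, dtype=np.float32).reshape(2, 3)
pad = np.array([1, 2], dtype=np.int32)  # last dim: 1 before, 2 after
full_pad = np.zeros(x.ndim * 2, dtype=np.int32)
full_pad[-pad.shape[0]:] = pad
y = np.pad(x, full_pad.reshape(-1, 2), "constant", constant_values=0.0)
assert y.shape == (2, 6)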
Example 19
class lstm(Operation):
    r"""
    Single long short-term memory (LSTM) sequence.

    .. math::
       i_t = \mathrm{recurrent\_activation}(W_{ii} x_t + B_{ii} + W_{hi} h_{t-1} + B_{hi})

    .. math::
       f_t = \mathrm{recurrent\_activation}(W_{if} x_t + B_{if} + W_{hf} h_{t-1} + B_{hf})

    .. math::
       z_t = \mathrm{cell\_activation}(W_{iz} x_t + B_{iz} + W_{hz} h_{t-1} + B_{hz})

    .. math::
       o_t = \mathrm{recurrent\_activation}(W_{io} x_t + B_{io} + W_{ho} h_{t-1} + B_{ho})

    .. math::
       c_t = f_t * c_{t-1} + i_t * z_t

    .. math::
       h_t = o_t * \mathrm{activation}(c_t)

    Where:

    * ``i_t``, ``f_t``, ``o_t``, and ``z_t`` are input, forget, output, and cell gates,
      respectively, at time ``t``.
    * ``c_t`` is cell state at time ``t``.
    * ``h_t`` is the hidden state at time ``t``.
    * ``W_{ii}``, ``W_{if}``, ``W_{io}``, and ``W_{iz}`` are input weights for input,
      forget, output and cell gate, respectively.
    * ``W_{hi}``, ``W_{hf}``, ``W_{ho}``, and ``W_{hz}`` are recurrent weights for input,
      forget, output and cell gate, respectively.

    Parameters
    ----------
    x: <s, b, I, T> (Required)
        * ``s`` is the sequence length, ``b`` is the batch size, and ``I`` is the
          input dimension.

    initial_h: <b, DIRECTION*H, T> (Required)
        * Initial hidden state. ``DIRECTION = 1`` for uni-directional, ``2`` for
          bi-directional LSTM.
        * ``H`` denotes hidden size.
        * ``[b, :H]`` and ``[b, H:]`` represent the forward and reverse direction
          values, respectively.

    initial_c: <b, DIRECTION*H, T> (Required)
        * Initial cell state.
        * Format is same as ``initial_h``.

    weight_ih: const<4*H, I, T> (Required)
        * Input-hidden weight matrix
        * Weight tensor should be in order of
          ``[input_gate, forget_gate, output_gate, cell_gate]``.
        * If direction=="bidirectional", this is applied in forward direction.
        * If direction=="forward" or "backward" these weights are used.

    weight_hh: const<4*H, H, T> (Required)
        * Hidden-hidden weight matrix.
        * Weight tensor should be in order of
          ``[input_gate, forget_gate, output_gate, cell_gate]``.
        * If direction=="bidirectional", this is applied in forward direction.
        * If direction=="forward" or "backward" these weights are used.

    bias: const<4*H, T> (Optional) [Default all 0s]
        * bias = input-hidden bias + hidden-hidden bias
        * If direction=="bidirectional", this is applied in forward direction.
        * If direction=="forward" or "backward" this bias are used.

    peephole: const<3*H, T> (Optional, default to 0)
        * Weight tensor for peephole.
        * Order is ``[input_gate, forget_gate, output_gate]``.
        * Shape of each peephole vector is ``(H,)`` (``H`` is hidden size).
        * If direction=="bidirectional", this is applied in forward direction.
        * If direction=="forward" or "backward" these weights are used.

    weight_ih_back: const<4*H, I, T> (Optional)
        * Input-hidden weight matrix for the backward direction of `bidirectional LSTM`.
        * Weight tensor should be in order of
          ``[input_gate, forget_gate, output_gate, cell_gate]``.
        * Must be provided for `bidirectional LSTM`.
        * This is only used when `direction` is "bidirectional".
        * For direction="reverse" use `weight_ih` instead.

    weight_hh_back: const<4*H, H, T> (Optional)
        * Hidden-hidden weight matrix for the backward direction of `bidirectional LSTM`.
        * Weight tensor should be in order of
          ``[input_gate, forget_gate, output_gate, cell_gate]``.
        * Must be provided for `bidirectional LSTM`.
        * This is only used when `direction` is "bidirectional".
        * For direction="reverse" use `weight_hh` instead.

    bias_back: const<4*H, T> (Optional) [Default all 0s]
        * bias = input-hidden bias + hidden-hidden bias.
        * Bias for the backward direction of `bidirectional LSTM`.
        * This is only used when `direction` is "bidirectional".
        * For direction="reverse" use `bias` instead.

    peephole_back: const<3*H, T> (Optional, default to 0)
        * Weight tensor for peephole in backward direction for `bidirectional LSTM`.
        * Order is ``[input_gate, forget_gate, output_gate]``.
        * Shape of each peephole vector is ``(H,)`` (``H`` is hidden size).
        * This is only used when `direction` is "bidirectional".
        * For direction="reverse" use `peephole` instead.

    direction: const<str> (Optional) [Default=forward]
        * One of the following: ``forward``, ``reverse``, or ``bidirectional``.
        * Must match ``DIRECTION`` in initial states and weight parameters.

    output_sequence: const<bool> (Optional) [Default=False]
        * Outputs every step if ``True``.

    recurrent_activation: const<str> (Optional) [Default=sigmoid]
        * Activation applied on input, forget, and output gates.

    cell_activation: const<str> (Optional) [Default=tanh]
        * Activation applied on cell gate.

    activation: const<str> (Optional) [Default=tanh]
        * Activation applied to the updated cell state to produce the hidden state.

    clip: const<fp32> (Optional) [Default=None]
        * Cell gate is clipped to ``[-clip, +clip]``.

    Returns
    -------
    <s, b, DIRECTION*H, T> or <1, b, DIRECTION*H, T>
        * If ``output_sequence == True`` (hidden states from every step):
          ``<s, b, DIRECTION*H, T>``.
        * Else ``<1, b, DIRECTION*H, T>`` (hidden states of the final step).
    <b, DIRECTION*H, T>
        * Hidden states of the final step.
    <b, DIRECTION*H, T>
        * Memory state of the final step.

    Attributes
    ----------
    T: fp32
    """

    input_spec = InputSpec(
        x=TensorInputType(),
        initial_h=TensorInputType(),
        initial_c=TensorInputType(),
        weight_ih=TensorInputType(const=True),  # ifoz layout,
        weight_hh=TensorInputType(const=True),  # ifoz layout
        bias=TensorInputType(const=True, optional=True),  # ifoz layout
        peephole=TensorInputType(const=True, optional=True),  # ifo layout
        weight_ih_back=TensorInputType(const=True,
                                       optional=True),  # ifoz layout,
        weight_hh_back=TensorInputType(const=True,
                                       optional=True),  # ifoz layout
        bias_back=TensorInputType(const=True, optional=True),  # ifoz layout
        peephole_back=TensorInputType(const=True, optional=True),  # ifo layout
        direction=StringInputType(const=True, optional=True),
        output_sequence=BoolInputType(const=True, optional=True),
        recurrent_activation=StringInputType(const=True, optional=True),
        cell_activation=StringInputType(const=True, optional=True),
        activation=StringInputType(const=True, optional=True),
        clip=FloatInputType(const=True, optional=True),
    )

    def default_inputs(self):
        return DefaultInputs(bias=None,
                             direction="forward",
                             output_sequence=False,
                             recurrent_activation="sigmoid",
                             cell_activation="tanh",
                             activation="tanh",
                             peephole=None,
                             clip=None)

    def __init__(self, **kwargs):
        super(lstm, self).__init__(**kwargs)

    def type_inference(self):
        if self.x.rank != 3:
            raise ValueError(
                "Invalid input shape. Expecting Rank 3 input, got {}".format(
                    self.x.rank))
        sequence_length, batch_size, input_size = self.x.shape

        def weight_shape_check(wt_ih, wt_hh):
            if wt_ih.rank != 2 or wt_hh.rank != 2:
                raise ValueError(
                    "Expecting Rank 2 input, got weight_ih rank: {}, "
                    "weight_hh rank: {}".format(wt_ih.rank, wt_hh.rank))

            hidden_size = wt_hh.shape[1]
            if wt_hh.shape[0] // hidden_size != 4 or wt_ih.shape[0] // hidden_size != 4:
                raise ValueError(
                    "Incorrect weight matrix: hidden dim size mismatch. "
                    "Provided weight_ih {}, weight_hh {}. "
                    "Expecting <4*H, H>".format(wt_ih.shape, wt_hh.shape))

        direction = self.direction.val
        valid_directions = {"forward", "reverse", "bidirectional"}
        if direction not in valid_directions:
            raise ValueError(
                "Direction {} not supported. Supported directions: {}".format(
                    direction, valid_directions))

        weight_shape_check(self.weight_ih, self.weight_hh)
        if direction == "bidirectional":
            weight_shape_check(self.weight_ih_back, self.weight_hh_back)

        hidden_dim, hidden_size = self.weight_hh.shape

        dim_factor = 8 if direction == "bidirectional" else 4
        out_seq_len = sequence_length if self.output_sequence.val else 1
        num_directions = dim_factor // 4
        output_shape = [out_seq_len, batch_size, num_directions * hidden_size]
        output_h_shape = [batch_size, num_directions * hidden_size]
        output_c_shape = [batch_size, num_directions * hidden_size]
        return (
            types.tensor(self.x.dtype, tuple(output_shape)),
            types.tensor(self.x.dtype, tuple(output_h_shape)),
            types.tensor(self.x.dtype, tuple(output_c_shape)),
        )
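
As a reference for the equations above, one time step with the default activations can be sketched in NumPy. This is a sketch only: peepholes and clipping are omitted, and the ifoz gate order follows the weight layout documented above:

import numpy as np

def lstm_cell_reference(x_t, h_prev, c_prev, W_ih, W_hh, b):
    # W_ih: (4H, I), W_hh: (4H, H), b: (4H,), gates stacked in
    # [input, forget, output, cell] (ifoz) order.
    def sigmoid(v):
        return 1.0 / (1.0 + np.exp(-v))

    H = W_hh.shape[1]
    gates = W_ih @ x_t + W_hh @ h_prev + b
    i = sigmoid(gates[:H])           # input gate
    f = sigmoid(gates[H:2 * H])      # forget gate
    o = sigmoid(gates[2 * H:3 * H])  # output gate
    z = np.tanh(gates[3 * H:])       # cell gate
    c = f * c_prev + i * z
    h = o * np.tanh(c)
    return h, c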