Example #1
def local_exp_over_1_plus_exp(fgraph, node):
    """
    exp(x)/(1+exp(x)) -> sigm(x)
    c/(1+exp(x)) -> c*sigm(-x)

    """
    # this optimization should be done for numerical stability
    # so we don't care to check client counts
    if node.op == true_div:

        # find all the exp() terms in the numerator
        num, denom = node.inputs
        num_exp_x, num_rest, num_neg = partition_num_or_denom(num, is_exp)
        denom_1pexp, denom_rest, denom_neg = partition_num_or_denom(
            denom, is_1pexp)

        sigmoids = []
        for t in denom_1pexp:
            if t in num_exp_x:
                # case: exp(x) / (1+exp(x))
                sigmoids.append(sigmoid(t))
                del num_exp_x[num_exp_x.index(t)]
            else:
                # case: 1/(1+exp(x))
                sigmoids.append(sigmoid(-t))
            copy_stack_trace(node.outputs[0], sigmoids[-1])

        if not sigmoids:  # we didn't find any.  abort
            return
        # put the new numerator together
        new_num = sigmoids + [exp(t) for t in num_exp_x] + num_rest
        if len(new_num) == 1:
            new_num = new_num[0]
        else:
            new_num = mul(*new_num)

        if num_neg ^ denom_neg:
            new_num = -new_num

        copy_stack_trace(num, new_num)

        if len(denom_rest) == 0:
            return [new_num]
        elif len(denom_rest) == 1:
            out = new_num / denom_rest[0]
        else:
            out = new_num / mul(*denom_rest)

        copy_stack_trace(node.outputs[0], out)
        return [out]
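The two identities this rewrite relies on can be checked numerically with plain NumPy; the following is a minimal sketch (not part of the original rewrite, the helper name sigm is illustrative):

import numpy as np

def sigm(x):
    # Logistic sigmoid, the target expression of the rewrite.
    return 1.0 / (1.0 + np.exp(-x))

x = np.linspace(-5.0, 5.0, 11)
c = 3.0
assert np.allclose(np.exp(x) / (1.0 + np.exp(x)), sigm(x))  # exp(x)/(1+exp(x)) -> sigm(x)
assert np.allclose(c / (1.0 + np.exp(x)), c * sigm(-x))     # c/(1+exp(x)) -> c*sigm(-x)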
Example #2
    def test_composite_elemwise_float16(self):
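        # Build a composite Elemwise graph that mixes float16 inputs with
        # int8, int16, and float32 casts, then compile it for the GPU to
        # check that float16 composites are accepted.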
        w = bvector()
        x = vector(dtype="float16")
        y = fvector()

        cz = tanh(x + aet.cast(y, "float16"))
        o = (
            cz
            - cz ** 2
            + aet.cast(x, "int16")
            + aet.cast(x, "float32")
            + aet.cast(w, "float16")
            - aet.constant(np.float16(1.0))
        )

        aesara.function([w, x, y], o, mode=mode_with_gpu)

        v = vector(dtype="uint8")
        w = vector(dtype="float16")
        x = vector(dtype="float16")
        y = vector(dtype="float16")
        z = vector(dtype="float16")

        o = aet.switch(v, mul(w, x, y), z)
        aesara.function([v, w, x, y, z], o, mode=mode_with_gpu)
Example #3
def compute_mul(tree):
    """
    Compute the Variable that is the output of a multiplication tree.

    This is the inverse of the operation performed by `parse_mul_tree`, i.e.
    compute_mul(parse_mul_tree(var)) == var.

    Parameters
    ----------
    tree
        A multiplication tree (as output by `parse_mul_tree`).

    Returns
    -------
    object
        A Variable that computes the multiplication represented by the tree.

    """
    neg, inputs = tree
    if inputs is None:
        raise AssertionError(
            "Function `compute_mul` found a missing leaf, did you forget to "
            "call `simplify_mul` on the tree first?")
    elif isinstance(inputs, list):
        # Recurse through inputs.
        rval = mul(*list(map(compute_mul, inputs)))
    else:
        rval = inputs
    if neg:
        rval = -rval
    return rval
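A hedged illustration of the tree format `compute_mul` consumes, assuming the `[neg, inputs]` pairs produced by `parse_mul_tree` (a leaf holds a Variable, an inner node holds a list of sub-trees):

import aesara.tensor as aet

x = aet.vector("x")
y = aet.vector("y")

# Parsed tree for (-x) * y: each boolean flags whether the corresponding
# sub-expression is negated.
tree = [False, [[True, x], [False, y]]]
out = compute_mul(tree)  # an Aesara Variable equivalent to (-x) * y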
Example #4
    def infer_shape(self, fgraph, node, inputs_shapes):
        if _variable_is_none(node.inputs[1]):
            return [(mul(*inputs_shapes[0]),)]
        # axis should not be None, so there should be the same number of
        # dimensions in the input and output
        assert node.inputs[0].ndim == node.outputs[0].ndim
        assert inputs_shapes[1] == ()
        return [inputs_shapes[0]]
Example #5
    def infer_shape(self, fgraph, node, inputs_shapes):
        if _variable_is_none(node.inputs[1]):
            # That means axis = None,
            # so the array is flattened before being sorted
            return [(mul(*inputs_shapes[0]),)]
        # axis should not be None,
        # so there should be the same number of dimensions
        # in the input and output
        assert node.inputs[0].ndim == node.outputs[0].ndim
        assert inputs_shapes[1] == ()
        return [inputs_shapes[0]]
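Examples #4 and #5 implement the same shape rule: when axis is None the input is flattened, so the output is one-dimensional with `mul(*inputs_shapes[0])` elements; otherwise the output keeps the input's shape. A plain-Python sketch of that rule with a concrete shape (not Aesara code):

import numpy as np

input_shape = (3, 4, 5)

# axis=None: the op flattens its input before operating on it.
flat_shape = (int(np.prod(input_shape)),)  # (60,)

# axis given as a scalar: the output shape matches the input shape.
same_shape = input_shape                   # (3, 4, 5)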
Example #6
def is_neg(var):
    """
    Match a variable with the `-x` pattern.

    Parameters
    ----------
    var
        The Variable to analyze.

    Returns
    -------
    object
        `x` if `var` is of the form `-x`, or None otherwise.

    """
    var_node = var.owner
    if not var_node:
        return None
    # First match against `neg`.
    if var_node.op == neg:
        return var_node.inputs[0]
    # Then match against a multiplication by -1.
    if var_node.op == mul and len(var_node.inputs) >= 2:
        for idx, mul_input in enumerate(var_node.inputs):
            try:
                constant = get_scalar_constant_value(mul_input)
                is_minus_1 = np.allclose(constant, -1)
            except NotScalarConstantError:
                is_minus_1 = False
            if is_minus_1:
                # Found a multiplication by -1.
                if len(var_node.inputs) == 2:
                    # Only return the other input.
                    return var_node.inputs[1 - idx]
                else:
                    # Return the multiplication of all other inputs.
                    return mul(*(var_node.inputs[0:idx] +
                                 var_node.inputs[idx + 1:]))
    # No match.
    return None
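A minimal usage sketch for `is_neg` (assuming it is in scope together with `mul` from `aesara.tensor.math`); the comments describe the expected results rather than guaranteed object identities:

import aesara.tensor as aet
from aesara.tensor.math import mul

x = aet.vector("x")
y = aet.vector("y")

is_neg(-x)               # matches the `neg` op, should return x
is_neg(mul(x, -1.0))     # matches a multiplication by -1, should return x
is_neg(mul(-1.0, x, y))  # should return mul(x, y), the remaining factors
is_neg(x + y)            # no match, returns None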
Example #7
    def infer_shape(self, fgraph, node, ishapes):
        from aesara.tensor.math import eq, maximum, mul

        # inputs[1] can contain at most one value of '-1', meaning the actual
        # shape of the output will be automatically computed by reshape, so
        # that the total number of elements stays the same.
        # TODO: Maybe put that formula here?
        # It's not trivial, because we would have to check if the product of
        # all the non-minus-one shapes is a divisor of the product of the
        # original shapes.
        # The following expression leads to cycles in feature_shape,
        # because it tries to replace the Shape_i node by the switch
        # statement, which depends on Shape_i.
        # return [tuple([switch(eq(node.inputs[1][i], -1),
        #                      Shape_i(i)(node.outputs[0]),
        #                      node.inputs[1][i])
        #                    for i in range(self.ndim)]
        #    )]
        # Here, we only simplify if the shape (node.inputs[1]) is a constant;
        # ideally it would suffice to check that it is always non-negative.
        # If the current variable is a scalar and its dimensionality should
        # change to self.ndim, then use size 1 for all new dimensions.
        if len(ishapes[0]) == 0:
            return [(1, ) * self.ndim]

        requ = node.inputs[1]
        input_size = mul(*ishapes[0])
        if isinstance(requ, TensorConstant):
            requ = list(requ.data)
            requ_part = [ele for ele in requ if ele != -1]
            crit = len(requ) - len(requ_part)
            if crit == 1 and len(requ_part) > 0:
                # If there are both 0 and -1 in requ_size, it is impossible
                # to determine a right output, but we can at least prevent
                # a division by 0. We do not want to keep a negative
                # size here as it could lead to further weird errors
                # after other optimizations.
                requ_size = mul(*requ_part)
                missing = input_size // (1 if requ_size == 0 else requ_size)
                for i, ele in enumerate(requ):
                    if ele == -1:
                        requ[i] = missing
            elif crit == 1:  # we reshape to -1
                requ = [input_size] if ishapes[0] else [1]
            elif crit > 1:
                raise ValueError("shape argument to Reshape.perform"
                                 " must have at most one entry equal to -1")
            return [requ]
        else:
            requ = [requ[i] for i in range(self.ndim)]
            # Since the requested dims can contain a negative value (-1),
            # the product of all values is negated to give a positive value.
            # To avoid optimization complexity, we avoid checking
            # for the case when there are two or more '-1' values.
            if self.ndim:
                requ_size = -mul(*requ)
                # If there are both 0 and -1 in requ_size, it is impossible
                # to determine a right output, but we can at least prevent
                # a division by 0. We do not want to keep a negative
                # size here as it could lead to further weird errors
                # after other optimizations.
                rest_size = input_size // maximum(requ_size, 1)
            return [
                tuple([
                    aet.switch(eq(requ[i], -1), rest_size, requ[i])
                    for i in range(self.ndim)
                ])
            ]
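The constant-shape branch above fills in a single -1 entry by dividing the total input size by the product of the other requested dimensions. A plain-Python illustration of that arithmetic (not Aesara code):

import numpy as np

input_shape = (6, 4)                        # 24 elements in total
requ = [2, -1, 3]

input_size = int(np.prod(input_shape))      # 24
requ_part = [e for e in requ if e != -1]    # [2, 3]
requ_size = int(np.prod(requ_part))         # 6
missing = input_size // (1 if requ_size == 0 else requ_size)
requ = [missing if e == -1 else e for e in requ]
# requ is now [2, 4, 3]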