Beispiel #1
0
    def _bin_op_numeric_unify_types(self, name, other):
        def numeric_proxy(t):
            if t == tbool:
                return tint32
            else:
                return t

        def scalar_type(t):
            if isinstance(t, tarray):
                return numeric_proxy(t.element_type)
            elif isinstance(t, tndarray):
                return numeric_proxy(t.element_type)
            else:
                return numeric_proxy(t)

        t = unify_types(scalar_type(self.dtype), scalar_type(other.dtype))
        if t is None:
            raise NotImplementedError("'{}' {} '{}'".format(
                self.dtype, name, other.dtype))

        if isinstance(self.dtype, tarray) or isinstance(other.dtype, tarray):
            return tarray(t)
        elif isinstance(self.dtype, tndarray):
            return tndarray(t, self.ndim)
        elif isinstance(other.dtype, tndarray):
            return tndarray(t, other.ndim)

        return t
Beispiel #2
0
def svd(nd, full_matrices=True, compute_uv=True):
    """Performs a singular value decomposition.

    :param nd: :class:`.NDArrayExpression`
        A 2 dimensional ndarray, shape(M, N).
    :param full_matrices: `bool`
        If True (default), u and vt have dimensions (M, M) and (N, N) respectively. Otherwise, they have dimensions
        (M, K) and (K, N), where K = min(M, N)
    :param compute_uv: `bool`
        If True (default), compute the singular vectors u and v. Otherwise, only return a single ndarray, s.

    Returns
    -------
    - u: :class:`.NDArrayExpression`
        The left singular vectors.
    - s: :class:`.NDArrayExpression`
        The singular values.
    - vt: :class:`.NDArrayExpression`
        The right singular vectors.
    """
    float_nd = nd.map(lambda x: hl.float64(x))
    ir = NDArraySVD(float_nd._ir, full_matrices, compute_uv)

    return_type = ttuple(tndarray(tfloat64, 2), tndarray(tfloat64, 1),
                         tndarray(tfloat64, 2)) if compute_uv else tndarray(
                             tfloat64, 1)
    return construct_expr(ir, return_type)
Beispiel #3
0
def qr(nd, mode="reduced"):
    """Performs a QR decomposition.

    :param nd: A 2 dimensional ndarray, shape(M, N)
    :param mode: One of "reduced", "complete", "r", or "raw".

        If K = min(M, N), then:

        - `reduced`: returns q and r with dimensions (M, K), (K, N)
        - `complete`: returns q and r with dimensions (M, M), (M, N)
        - `r`: returns only r with dimensions (K, N)
        - `raw`: returns h, tau with dimensions (N, M), (K,)

    Returns
    -------
    - q: ndarray of float64
        A matrix with orthonormal columns.
    - r: ndarray of float64
        The upper-triangular matrix R.
    - (h, tau): ndarrays of float64
        The array h contains the Householder reflectors that generate q along with r.
        The tau array contains scaling factors for the reflectors
    """

    assert nd.ndim == 2, "QR decomposition requires 2 dimensional ndarray"

    if mode not in ["reduced", "r", "raw", "complete"]:
        raise ValueError(f"Unrecognized mode '{mode}' for QR decomposition")

    float_nd = nd.map(lambda x: hl.float64(x))
    ir = NDArrayQR(float_nd._ir, mode)
    indices = nd._indices
    aggs = nd._aggregations
    if mode == "raw":
        return construct_expr(
            ir, ttuple(tndarray(tfloat64, 2), tndarray(tfloat64, 1)), indices,
            aggs)
    elif mode == "r":
        return construct_expr(ir, tndarray(tfloat64, 2), indices, aggs)
    elif mode in ["complete", "reduced"]:
        return construct_expr(
            ir, ttuple(tndarray(tfloat64, 2), tndarray(tfloat64, 2)), indices,
            aggs)
Beispiel #4
0
 def ndarray_floating_point_divide(arg_type, ret_type):
     register_function("div", (
         arg_type,
         tndarray(arg_type, NatVariable()),
     ), tndarray(ret_type, NatVariable()))
     register_function("div", (tndarray(arg_type, NatVariable()), arg_type),
                       tndarray(ret_type, NatVariable()))
     register_function("div", (tndarray(
         arg_type, NatVariable()), tndarray(arg_type, NatVariable())),
                       tndarray(ret_type, NatVariable()))
Beispiel #5
0
 def _bin_op_numeric(self, name, other, ret_type_f=None):
     other = to_expr(other)
     unified_type = self._bin_op_numeric_unify_types(name, other)
     me = self._promote_numeric(unified_type)
     other = other._promote_numeric(unified_type)
     if ret_type_f:
         if isinstance(unified_type, tarray):
             ret_type = tarray(ret_type_f(unified_type.element_type))
         elif isinstance(unified_type, tndarray):
             ret_type = tndarray(ret_type_f(unified_type.element_type),
                                 unified_type.ndim)
         else:
             ret_type = ret_type_f(unified_type)
     else:
         ret_type = unified_type
     return me._bin_op(name, other, ret_type)
Beispiel #6
0
def inv(nd):
    """Performs a matrix inversion.

    :param nd: A 2 dimensional ndarray, shape(M, N)

    Returns
    -------
    - a: ndarray of float64
        The inverted matrix
    """

    assert nd.ndim == 2, "Matrix inversion requires 2 dimensional ndarray"

    float_nd = nd.map(lambda x: hl.float64(x))
    ir = NDArrayInv(float_nd._ir)
    return construct_expr(ir, tndarray(tfloat64, 2))
Beispiel #7
0
def concatenate(nds, axis=0):
    """Join a sequence of arrays along an existing axis.

    Examples
    --------

    >>> x = hl.nd.array([[1., 2.], [3., 4.]])
    >>> y = hl.nd.array([[5.], [6.]])
    >>> hl.eval(hl.nd.concatenate([x, y], axis=1))
    array([[1., 2., 5.],
           [3., 4., 6.]])
    >>> x = hl.nd.array([1., 2.])
    >>> y = hl.nd.array([3., 4.])
    >>> hl.eval(hl.nd.concatenate((x, y), axis=0))
    array([1., 2., 3., 4.])

    Parameters
    ----------
    :param nds: a1, a2, …sequence of array_like
        The arrays must have the same shape, except in the dimension corresponding to axis (the first, by default).
        Note: unlike Numpy, the numerical element type of each array_like must match.
    :param axis: int, optional
        The axis along which the arrays will be joined. Default is 0.
        Note: unlike Numpy, if provided, axis cannot be None.

    Returns
    -------
    - res: ndarray
        The concatenated array
    """
    head_nd = nds[0]
    head_ndim = head_nd.ndim
    hl.case().when(hl.all(lambda a: a.ndim == head_ndim, nds),
                   True).or_error("Mismatched ndim")

    makearr = aarray(nds)
    concat_ir = NDArrayConcat(makearr._ir, axis)

    return construct_expr(concat_ir,
                          tndarray(head_nd._type.element_type, head_ndim))
Beispiel #8
0
def _impute_type(x, partial_type):
    from hail.genetics import Locus, Call
    from hail.utils import Interval, Struct

    def refine(t, refined):
        if t is None:
            return refined
        if not isinstance(t, type(refined)):
            raise ExpressionException(
                "Incompatible partial_type, {}, for value {}".format(
                    partial_type, x))
        return t

    if isinstance(x, Expression):
        return x.dtype
    elif isinstance(x, bool):
        return tbool
    elif isinstance(x, int):
        if hl.tint32.min_value <= x <= hl.tint32.max_value:
            return tint32
        elif hl.tint64.min_value <= x <= hl.tint64.max_value:
            return tint64
        else:
            raise ValueError(
                "Hail has no integer data type large enough to store {}".
                format(x))
    elif isinstance(x, float):
        return tfloat64
    elif isinstance(x, str):
        return tstr
    elif isinstance(x, Locus):
        return tlocus(x.reference_genome)
    elif isinstance(x, Interval):
        return tinterval(x.point_type)
    elif isinstance(x, Call):
        return tcall
    elif isinstance(x, Struct) or isinstance(x, dict) and isinstance(
            partial_type, tstruct):
        partial_type = refine(partial_type, hl.tstruct())
        t = tstruct(**{k: _impute_type(x[k], partial_type.get(k)) for k in x})
        return t
    elif isinstance(x, tuple):
        partial_type = refine(partial_type, hl.ttuple())
        return ttuple(*[
            _impute_type(
                element,
                partial_type[index] if index < len(partial_type) else None)
            for index, element in enumerate(x)
        ])
    elif isinstance(x, list):
        partial_type = refine(partial_type, hl.tarray(None))
        if len(x) == 0:
            return partial_type
        ts = {
            _impute_type(element, partial_type.element_type)
            for element in x
        }
        unified_type = super_unify_types(*ts)
        if unified_type is None:
            raise ExpressionException(
                "Hail does not support heterogeneous arrays: "
                "found list with elements of types {} ".format(list(ts)))
        return tarray(unified_type)

    elif is_setlike(x):
        partial_type = refine(partial_type, hl.tset(None))
        if len(x) == 0:
            return partial_type
        ts = {
            _impute_type(element, partial_type.element_type)
            for element in x
        }
        unified_type = super_unify_types(*ts)
        if not unified_type:
            raise ExpressionException(
                "Hail does not support heterogeneous sets: "
                "found set with elements of types {} ".format(list(ts)))
        return tset(unified_type)

    elif isinstance(x, Mapping):
        user_partial_type = partial_type
        partial_type = refine(partial_type, hl.tdict(None, None))
        if len(x) == 0:
            return partial_type
        kts = {
            _impute_type(element, partial_type.key_type)
            for element in x.keys()
        }
        vts = {
            _impute_type(element, partial_type.value_type)
            for element in x.values()
        }
        unified_key_type = super_unify_types(*kts)
        unified_value_type = super_unify_types(*vts)
        if not unified_key_type:
            raise ExpressionException(
                "Hail does not support heterogeneous dicts: "
                "found dict with keys {} of types {} ".format(
                    list(x.keys()), list(kts)))
        if not unified_value_type:
            if unified_key_type == hl.tstr and user_partial_type is None:
                return tstruct(**{k: _impute_type(x[k], None) for k in x})

            raise ExpressionException(
                "Hail does not support heterogeneous dicts: "
                "found dict with values of types {} ".format(list(vts)))
        return tdict(unified_key_type, unified_value_type)
    elif isinstance(x, np.generic):
        return from_numpy(x.dtype)
    elif isinstance(x, np.ndarray):
        element_type = from_numpy(x.dtype)
        return tndarray(element_type, x.ndim)
    elif x is None or pd.isna(x):
        return partial_type
    elif isinstance(
            x, (hl.expr.builders.CaseBuilder, hl.expr.builders.SwitchBuilder)):
        raise ExpressionException(
            "'switch' and 'case' expressions must end with a call to either"
            "'default' or 'or_missing'")
    else:
        raise ExpressionException(
            "Hail cannot automatically impute type of {}: {}".format(
                type(x), x))
Beispiel #9
0
def impute_type(x):
    from hail.genetics import Locus, Call
    from hail.utils import Interval, Struct

    if isinstance(x, Expression):
        return x.dtype
    elif isinstance(x, bool):
        return tbool
    elif isinstance(x, int):
        if hl.tint32.min_value <= x <= hl.tint32.max_value:
            return tint32
        elif hl.tint64.min_value <= x <= hl.tint64.max_value:
            return tint64
        else:
            raise ValueError(
                "Hail has no integer data type large enough to store {}".
                format(x))
    elif isinstance(x, float):
        return tfloat64
    elif isinstance(x, str):
        return tstr
    elif isinstance(x, Locus):
        return tlocus(x.reference_genome)
    elif isinstance(x, Interval):
        return tinterval(x.point_type)
    elif isinstance(x, Call):
        return tcall
    elif isinstance(x, Struct):
        return tstruct(**{k: impute_type(x[k]) for k in x})
    elif isinstance(x, tuple):
        return ttuple(*(impute_type(element) for element in x))
    elif isinstance(x, list):
        if len(x) == 0:
            raise ExpressionException(
                "Cannot impute type of empty list. Use 'hl.empty_array' to create an empty array."
            )
        ts = {impute_type(element) for element in x}
        unified_type = unify_types_limited(*ts)
        if unified_type is None:
            raise ExpressionException(
                "Hail does not support heterogeneous arrays: "
                "found list with elements of types {} ".format(list(ts)))
        return tarray(unified_type)
    elif isinstance(x, set):
        if len(x) == 0:
            raise ExpressionException(
                "Cannot impute type of empty set. Use 'hl.empty_set' to create an empty set."
            )
        ts = {impute_type(element) for element in x}
        unified_type = unify_types_limited(*ts)
        if not unified_type:
            raise ExpressionException(
                "Hail does not support heterogeneous sets: "
                "found set with elements of types {} ".format(list(ts)))
        return tset(unified_type)
    elif isinstance(x, Mapping):
        if len(x) == 0:
            raise ExpressionException(
                "Cannot impute type of empty dict. Use 'hl.empty_dict' to create an empty dict."
            )
        kts = {impute_type(element) for element in x.keys()}
        vts = {impute_type(element) for element in x.values()}
        unified_key_type = unify_types_limited(*kts)
        unified_value_type = unify_types_limited(*vts)
        if not unified_key_type:
            raise ExpressionException(
                "Hail does not support heterogeneous dicts: "
                "found dict with keys of types {} ".format(list(kts)))
        if not unified_value_type:
            raise ExpressionException(
                "Hail does not support heterogeneous dicts: "
                "found dict with values of types {} ".format(list(vts)))
        return tdict(unified_key_type, unified_value_type)
    elif isinstance(x, np.generic):
        return from_numpy(x.dtype)
    elif isinstance(x, np.ndarray):
        element_type = from_numpy(x.dtype)
        return tndarray(element_type, x.ndim)
    elif x is None:
        raise ExpressionException("Hail cannot impute the type of 'None'")
    elif isinstance(
            x, (hl.expr.builders.CaseBuilder, hl.expr.builders.SwitchBuilder)):
        raise ExpressionException(
            "'switch' and 'case' expressions must end with a call to either"
            "'default' or 'or_missing'")
    else:
        raise ExpressionException(
            "Hail cannot automatically impute type of {}: {}".format(
                type(x), x))