def _bin_op_numeric_unify_types(self, name, other):
    """Determine the common type for a numeric binary operation.

    The element types of both operands are unified (with ``tbool`` treated
    as ``tint32``), and the result is re-wrapped in the container kind of
    the operands: an array if either side is a ``tarray``, otherwise an
    ndarray (taking ``ndim`` from whichever side is a ``tndarray``, ``self``
    first), otherwise the bare unified type.

    :param name: operator name, used only in the error message.
    :param other: the right-hand expression; must expose ``dtype`` (and
        ``ndim`` when it is an ndarray).
    :raises NotImplementedError: if ``unify_types`` returns ``None`` for
        the two element types.
    """

    def element_proxy(typ):
        # Containers contribute their element type; scalars contribute themselves.
        base = typ.element_type if isinstance(typ, (tarray, tndarray)) else typ
        # Booleans take part in arithmetic as 32-bit integers.
        return tint32 if base == tbool else base

    left_type = self.dtype
    right_type = other.dtype

    unified = unify_types(element_proxy(left_type), element_proxy(right_type))
    if unified is None:
        raise NotImplementedError("'{}' {} '{}'".format(left_type, name, right_type))

    # An array on either side wins over ndarray and scalar.
    if isinstance(left_type, tarray) or isinstance(right_type, tarray):
        return tarray(unified)
    if isinstance(left_type, tndarray):
        return tndarray(unified, self.ndim)
    if isinstance(right_type, tndarray):
        return tndarray(unified, other.ndim)
    return unified
def svd(nd, full_matrices=True, compute_uv=True):
    """Performs a singular value decomposition.

    :param nd: :class:`.NDArrayExpression`
        A 2 dimensional ndarray, shape(M, N).
    :param full_matrices: `bool`
        If True (default), u and vt have dimensions (M, M) and (N, N) respectively. Otherwise, they have dimensions
        (M, K) and (K, N), where K = min(M, N)
    :param compute_uv: `bool`
        If True (default), compute the singular vectors u and v. Otherwise, only return a single ndarray, s.

    Returns
    -------
    - u: :class:`.NDArrayExpression`
        The left singular vectors (only when `compute_uv` is True).
    - s: :class:`.NDArrayExpression`
        The singular values.
    - vt: :class:`.NDArrayExpression`
        The right singular vectors (only when `compute_uv` is True).

    When `compute_uv` is True the result is a tuple expression ``(u, s, vt)``;
    otherwise it is the 1 dimensional ndarray ``s`` alone.
    """
    # Same up-front dimensionality check that `qr` and `inv` perform.
    assert nd.ndim == 2, "SVD requires 2 dimensional ndarray"

    # The decomposition is computed on float64 elements.
    float_nd = nd.map(lambda x: hl.float64(x))
    ir = NDArraySVD(float_nd._ir, full_matrices, compute_uv)

    if compute_uv:
        return_type = ttuple(tndarray(tfloat64, 2), tndarray(tfloat64, 1), tndarray(tfloat64, 2))
    else:
        return_type = tndarray(tfloat64, 1)

    # Carry over the input's indices and aggregations (as `qr` does) so the
    # result stays tied to the same source as `nd`.
    return construct_expr(ir, return_type, nd._indices, nd._aggregations)
def qr(nd, mode="reduced"):
    """Performs a QR decomposition.

    :param nd: A 2 dimensional ndarray, shape(M, N)
    :param mode: One of "reduced", "complete", "r", or "raw".

        With K = min(M, N):

        - `reduced`: returns q and r with dimensions (M, K), (K, N)
        - `complete`: returns q and r with dimensions (M, M), (M, N)
        - `r`: returns only r with dimensions (K, N)
        - `raw`: returns h, tau with dimensions (N, M), (K,)

    Returns
    -------
    - q: ndarray of float64
        A matrix with orthonormal columns.
    - r: ndarray of float64
        The upper-triangular matrix R.
    - (h, tau): ndarrays of float64
        The array h contains the Householder reflectors that generate q along with r. The tau array contains scaling
        factors for the reflectors

    Raises ``ValueError`` for an unrecognized `mode`.
    """
    assert nd.ndim == 2, "QR decomposition requires 2 dimensional ndarray"

    if mode not in ["reduced", "r", "raw", "complete"]:
        raise ValueError(f"Unrecognized mode '{mode}' for QR decomposition")

    # The decomposition is computed on float64 elements.
    as_float = nd.map(lambda element: hl.float64(element))
    qr_ir = NDArrayQR(as_float._ir, mode)

    # Pick the expression type matching what the chosen mode produces.
    if mode == "raw":
        result_type = ttuple(tndarray(tfloat64, 2), tndarray(tfloat64, 1))
    elif mode == "r":
        result_type = tndarray(tfloat64, 2)
    else:
        # Only "complete" and "reduced" remain after the validation above.
        result_type = ttuple(tndarray(tfloat64, 2), tndarray(tfloat64, 2))

    return construct_expr(qr_ir, result_type, nd._indices, nd._aggregations)
def ndarray_floating_point_divide(arg_type, ret_type):
    """Register the "div" overloads that involve an ndarray operand.

    Three signatures are registered, each returning an ndarray of
    ``ret_type``: scalar / ndarray, ndarray / scalar and ndarray / ndarray,
    where scalars and ndarray elements are of ``arg_type``.
    """

    def nd_of(element_type):
        # Each ndarray type gets its own fresh NatVariable, as in every
        # individual registration (presumably a placeholder for ndim).
        return tndarray(element_type, NatVariable())

    # scalar / ndarray
    register_function("div", (arg_type, nd_of(arg_type)), nd_of(ret_type))
    # ndarray / scalar
    register_function("div", (nd_of(arg_type), arg_type), nd_of(ret_type))
    # ndarray / ndarray
    register_function("div", (nd_of(arg_type), nd_of(arg_type)), nd_of(ret_type))
def _bin_op_numeric(self, name, other, ret_type_f=None):
    """Build a numeric binary operation between ``self`` and ``other``.

    Both operands are promoted to their unified numeric type before the
    operation is constructed.

    :param name: operator name forwarded to ``_bin_op``.
    :param other: right-hand operand; converted with ``to_expr``.
    :param ret_type_f: optional callable mapping the unified element type
        to the result element type. For array and ndarray operands it is
        applied to the element type and the container is rebuilt around
        the result. When omitted (or falsy), the result type is the
        unified type itself.
    """
    rhs = to_expr(other)
    common = self._bin_op_numeric_unify_types(name, rhs)
    lhs = self._promote_numeric(common)
    rhs = rhs._promote_numeric(common)

    if not ret_type_f:
        result_type = common
    elif isinstance(common, tarray):
        result_type = tarray(ret_type_f(common.element_type))
    elif isinstance(common, tndarray):
        result_type = tndarray(ret_type_f(common.element_type), common.ndim)
    else:
        result_type = ret_type_f(common)

    return lhs._bin_op(name, rhs, result_type)
def inv(nd):
    """Performs a matrix inversion.

    :param nd: A 2 dimensional ndarray. Matrix inversion is only defined for
        square matrices; squareness is not checked by this function.

    Returns
    -------
    - a: ndarray of float64
        The inverted matrix
    """
    assert nd.ndim == 2, "Matrix inversion requires 2 dimensional ndarray"

    # The inversion is computed on float64 elements.
    float_nd = nd.map(lambda x: hl.float64(x))
    ir = NDArrayInv(float_nd._ir)

    # Carry over the input's indices and aggregations (as `qr` does) so the
    # result stays tied to the same source as `nd`.
    return construct_expr(ir, tndarray(tfloat64, 2), nd._indices, nd._aggregations)
def concatenate(nds, axis=0):
    """Join a sequence of arrays along an existing axis.

    Examples
    --------

    >>> x = hl.nd.array([[1., 2.], [3., 4.]])
    >>> y = hl.nd.array([[5.], [6.]])
    >>> hl.eval(hl.nd.concatenate([x, y], axis=1))
    array([[1., 2., 5.],
           [3., 4., 6.]])
    >>> x = hl.nd.array([1., 2.])
    >>> y = hl.nd.array([3., 4.])
    >>> hl.eval(hl.nd.concatenate((x, y), axis=0))
    array([1., 2., 3., 4.])

    Parameters
    ----------
    :param nds: a1, a2, ... sequence of array_like
        A non-empty, indexable sequence. The arrays must have the same shape,
        except in the dimension corresponding to axis (the first, by default).
        Note: unlike Numpy, the numerical element type of each array_like must match.
    :param axis: int, optional
        The axis along which the arrays will be joined. Default is 0.
        Note: unlike Numpy, if provided, axis cannot be None.

    Returns
    -------
    - res: ndarray
        The concatenated array

    Raises
    ------
    ValueError
        If the inputs do not all have the same number of dimensions.
    """
    head_nd = nds[0]
    head_ndim = head_nd.ndim

    # The number of dimensions is known when the expression is built, so
    # check it eagerly. A case/or_error expression that is built and then
    # discarded never reports anything.
    if any(a.ndim != head_ndim for a in nds):
        raise ValueError("Mismatched ndim")

    makearr = aarray(nds)
    concat_ir = NDArrayConcat(makearr._ir, axis)

    # The array expression combines all inputs, so its indices and
    # aggregations are the ones the result must carry.
    return construct_expr(
        concat_ir,
        tndarray(head_nd._type.element_type, head_ndim),
        makearr._indices,
        makearr._aggregations,
    )
def _impute_type(x, partial_type):
    """Infer the Hail type of a Python value, guided by a partial type.

    :param x: the value to inspect (an expression, a Python scalar, a Hail
        genetics/utility object, a container, a numpy value, ``None``, ...).
    :param partial_type: an optional, possibly incomplete type hint. It is
        returned as-is where nothing can be inferred from ``x`` (empty
        list/set/dict, ``None``, or a value for which ``pd.isna`` is true)
        and is threaded into nested elements of containers. ``None`` means
        no hint.
    :return: the inferred type, or ``partial_type`` in the cases above.
    :raises ExpressionException: if ``partial_type`` is incompatible with
        ``x``, if a container is heterogeneous, if ``x`` is an unfinished
        case/switch builder, or if no type can be imputed.
    :raises ValueError: if an ``int`` does not fit in a 64-bit integer.
    """
    from hail.genetics import Locus, Call
    from hail.utils import Interval, Struct

    def refine(t, refined):
        # Use the default container type when no hint was given; otherwise
        # the hint must be of the same type class as that default.
        if t is None:
            return refined
        if not isinstance(t, type(refined)):
            raise ExpressionException(
                "Incompatible partial_type, {}, for value {}".format(partial_type, x))
        return t

    if isinstance(x, Expression):
        return x.dtype
    elif isinstance(x, bool):
        # Must precede the int check: bool is a subclass of int.
        return tbool
    elif isinstance(x, int):
        if hl.tint32.min_value <= x <= hl.tint32.max_value:
            return tint32
        elif hl.tint64.min_value <= x <= hl.tint64.max_value:
            return tint64
        else:
            raise ValueError(
                "Hail has no integer data type large enough to store {}".format(x))
    elif isinstance(x, float):
        return tfloat64
    elif isinstance(x, str):
        return tstr
    elif isinstance(x, Locus):
        return tlocus(x.reference_genome)
    elif isinstance(x, Interval):
        return tinterval(x.point_type)
    elif isinstance(x, Call):
        return tcall
    elif isinstance(x, Struct) or isinstance(x, dict) and isinstance(partial_type, tstruct):
        # A plain dict is only treated as a struct when the hint says so.
        partial_type = refine(partial_type, hl.tstruct())
        t = tstruct(**{k: _impute_type(x[k], partial_type.get(k)) for k in x})
        return t
    elif isinstance(x, tuple):
        partial_type = refine(partial_type, hl.ttuple())
        # The hint may cover fewer positions than the tuple has elements.
        return ttuple(*[
            _impute_type(element, partial_type[index] if index < len(partial_type) else None)
            for index, element in enumerate(x)
        ])
    elif isinstance(x, list):
        partial_type = refine(partial_type, hl.tarray(None))
        if len(x) == 0:
            return partial_type
        ts = {_impute_type(element, partial_type.element_type) for element in x}
        unified_type = super_unify_types(*ts)
        if unified_type is None:
            raise ExpressionException(
                "Hail does not support heterogeneous arrays: "
                "found list with elements of types {} ".format(list(ts)))
        return tarray(unified_type)
    elif is_setlike(x):
        partial_type = refine(partial_type, hl.tset(None))
        if len(x) == 0:
            return partial_type
        ts = {_impute_type(element, partial_type.element_type) for element in x}
        unified_type = super_unify_types(*ts)
        # Compare against None explicitly: sized types such as an empty
        # struct or tuple type are falsy but are valid unification results.
        if unified_type is None:
            raise ExpressionException(
                "Hail does not support heterogeneous sets: "
                "found set with elements of types {} ".format(list(ts)))
        return tset(unified_type)
    elif isinstance(x, Mapping):
        user_partial_type = partial_type
        partial_type = refine(partial_type, hl.tdict(None, None))
        if len(x) == 0:
            return partial_type
        kts = {_impute_type(element, partial_type.key_type) for element in x.keys()}
        vts = {_impute_type(element, partial_type.value_type) for element in x.values()}
        unified_key_type = super_unify_types(*kts)
        unified_value_type = super_unify_types(*vts)
        # Explicit None checks for the same reason as in the set branch.
        if unified_key_type is None:
            raise ExpressionException(
                "Hail does not support heterogeneous dicts: "
                "found dict with keys {} of types {} ".format(list(x.keys()), list(kts)))
        if unified_value_type is None:
            # With string keys and no user-supplied hint, a dict whose
            # values do not unify is imputed as a struct instead.
            if unified_key_type == hl.tstr and user_partial_type is None:
                return tstruct(**{k: _impute_type(x[k], None) for k in x})
            raise ExpressionException(
                "Hail does not support heterogeneous dicts: "
                "found dict with values of types {} ".format(list(vts)))
        return tdict(unified_key_type, unified_value_type)
    elif isinstance(x, np.generic):
        return from_numpy(x.dtype)
    elif isinstance(x, np.ndarray):
        element_type = from_numpy(x.dtype)
        return tndarray(element_type, x.ndim)
    elif x is None or pd.isna(x):
        # Nothing to infer from a missing value; fall back to the hint.
        return partial_type
    elif isinstance(x, (hl.expr.builders.CaseBuilder, hl.expr.builders.SwitchBuilder)):
        raise ExpressionException(
            "'switch' and 'case' expressions must end with a call to either "
            "'default' or 'or_missing'")
    else:
        raise ExpressionException(
            "Hail cannot automatically impute type of {}: {}".format(type(x), x))
def impute_type(x):
    """Infer the Hail type of a Python value.

    :param x: the value to inspect (an expression, a Python scalar, a Hail
        genetics/utility object, a container, or a numpy value).
    :return: the inferred type.
    :raises ExpressionException: if ``x`` is an empty list/set/dict, a
        heterogeneous container, ``None``, an unfinished case/switch
        builder, or a value whose type cannot be imputed.
    :raises ValueError: if an ``int`` does not fit in a 64-bit integer.
    """
    from hail.genetics import Locus, Call
    from hail.utils import Interval, Struct

    if isinstance(x, Expression):
        return x.dtype
    elif isinstance(x, bool):
        # Must precede the int check: bool is a subclass of int.
        return tbool
    elif isinstance(x, int):
        if hl.tint32.min_value <= x <= hl.tint32.max_value:
            return tint32
        elif hl.tint64.min_value <= x <= hl.tint64.max_value:
            return tint64
        else:
            raise ValueError(
                "Hail has no integer data type large enough to store {}".format(x))
    elif isinstance(x, float):
        return tfloat64
    elif isinstance(x, str):
        return tstr
    elif isinstance(x, Locus):
        return tlocus(x.reference_genome)
    elif isinstance(x, Interval):
        return tinterval(x.point_type)
    elif isinstance(x, Call):
        return tcall
    elif isinstance(x, Struct):
        return tstruct(**{k: impute_type(x[k]) for k in x})
    elif isinstance(x, tuple):
        return ttuple(*(impute_type(element) for element in x))
    elif isinstance(x, list):
        if len(x) == 0:
            raise ExpressionException(
                "Cannot impute type of empty list. Use 'hl.empty_array' to create an empty array."
            )
        ts = {impute_type(element) for element in x}
        unified_type = unify_types_limited(*ts)
        if unified_type is None:
            raise ExpressionException(
                "Hail does not support heterogeneous arrays: "
                "found list with elements of types {} ".format(list(ts)))
        return tarray(unified_type)
    elif isinstance(x, set):
        if len(x) == 0:
            raise ExpressionException(
                "Cannot impute type of empty set. Use 'hl.empty_set' to create an empty set."
            )
        ts = {impute_type(element) for element in x}
        unified_type = unify_types_limited(*ts)
        # Compare against None explicitly (as the list branch does): sized
        # types such as an empty struct or tuple type are falsy but are
        # valid unification results.
        if unified_type is None:
            raise ExpressionException(
                "Hail does not support heterogeneous sets: "
                "found set with elements of types {} ".format(list(ts)))
        return tset(unified_type)
    elif isinstance(x, Mapping):
        if len(x) == 0:
            raise ExpressionException(
                "Cannot impute type of empty dict. Use 'hl.empty_dict' to create an empty dict."
            )
        kts = {impute_type(element) for element in x.keys()}
        vts = {impute_type(element) for element in x.values()}
        unified_key_type = unify_types_limited(*kts)
        unified_value_type = unify_types_limited(*vts)
        # Explicit None checks for the same reason as in the set branch.
        if unified_key_type is None:
            raise ExpressionException(
                "Hail does not support heterogeneous dicts: "
                "found dict with keys of types {} ".format(list(kts)))
        if unified_value_type is None:
            raise ExpressionException(
                "Hail does not support heterogeneous dicts: "
                "found dict with values of types {} ".format(list(vts)))
        return tdict(unified_key_type, unified_value_type)
    elif isinstance(x, np.generic):
        return from_numpy(x.dtype)
    elif isinstance(x, np.ndarray):
        element_type = from_numpy(x.dtype)
        return tndarray(element_type, x.ndim)
    elif x is None:
        raise ExpressionException("Hail cannot impute the type of 'None'")
    elif isinstance(x, (hl.expr.builders.CaseBuilder, hl.expr.builders.SwitchBuilder)):
        raise ExpressionException(
            "'switch' and 'case' expressions must end with a call to either "
            "'default' or 'or_missing'")
    else:
        raise ExpressionException(
            "Hail cannot automatically impute type of {}: {}".format(type(x), x))