Exemple #1
0
def _nearpsd(A):
    r"""Obtain the "closest" positive semidefinite matrix to A.

    The eigenvalues of `A` are clipped from below at zero and the matrix is
    rebuilt from the clipped spectrum. Before rebuilding, row ``i`` of the
    eigenvector matrix is scaled by
    ``1 / sqrt(sum_j eigvec[i, j]**2 * clipped_eigval[j])``.

    Parameters
    ----------
    A : square array-like with a ``shape`` attribute
        Matrix to approximate.

    Returns
    -------
    :class:`numpy.matrix`
        Real part of ``B * B.T``, where ``B`` is the scaled factor built
        from the clipped eigendecomposition.
    """
    dim = A.shape[0]
    eigenvalues, eigenvectors = np.linalg.eig(A)

    # Drop the negative part of the spectrum.
    clipped = np.matrix(np.maximum(eigenvalues, 0))
    basis = np.matrix(eigenvectors)

    # Per-row weights: sum over j of eigvec[i, j]**2 * clipped[j].
    row_weights = np.multiply(basis, basis) * clipped.T
    inverse_weights = np.array(1 / row_weights).reshape(dim)
    scaling = np.matrix(np.sqrt(np.diag(inverse_weights)))

    # Diagonal matrix holding the square roots of the clipped eigenvalues.
    root_spectrum = np.diag(np.array(np.sqrt(clipped)).reshape(dim))

    factor = scaling * basis * root_spectrum
    return np.real(factor * factor.T)


@typecheck(mt=MatrixTable,
           genotype=oneof(expr_int32, expr_float64, expr_call),
           beta=oneof(expr_float64, expr_array(expr_float64)),
           h2=oneof(float, int, list, np.ndarray),
           popstrat=nullable(oneof(expr_int32, expr_float64)),
           popstrat_var=nullable(oneof(float, int)),
           exact_h2=bool)
def calculate_phenotypes(mt,
                         genotype,
                         beta,
                         h2,
                         popstrat=None,
                         popstrat_var=None,
                         exact_h2=False):
    r"""Calculates phenotypes by multiplying genotypes and betas.

    Parameters
    ----------
Exemple #2
0
import hail as hl
from hail.expr.expressions import expr_array, expr_call, expr_int32
from hail.typecheck import typecheck


@typecheck(lgt=expr_call, la=expr_array(expr_int32))
def lgt_to_gt(lgt, la):
    """Convert a local genotype call (LGT) into a GT call.

    Each of the first two allele indices of `lgt` is used as a position in
    `la`; the values found there become the alleles of the returned call.

    Parameters
    ----------
    lgt : :class:`.CallExpression`
        LGT value.
    la : :class:`.ArrayExpression`
        Local alleles array.

    Returns
    -------
    :class:`.CallExpression`

    Notes
    -----
    Only allele positions 0 and 1 of `lgt` are read, i.e. the genotype is
    assumed to be diploid.
    """
    first_allele = la[lgt[0]]
    second_allele = la[lgt[1]]
    return hl.call(first_allele, second_allele)
Exemple #3
0
import hail as hl
from hail.typecheck import typecheck
from hail.expr.expressions import expr_call, expr_numeric, expr_array, \
    check_entry_indexed, check_row_indexed


@typecheck(call_expr=expr_call,
           loadings_expr=expr_array(expr_numeric),
           af_expr=expr_numeric)
def pc_project(call_expr, loadings_expr, af_expr):
    """Projects genotypes onto pre-computed PCs. Requires loadings and
    allele-frequency from a reference dataset (see example). Note that
    `loadings_expr` must have no missing data and reflect the rows
    from the original PCA run for this method to be accurate.

    Example
    -------
    >>> # Compute loadings and allele frequency for reference dataset
    >>> _, _, loadings_ht = hl.hwe_normalized_pca(mt.GT, k=10, compute_loadings=True)   # doctest: +SKIP
    >>> mt = mt.annotate_rows(af=hl.agg.mean(mt.GT.n_alt_alleles()) / 2)                # doctest: +SKIP
    >>> loadings_ht = loadings_ht.annotate(af=mt.rows()[loadings_ht.key].af)            # doctest: +SKIP
    >>> # Project new genotypes onto loadings
    >>> ht = pc_project(mt_to_project.GT, loadings_ht.loadings, loadings_ht.af)         # doctest: +SKIP

    Parameters
    ----------
    call_expr : :class:`.CallExpression`
        Entry-indexed call expression for genotypes
        to project onto loadings.
    loadings_expr : :class:`.ArrayNumericExpression`
        Location of expression for loadings
import hail as hl
from hail.typecheck import typecheck, sequenceof
from hail.expr.expressions import expr_str, expr_call, expr_locus, expr_array
from hail.matrixtable import MatrixTable
from typing import List


@typecheck(locus=expr_locus(),
           alleles=expr_array(expr_str),
           proband_call=expr_call,
           father_call=expr_call,
           mother_call=expr_call)
def phase_by_transmission(
        locus: hl.expr.LocusExpression,
        alleles: hl.expr.ArrayExpression,
        proband_call: hl.expr.CallExpression,
        father_call: hl.expr.CallExpression,
        mother_call: hl.expr.CallExpression
) -> hl.expr.ArrayExpression:
    """Phases genotype calls in a trio based allele transmission.

    Notes
    -----
    In the phased calls returned, the order is as follows:
    - Proband: father_allele | mother_allele
    - Parents: transmitted_allele | untransmitted_allele

    Phasing of sex chromosomes:
    - Sex chromosomes of male individuals should be haploid to be phased correctly.
    - If `proband_call` is diploid on non-par regions of the sex chromosomes, it is assumed to be female.
Exemple #5
0
                            lambda j: hl.tuple([alleles.globl[j], j])))))),
            ts.row.dtype, ts.globals.dtype)
        _merge_function_map[(ts.row.dtype, ts.globals.dtype)] = f
    merge_function = _merge_function_map[(ts.row.dtype, ts.globals.dtype)]
    ts = Table(TableMapRows(ts._tir, Apply(merge_function._name,
                                           TopLevelReference('row'),
                                           TopLevelReference('global'))))
    return ts.transmute_globals(__cols=hl.flatten(ts.g.map(lambda g: g.__cols)))

def combine_gvcfs(mts):
    """Merge several gVCF datasets with one multi-way zip join.

    Every element of `mts` is passed through ``localize``; the results are
    zip-joined under the row field name ``'data'`` and the globals field
    name ``'g'``, merged by ``combine``, and handed to ``unlocalize``.

    Parameters
    ----------
    mts : iterable
        Datasets to merge; each one must be accepted by ``localize``.

    Returns
    -------
    The value returned by ``unlocalize`` for the combined table.
    """
    localized = [localize(mt) for mt in mts]
    joined = hl.Table._multi_way_zip_join(localized, 'data', 'g')
    return unlocalize(combine(joined))

@typecheck(lgt=expr_call, la=expr_array(expr_int32))
def lgt_to_gt(lgt, la):
    """Transform Local GT and Local Alleles into the true GT.

    Parameters
    ----------
    lgt : :class:`.CallExpression`
        Local genotype call; only allele positions 0 and 1 are read.
    la : :class:`.ArrayExpression`
        Local alleles array, indexed by the allele indices of `lgt`.

    Returns
    -------
    :class:`.CallExpression`
        Call built from ``la[lgt[0]]`` and ``la[lgt[1]]``.
    """
    allele_a = la[lgt[0]]
    allele_b = la[lgt[1]]
    return hl.call(allele_a, allele_b)

def quick_summary(mt):
    """compute aggregate INFO fields that do not require densify"""
    return mt.annotate_rows(
        info=hl.struct(
            MQ_DP=hl.agg.sum(mt.entry.gvcf_info.MQ_DP),
            QUALapprox=hl.agg.sum(mt.entry.gvcf_info.QUALapprox),
            RAW_MQ=hl.agg.sum(mt.entry.gvcf_info.RAW_MQ),
            VarDP=hl.agg.sum(mt.entry.gvcf_info.VarDP),
            SB_TABLE=hl.array([
                hl.agg.sum(mt.entry.SB[0]),
                hl.agg.sum(mt.entry.SB[1]),
Exemple #6
0
from functools import reduce

import hail as hl
from hail.expr.functions import _ndarray
from hail.expr.functions import array as aarray
from hail.expr.types import HailType, tfloat64, ttuple, tndarray
from hail.typecheck import typecheck, nullable, oneof, tupleof, sequenceof
from hail.expr.expressions import (expr_int32, expr_int64, expr_tuple,
                                   expr_any, expr_array, expr_ndarray,
                                   expr_numeric, Int64Expression, cast_expr,
                                   construct_expr)
from hail.expr.expressions.typed_expressions import NDArrayNumericExpression
from hail.ir import NDArrayQR, NDArrayInv, NDArrayConcat, NDArraySVD, Apply

# Typecheck spec for a collection of ndarrays: either a Python sequence of
# ndarray expressions or an array expression whose elements are ndarrays.
tsequenceof_nd = oneof(sequenceof(expr_ndarray()), expr_array(expr_ndarray()))
# Typecheck spec for a shape argument: a single int64 expression, a Python
# tuple of int64 expressions, or a tuple expression.
shape_type = oneof(expr_int64, tupleof(expr_int64), expr_tuple())


def array(input_array, dtype=None):
    """Construct an :class:`.NDArrayExpression`

    Examples
    --------

    >>> hl.eval(hl.nd.array([1, 2, 3, 4]))
    array([1, 2, 3, 4], dtype=int32)

    >>> hl.eval(hl.nd.array([[1, 2, 3], [4, 5, 6]]))
    array([[1, 2, 3],
       [4, 5, 6]], dtype=int32)
Exemple #7
0
from hail.expr.expressions import expr_int32, expr_array, ArrayExpression
from hail.expr.types import tfloat64, tarray
from hail.typecheck import typecheck
from hail.expr.functions import _func


@typecheck(gt_counts=expr_array(expr_int32))
def haplotype_freq_em(gt_counts) -> ArrayExpression:
    """
    Estimate haplotype counts from genotype counts for a pair of bi-allelic variants.
    Implements the Excoffier & Slatkin EM (Excoffier & Slatkin, Mol. Biol. Evol. 1995)

    The unphased genotype counts for the variant pair must be given in this order:
    [AABB, AABb, AAbb, AaBB, AaBb, Aabb, aaBB, aaBb, aabb]

    The estimated haplotype counts come back as an array in this order:
    [AB, Ab, aB, ab]

    Here _A_ and _a_ are the reference and non-reference alleles of the first variant,
    and _B_ and _b_ are the reference and non-reference alleles of the second variant.

    Parameters
    ----------
    gt_counts : :class:`.ArrayExpression`
        Genotype counts, in the order listed above.

    Returns
    -------
    :class:`.ArrayExpression`
        Array of float64 haplotype counts, in the order listed above.
    """
    result_type = tarray(tfloat64)
    return _func("haplotype_freq_em", result_type, gt_counts)
Exemple #8
0
    Parameters
    ----------
    input_array : :class:`.ArrayExpression` or numpy ndarray or nested python lists

    Returns
    -------
    :class:`.NDArrayExpression`
        An ndarray based on the input array.
    """
    return _ndarray(input_array)


# Typecheck spec for a shape argument: a single int64 expression, a Python
# tuple of int64 expressions, or a tuple expression.
shape_type = oneof(expr_int64, tupleof(expr_int64), expr_tuple())


@typecheck(a=expr_array(), shape=shape_type)
def from_column_major(a, shape):
    """Build a two-dimensional ndarray from a flat array of column-major data.

    The array is reshaped to the reverse of `shape` and then transposed, so
    the result has the dimensions given by `shape`.

    Parameters
    ----------
    a : :class:`.ArrayExpression`
        Flat array holding the elements.
    shape
        Shape of the result; must have exactly two entries.

    Returns
    -------
    The transposed ndarray expression.
    """
    assert len(shape) == 2
    nd = array(a)
    flipped_shape = tuple(reversed(shape))
    reshaped = nd.reshape(flipped_shape)
    return reshaped.T


@typecheck(start=expr_int32, stop=nullable(expr_int32), step=expr_int32)
def arange(start, stop=None, step=1) -> NDArrayNumericExpression:
    """Returns a 1-dimensions ndarray of integers from `start` to `stop` by `step`.

    Examples
    --------

    >>> hl.eval(hl.nd.arange(10))
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int32)
Exemple #9
0
from functools import reduce

import hail as hl
from hail.expr.functions import _ndarray
from hail.expr.functions import array as aarray
from hail.expr.types import HailType, tfloat64, ttuple, tndarray
from hail.typecheck import typecheck, nullable, oneof, tupleof, sequenceof
from hail.expr.expressions import (expr_int32, expr_int64, expr_tuple,
                                   expr_any, expr_array, expr_ndarray,
                                   expr_numeric, Int64Expression, cast_expr,
                                   construct_expr)
from hail.expr.expressions.typed_expressions import NDArrayNumericExpression
from hail.ir import NDArrayQR, NDArrayInv, NDArrayConcat

# Typecheck spec for a collection of ndarrays: a Python sequence of ndarray
# expressions, a Python tuple of ndarray expressions, or an array expression
# whose elements are ndarrays.
tsequenceof_nd = oneof(sequenceof(expr_ndarray()), tupleof(expr_ndarray()),
                       expr_array(expr_ndarray()))
# Typecheck spec for a shape argument: a single int64 expression, a Python
# tuple of int64 expressions, or a tuple expression.
shape_type = oneof(expr_int64, tupleof(expr_int64), expr_tuple())


def array(input_array, dtype=None):
    """Construct an :class:`.NDArrayExpression`

    Examples
    --------

    >>> hl.eval(hl.nd.array([1, 2, 3, 4]))
    array([1, 2, 3, 4], dtype=int32)

    >>> hl.eval(hl.nd.array([[1, 2, 3], [4, 5, 6]]))
    array([[1, 2, 3],
       [4, 5, 6]], dtype=int32)