Exemple #1
0
def get_r_for_pair_of_variants(bm: BlockMatrix, ld_index: hl.Table,
                               var1: (hl.tlocus, hl.tarray(hl.tstr)),
                               var2: (hl.tlocus, hl.tarray(hl.tstr))):
    """
    Look up the LD correlation (`r`) between two variants.

    Each variant is resolved to its ``idx`` value in `ld_index` by matching on
    locus and alleles; the pair of indices is then used to read a single entry
    of `bm`.

    .. code-block:: python

        bm = get_ld_matrix('nfe')
        ld_index = get_ld_index('nfe')
        var1 = (hl.parse_locus('1:10146', 'GRCh37'), ['AC', 'A'])
        var2 = (hl.parse_locus('1:10151', 'GRCh37'), ['TA', 'T'])
        get_r_for_pair_of_variants(bm, ld_index, var1, var2)
        # 0.01789767935482124

    :param bm: Input BlockMatrix
    :param ld_index: Index table for `bm`, with `locus`, `alleles` and `idx` fields
    :param var1: Tuple of locus and alleles
    :param var2: Tuple of locus and alleles
    :return: Correlation (r) between two variants
    :raises IndexError: If either variant has no matching row in `ld_index`
    """
    def _matrix_index(variant):
        # Element 0 of the tuple is the locus, element 1 the alleles.
        matching_rows = ld_index.filter(
            (ld_index.locus == variant[0]) & (ld_index.alleles == variant[1])
        )
        # Take the first matching row; an empty result raises IndexError here.
        return matching_rows.idx.collect()[0]

    first = _matrix_index(var1)
    second = _matrix_index(var2)

    # Always read the entry with row <= column, whatever the argument order.
    # NOTE(review): presumably only the upper triangle of `bm` is populated -- confirm.
    row, col = (second, first) if first > second else (first, second)

    return bm[row, col]
Exemple #2
0
def get_r_within_gene(
    bm: BlockMatrix,
    ld_index: hl.Table,
    gene: str,
    vep_ht: hl.Table = None,
    reference_genome: str = None,
):
    """
    Build a table of LD values (`r`) for every pair of variants annotated to `gene`.

    The index table is first narrowed to the gene's genomic interval(s), then to
    variants with at least one VEP transcript consequence whose `gene_symbol`
    equals `gene`. `bm` is filtered to those rows and columns and converted to
    an entries table, with each entry annotated with the index rows of both variants.

    Warning: the result is quadratic in the number of variants. Exercise caution
    with large genes.

    :param bm: Input Block Matrix
    :param ld_index: Corresponding index table
    :param gene: Gene symbol as string
    :param vep_ht: Table with VEP annotations (if None, loads ``public_release("exomes").ht()``)
    :param reference_genome: Reference genome to pass to get_gene_intervals for fast filtering
        to gene (if None, uses the name of Hail's default reference)
    :return: Table with one row per pair of variants, with fields `r`, `i_variant` and `j_variant`
    """
    vep_ht = public_release("exomes").ht() if vep_ht is None else vep_ht
    reference_genome = (
        hl.default_reference().name if reference_genome is None else reference_genome
    )

    # Cheap interval filter first, before the more expensive VEP join.
    gene_intervals = hl.experimental.get_gene_intervals(
        gene_symbols=[gene], reference_genome=reference_genome
    )
    in_interval = hl.filter_intervals(ld_index, gene_intervals)

    # The interval may overlap other genes, so keep only variants whose VEP
    # transcript consequences actually name this gene.
    with_vep = in_interval.annotate(vep=vep_ht[in_interval.key].vep)
    gene_variants = with_vep.filter(
        hl.any(
            lambda consequence: consequence.gene_symbol == gene,
            with_vep.vep.transcript_consequences,
        )
    )

    # Restrict the matrix to the selected variants on both axes.
    kept_indices = gene_variants.idx.collect()
    pair_entries = bm.filter(kept_indices, kept_indices).entries()

    # Re-key the kept variants by their position in the filtered table so they
    # can be joined on the `i` / `j` fields of the entries table.
    by_new_idx = gene_variants.add_index("new_idx").key_by("new_idx")
    return pair_entries.transmute(
        r=pair_entries.entry,
        i_variant=by_new_idx[pair_entries.i],
        j_variant=by_new_idx[pair_entries.j],
    )