Ejemplo n.º 1
0
def test_ihh01_scan_int8_d():
    # Scan a 4-variant x 6-haplotype array under three parameter settings;
    # in every setting the expected ihh0 and ihh1 values are the same.
    nan = np.nan
    h = np.array([[0, 0, 1, 1, 1, 0],
                  [0, 1, 0, 1, 0, 1],
                  [1, 0, 0, 0, 1, 1],
                  [0, 0, 0, 1, 1, 1]], dtype='i1')
    gaps = np.array([10, 10, 10], dtype='f8')

    # NOTE(review): looks like the area of two trapezoids of width 10 with
    # heights 1 -> 1/3 -> 0; confirm against the scan implementation.
    x = (10 * (1 + 1 / 3) / 2) + (10 * (1 / 3 + 0) / 2)

    # (scan keyword arguments, expected ihh0, expected ihh1)
    scenarios = [
        (dict(min_ehh=0.05),
         [nan, nan, x, x],
         [nan, nan, x, x]),
        (dict(min_ehh=0),
         [nan, nan, x, x],
         [nan, nan, x, x]),
        (dict(min_ehh=0, include_edges=True),
         [0, 10 * 2 / 3, x, x],
         [0, 10 * 2 / 3, x, x]),
    ]

    for scan_kwargs, expect_ihh0, expect_ihh1 in scenarios:
        ihh0, ihh1 = ihh01_scan_int8(h, gaps, **scan_kwargs)
        assert_array_nanclose(expect_ihh0, ihh0)
        assert_array_nanclose(expect_ihh1, ihh1)
Ejemplo n.º 2
0
def ihs(h, pos, min_ehh=0.05):
    """Return the unstandardized integrated haplotype score (IHS) of every
    variant.

    The score is the natural log of the ratio between the integrated
    haplotype homozygosity (IHH) of the alternate allele and that of the
    reference allele, where each IHH is the sum of a forward and a backward
    scan over the variants.

    Parameters
    ----------
    h : array_like, int, shape (n_variants, n_haplotypes)
        Haplotype array.
    pos : array_like, int, shape (n_variants,)
        Variant positions on physical or genetic map.
    min_ehh: float, optional
        Minimum EHH beyond which to truncate integrated haplotype
        homozygosity calculation.

    Returns
    -------
    score : ndarray, float, shape (n_variants,)
        Unstandardized IHS scores.

    Notes
    -----

    IHS is calculated for all variants. To leave out variants below a given
    minor allele frequency, filter the haplotype array before calling this
    function.

    The comparison is between the reference and alternate alleles. Scores can
    be polarised by switching the sign for any variant where the reference
    allele is derived.

    NaN is returned for any variant where haplotype homozygosity does not
    decay below `min_ehh` before the first or last variant is reached. Set
    `min_ehh` to None to disable this behaviour.

    Large gaps between variants are currently not accounted for, so there
    will be edge effects near any large gaps.

    The returned score is unstandardized. Usually scores are subsequently
    normalised within allele frequency bins.

    """

    from allel.opt.stats import ihh01_scan_int8

    # forward pass over the variants in the order given
    fwd0, fwd1 = ihh01_scan_int8(h, pos, min_ehh=min_ehh)

    # backward pass: scan the reversed inputs
    rev0, rev1 = ihh01_scan_int8(h[::-1], pos[::-1], min_ehh=min_ehh)

    # flip the backward results into input order and combine per allele
    total0 = fwd0 + rev0[::-1]
    total1 = fwd1 + rev1[::-1]

    # unstandardized score
    return np.log(total1 / total0)
Ejemplo n.º 3
0
def test_ihh01_scan_int8_a():
    # Scan a 4-variant x 3-haplotype array, first truncating at min_ehh=0.05
    # without edges, then with min_ehh=0 and edges included.
    nan = np.nan
    h = np.array([[0, 0, 1], [0, 1, 1], [1, 1, 0], [1, 0, 0]], dtype='i1')
    gaps = np.array([10, 10, 10], dtype='f8')

    def scan_and_check(expect_ihh0, expect_ihh1, **scan_kwargs):
        # run one scan and compare both outputs, treating NaNs as equal
        ihh0, ihh1 = ihh01_scan_int8(h, gaps, **scan_kwargs)
        assert_array_nanclose(expect_ihh0, ihh0)
        assert_array_nanclose(expect_ihh1, ihh1)

    scan_and_check([nan, nan, nan, 5],
                   [nan, 5, 5, nan],
                   min_ehh=0.05, include_edges=False)

    scan_and_check([0, nan, nan, 5],
                   [nan, 5, 5, nan],
                   min_ehh=0, include_edges=True)
Ejemplo n.º 4
0
def test_ihh01_scan_int8_c():
    # Every variant carries the same haplotype pattern: with the default
    # edge handling all values are NaN, with edges included the expected
    # values grow by 10 per variant.
    nan = np.nan
    h = np.array([[0, 0, 0, 1, 1, 1]] * 4, dtype='i1')
    gaps = np.array([10, 10, 10], dtype='f8')

    # (scan keyword arguments, expected ihh0, expected ihh1)
    scenarios = [
        (dict(min_ehh=0.05),
         [nan, nan, nan, nan],
         [nan, nan, nan, nan]),
        (dict(min_ehh=0, include_edges=True),
         [0, 10, 20, 30],
         [0, 10, 20, 30]),
    ]

    for scan_kwargs, expect_ihh0, expect_ihh1 in scenarios:
        ihh0, ihh1 = ihh01_scan_int8(h, gaps, **scan_kwargs)
        assert_array_nanclose(expect_ihh0, ihh0)
        assert_array_nanclose(expect_ihh1, ihh1)
Ejemplo n.º 5
0
def test_ihh01_scan_int8_e():
    # Effect of min_maf: the same scan is run with min_maf=0 and min_maf=0.4,
    # everything else held fixed.
    nan = np.nan
    h = np.array([[0, 0, 1]] * 3, dtype='i1')
    gaps = np.array([10, 10], dtype='f8')

    # min_maf -> (expected ihh0, expected ihh1)
    expectations = [
        (0, ([0, 10, 20], [nan, nan, nan])),
        (0.4, ([nan, nan, nan], [nan, nan, nan])),
    ]

    for min_maf, (expect_ihh0, expect_ihh1) in expectations:
        ihh0, ihh1 = ihh01_scan_int8(h, gaps, min_ehh=0,
                                     min_maf=min_maf, include_edges=True)
        assert_array_nanclose(expect_ihh0, ihh0)
        assert_array_nanclose(expect_ihh1, ihh1)
Ejemplo n.º 6
0
def test_ihh01_scan_int8_b():
    # Scan a 4-variant x 4-haplotype array under three parameter settings;
    # ihh1 is expected to be NaN everywhere in all of them.
    nan = np.nan
    h = np.array([[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0], [1, 0, 0, 0]],
                 dtype='i1')
    gaps = np.array([10, 10, 10], dtype='f8')

    # NOTE(review): looks like the area of two trapezoids of width 10 with
    # heights 1 -> 1/3 -> 0; confirm against the scan implementation.
    x = (10 * (1 + 1 / 3) / 2) + (10 * (1 / 3 + 0) / 2)

    # (scan keyword arguments, expected ihh0, expected ihh1)
    scenarios = [
        (dict(min_ehh=0.05, include_edges=False),
         [nan, nan, x, x],
         [nan, nan, nan, nan]),
        (dict(min_ehh=0, include_edges=False),
         [nan, nan, x, x],
         [nan, nan, nan, nan]),
        (dict(min_ehh=0, include_edges=True),
         [0, 10 * (1 + 1 / 3) / 2, x, x],
         [nan, nan, nan, nan]),
    ]

    for scan_kwargs, expect_ihh0, expect_ihh1 in scenarios:
        ihh0, ihh1 = ihh01_scan_int8(h, gaps, **scan_kwargs)
        assert_array_nanclose(expect_ihh0, ihh0)
        assert_array_nanclose(expect_ihh1, ihh1)
Ejemplo n.º 7
0
def test_ihh01_scan_int8_e():
    # min_maf: compare a scan with no MAF threshold against one with a
    # threshold of 0.4 on the same data.
    all_nan = [np.nan, np.nan, np.nan]
    h = np.array([[0, 0, 1], [0, 0, 1], [0, 0, 1]], dtype='i1')
    gaps = np.array([10, 10], dtype='f8')

    def run_scan(min_maf):
        # only min_maf varies between the two scans
        return ihh01_scan_int8(h,
                               gaps,
                               min_ehh=0,
                               min_maf=min_maf,
                               include_edges=True)

    # no MAF threshold
    ihh0, ihh1 = run_scan(0)
    assert_array_nanclose([0, 10, 20], ihh0)
    assert_array_nanclose(list(all_nan), ihh1)

    # MAF threshold of 0.4
    ihh0, ihh1 = run_scan(0.4)
    assert_array_nanclose(list(all_nan), ihh0)
    assert_array_nanclose(list(all_nan), ihh1)
Ejemplo n.º 8
0
    def test_ihh01_scan_int8(self):
        # Four haplotype arrays, each scanned twice: once with min_ehh=0.05
        # and once with min_ehh=None.
        from allel.opt.stats import ihh01_scan_int8

        nan = np.nan
        pos = [10, 20, 30, 40]
        two_thirds = 10*2/3

        # each entry: (haplotypes, [(min_ehh, expected ihh0, expected ihh1)])
        cases = [
            # case 1
            (
                np.array([[0, 0, 1],
                          [0, 1, 1],
                          [1, 1, 0],
                          [1, 0, 0]], dtype='i1'),
                [
                    (0.05, [nan, nan, nan, 0], [nan, nan, 0, nan]),
                    (None, [0, nan, nan, 0], [nan, 0, 0, nan]),
                ],
            ),
            # case 2
            (
                np.array([[0, 0, 0, 1],
                          [0, 0, 1, 0],
                          [0, 1, 0, 0],
                          [1, 0, 0, 0]], dtype='i1'),
                [
                    (0.05,
                     [nan, nan, nan, two_thirds],
                     [nan, nan, nan, nan]),
                    (None,
                     [0, two_thirds, two_thirds, two_thirds],
                     [nan, nan, nan, nan]),
                ],
            ),
            # case 3
            (
                np.array([[0, 0, 0, 1, 1, 1],
                          [0, 0, 0, 1, 1, 1],
                          [0, 0, 0, 1, 1, 1],
                          [0, 0, 0, 1, 1, 1]], dtype='i1'),
                [
                    (0.05, [nan, nan, nan, nan], [nan, nan, nan, nan]),
                    (None, [0, 10, 20, 30], [0, 10, 20, 30]),
                ],
            ),
            # case 4
            (
                np.array([[0, 0, 1, 1, 1, 0],
                          [0, 1, 0, 1, 0, 1],
                          [1, 0, 0, 0, 1, 1],
                          [0, 0, 0, 1, 1, 1]], dtype='i1'),
                [
                    (0.05,
                     [nan, nan, nan, two_thirds],
                     [nan, nan, nan, two_thirds]),
                    (None,
                     [0, two_thirds, two_thirds, two_thirds],
                     [0, two_thirds, two_thirds, two_thirds]),
                ],
            ),
        ]

        for h, scans in cases:
            for min_ehh, expect_ihh0, expect_ihh1 in scans:
                ihh0, ihh1 = ihh01_scan_int8(h, pos, min_ehh=min_ehh)
                assert_array_nanclose(expect_ihh0, ihh0)
                assert_array_nanclose(expect_ihh1, ihh1)
Ejemplo n.º 9
0
def ihs(
    h,
    pos,
    map_pos=None,
    min_ehh=0.05,
    min_maf=0.05,
    include_edges=False,
    gap_scale=20000,
    max_gap=200000,
    is_accessible=None,
    use_threads=True,
):
    """Compute the unstandardized integrated haplotype score (IHS) for each
    variant, comparing integrated haplotype homozygosity between the
    reference (0) and alternate (1) alleles.

    Parameters
    ----------
    h : array_like, int, shape (n_variants, n_haplotypes)
        Haplotype array.
    pos : array_like, int, shape (n_variants,)
        Variant positions (physical distance).
    map_pos : array_like, float, shape (n_variants,), optional
        Variant positions (genetic map distance).
    min_ehh: float, optional
        Minimum EHH beyond which to truncate integrated haplotype
        homozygosity calculation.
    min_maf : float, optional
        Do not compute integrated haplotype homozygosity for variants with
        minor allele frequency below this value.
    include_edges : bool, optional
        If True, report scores even if EHH does not decay below `min_ehh`
        before reaching the edge of the data.
    gap_scale : int, optional
        Rescale distance between variants if gap is larger than this value.
    max_gap : int, optional
        Do not report scores if EHH spans a gap larger than this number of
        base pairs.
    is_accessible : array_like, bool, optional
        Genome accessibility array. If provided, distance between variants
        will be computed as the number of accessible bases between them.
    use_threads : bool, optional
        If True, run the forward and backward scans in two threads when more
        than one CPU is available.

    Returns
    -------
    score : ndarray, float, shape (n_variants,)
        Unstandardized IHS scores, ``log(ihh1 / ihh0)`` where each IHH is the
        sum of the forward and backward scan.

    Notes
    -----

    Integrated haplotype homozygosity is not computed for variants with
    minor allele frequency below `min_maf`, so there is no need to filter
    the input haplotype array by allele frequency beforehand. To request
    scores for all variants, set `min_maf` to 0.

    This function computes IHS comparing the reference and alternate alleles.
    These can be polarised by switching the sign for any variant where the
    reference allele is derived.

    This function returns NaN for any IHS calculations where haplotype
    homozygosity does not decay below `min_ehh` before reaching the first or
    last variant. To disable this behaviour, set `include_edges` to True.

    Note that the unstandardized score is returned. Usually these scores are
    then standardized in different allele frequency bins.

    See Also
    --------
    standardize_by_allele_count

    """

    from allel.opt.stats import ihh01_scan_int8

    # check inputs
    h = HaplotypeArray(np.asarray(h, dtype="i1"))
    pos = asarray_ndim(pos, 1)
    check_dim0_aligned(h, pos)

    # compute gaps between variants for integration
    gaps = compute_ihh_gaps(pos, map_pos, gap_scale, max_gap, is_accessible)

    # setup kwargs shared by the forward and backward scans
    kwargs = dict(min_ehh=min_ehh, min_maf=min_maf, include_edges=include_edges)

    if use_threads and multiprocessing.cpu_count() > 1:
        # run with threads: one worker per scan direction
        pool = ThreadPool(2)
        try:
            # scan forward
            result_fwd = pool.apply_async(ihh01_scan_int8, (h, gaps), kwargs)

            # scan backward
            result_rev = pool.apply_async(
                ihh01_scan_int8, (h[::-1], gaps[::-1]), kwargs
            )

            # wait for both to finish
            pool.close()
            pool.join()

            # obtain results; get() re-raises any exception from the worker
            ihh0_fwd, ihh1_fwd = result_fwd.get()
            ihh0_rev, ihh1_rev = result_rev.get()

        finally:
            # always release the pool, including when a scan raised
            pool.terminate()

    else:
        # run without threads

        # scan forward
        ihh0_fwd, ihh1_fwd = ihh01_scan_int8(h, gaps, **kwargs)

        # scan backward
        ihh0_rev, ihh1_rev = ihh01_scan_int8(h[::-1], gaps[::-1], **kwargs)

    # the backward scan ran over reversed inputs; restore input order
    ihh0_rev = ihh0_rev[::-1]
    ihh1_rev = ihh1_rev[::-1]

    # compute unstandardized score
    ihh0 = ihh0_fwd + ihh0_rev
    ihh1 = ihh1_fwd + ihh1_rev
    score = np.log(ihh1 / ihh0)

    return score