예제 #1
0
def test_bgen_samples_outside_bgen_unreadable(tmp_path):
    """Expect PermissionError when the external sample file is opened
    inside the ``noread_permission`` context.

    ``complex.sample`` is first copied into ``tmp_path`` and that copy is
    the file handed to ``noread_permission`` and to ``open_bgen``.
    """
    bgen_filepath = example_filepath2("complex.23bits.bgen")
    samples_filepath = tmp_path / "complex.sample"
    copyfile(example_filepath("complex.sample"), samples_filepath)

    with noread_permission(samples_filepath), pytest.raises(PermissionError):
        open_bgen(
            bgen_filepath,
            samples_filepath=samples_filepath,
            allow_complex=True,
            verbose=False,
        )
예제 #2
0
def test_allele_expectation():
    """Check ``allele_expectation`` values, result shapes and its ValueError case.

    The same value/shape checks are run twice: once with the default
    arguments and once with ``assume_constant_ploidy=False``.
    """
    single_cell = [[[1.01086423, 0.98913577]]]

    # Default arguments: one sample / one variant picked by boolean masks.
    filepath = example_filepath("example.32bits.bgen")
    with open_bgen(filepath, verbose=False) as reader:
        index = np.s_[reader.samples == "sample_005",
                      reader.rsids == "RSID_6"]
        expectation = reader.allele_expectation(index)
        assert np.allclose(expectation, single_cell)

    # The haplotypes file is expected to make this sequence raise ValueError.
    with pytest.raises(ValueError):
        filepath = example_filepath("haplotypes.bgen")
        with open_bgen(filepath, verbose=False) as reader:
            reader.allele_expectation()

    # Default arguments: all samples, empty variant selection.
    filepath = example_filepath("example.32bits.bgen")
    with open_bgen(filepath, verbose=False) as reader:
        expectation = reader.allele_expectation(np.s_[:, []])
        assert expectation.shape == (500, 0, 2)

    # Same value check without assuming constant ploidy.
    filepath = example_filepath("example.32bits.bgen")
    with open_bgen(filepath, verbose=False) as reader:
        index = np.s_[reader.samples == "sample_005",
                      reader.rsids == "RSID_6"]
        expectation = reader.allele_expectation(
            index, assume_constant_ploidy=False)
        assert np.allclose(expectation, single_cell)

    # Same shape check without assuming constant ploidy.
    filepath = example_filepath("example.32bits.bgen")
    with open_bgen(filepath, verbose=False) as reader:
        expectation = reader.allele_expectation(
            np.s_[:, []], assume_constant_ploidy=False)
        assert expectation.shape == (500, 0, 2)
예제 #3
0
def test_to_improve_coverage2():
    """Open ``complex.bgen`` twice after planting an empty ``.temp`` metadata file.

    Any existing metadata file is removed first, so the first open starts
    with only the empty ``<metadata name>.temp`` sibling present.
    """
    filepath = example_filepath2("complex.bgen")
    samplepath = example_filepath2("complex.sample")
    allow_complex = True

    metadata2_path = open_bgen._metadata_path_from_filename(
        filepath, samples_filepath=samplepath, allow_complex=allow_complex)

    if metadata2_path.exists():
        metadata2_path.unlink()
    # Create an empty ".temp" file next to where the metadata file would go.
    temp_path = metadata2_path.parent / (metadata2_path.name + ".temp")
    temp_path.touch()

    open_kwargs = dict(
        samples_filepath=samplepath,
        allow_complex=allow_complex,
        verbose=True,
    )
    # First open (original note: creates the metadata2.mmm file).
    bgen2 = open_bgen(filepath, **open_kwargs)
    # Second open with identical arguments; rebinding drops the first reader.
    bgen2 = open_bgen(filepath, **open_kwargs)

    del bgen2
예제 #4
0
def test_close_del_with():
    """``read()`` must raise ValueError once the reader has been closed."""
    filepath = example_filepath2("example.32bits.bgen")

    # Case 1: the reader was closed by leaving its ``with`` block.
    with open_bgen(filepath, verbose=False) as reader:
        pass
    with pytest.raises(ValueError):
        reader.read()

    # Case 2: the reader was closed by an explicit ``close()`` call.
    reader = open_bgen(filepath, verbose=False)
    reader.close()
    with pytest.raises(ValueError):
        reader.read()
예제 #5
0
def test_open_bgen_phased_genotype():
    """Check variant metadata, sample ids and two reads of ``haplotypes.bgen``."""
    # (variant index, chromosome, id, nalleles, allele_ids, position, rsid)
    expected_variants = [
        (0, "1", "SNP1", 2, "A,G", 1, "RS1"),
        (2, "1", "SNP3", 2, "A,G", 3, "RS3"),
    ]
    # (sample index, sample id)
    expected_samples = [(0, "sample_0"), (2, "sample_2"), (-1, "sample_3")]

    filepath = example_filepath2("haplotypes.bgen")
    with open_bgen(filepath, verbose=False) as reader:
        for idx, chrom, snp_id, nalleles, alleles, pos, rsid in expected_variants:
            assert_equal(reader.chromosomes[idx], chrom)
            assert_equal(reader.ids[idx], snp_id)
            assert_equal(reader.nalleles[idx], nalleles)
            assert_equal(reader.allele_ids[idx], alleles)
            assert_equal(reader.positions[idx], pos)
            assert_equal(reader.rsids[idx], rsid)

        for idx, sample_id in expected_samples:
            assert_equal(reader.samples[idx], sample_id)

        first_cell = reader.read((0, 0))
        assert_allclose(first_cell[0, 0, :], [1.0, 0.0, 1.0, 0.0])
        last_cell = reader.read((-1, -1))
        assert_allclose(last_cell[0, 0, :], [1.0, 0.0, 0.0, 1.0])
예제 #6
0
def random_file_tests(nsamples,
                      nvariants,
                      bits,
                      verbose=False,
                      overwrite=False):
    """Sanity-check reading a random BGEN file of the requested size.

    The file lives under ``BGEN_READER_CACHE_HOME / "test_data"`` and is
    (re)written with ``_write_random`` when ``overwrite`` is true or the
    file does not exist yet. Any existing metadata file for it is removed
    before the file is opened.

    Parameters
    ----------
    nsamples, nvariants, bits
        Used in the file name and passed to ``_write_random``.
    verbose
        Passed to ``_write_random`` and ``open_bgen``.
    overwrite
        Regenerate the file even if it already exists.
    """
    data_dir = BGEN_READER_CACHE_HOME / "test_data"
    filepath = data_dir / f"{nsamples}x{nvariants}.{bits}bits.bgen"

    needs_write = overwrite or not filepath.exists()
    if needs_write:
        _write_random(
            filepath,
            nsamples,
            nvariants,
            bits=bits,
            verbose=verbose,
            cleanup_temp_files=True,
        )

    # NOTE(review): the metadata path is computed with allow_complex=True
    # while the file is opened below with the default -- confirm intended.
    metadata2_path = open_bgen._metadata_path_from_filename(
        filepath, samples_filepath=None, allow_complex=True)
    if metadata2_path.exists():
        metadata2_path.unlink()

    with open_bgen(filepath, verbose=verbose) as reader:
        assert reader.nsamples == nsamples
        assert reader.nvariants == nvariants
        last_variant = reader.read(-1)
        assert last_variant.shape == (nsamples, 1, 3)
        mean = np.nanmean(last_variant)
        # A NaN mean is accepted; otherwise it must lie in [0, 1].
        assert np.isnan(mean) or 0 <= mean <= 1
예제 #7
0
def test_custom_meta_path():
    """After ``custom_meta_path``, ``read_bgen`` and ``open_bgen`` must leave
    exactly two files in the custom directory; both are removed afterwards.
    """
    filepath = example_filepath2("example.bgen")
    custom_path = Path(BGEN_READER_CACHE_HOME, "submeta")

    try:
        os.mkdir(custom_path)
    except FileExistsError:
        # The directory already exists; reuse it.
        pass

    custom_meta_path(custom_path)
    read_bgen(filepath)
    open_bgen(filepath)

    written = list(custom_path.iterdir())
    assert len(written) == 2, "Failed to write files to custom directory"

    # Clean up the two files this test expects to have been written.
    for suffix in (".metadata2.mmm", ".metafile"):
        os.remove(custom_path / (filepath.name + suffix))
예제 #8
0
def test_dosage_example_32bits():
    """Check allele expectations for variants ``[5, 0]`` of the 32-bit example."""
    filepath = example_filepath("example.32bits.bgen")
    with open_bgen(filepath, verbose=False) as bgen:
        expectation = bgen.allele_expectation([5, 0])

        expected_row = [1.9556273911044997, 0.044372608895500334]
        assert_allclose(expectation[7, 0, :], expected_row)
        # Sample 0 of the second requested variant must be all NaN.
        assert all(isnan(expectation[0, 1, :]))
        assert_equal(expectation.shape, (500, 2, 2))
예제 #9
0
def test_bgen_file_not_readable(tmp_path):
    """Expect PermissionError when the BGEN file is opened inside the
    ``noread_permission`` context.

    The example file is copied into ``tmp_path`` and that copy is used.
    """
    filepath = tmp_path / "haplotypes.bgen"
    copyfile(example_filepath2("haplotypes.bgen"), filepath)

    with noread_permission(filepath), pytest.raises(PermissionError):
        with open_bgen(filepath, verbose=False) as _:
            pass
예제 #10
0
def test_zero_width():
    """Allele expectation/frequency with empty sample and/or variant selections.

    Each scenario is run for both values of ``assume_constant_ploidy``.
    """
    filepath = example_filepath("complex.bgen")
    with open_bgen(filepath, allow_complex=True, verbose=False) as bgen:
        for constant_ploidy in (False, True):
            # Scenario 1: empty variant selection.
            expectation = bgen.allele_expectation(
                [], assume_constant_ploidy=constant_ploidy)
            frequency = bgen.allele_frequency(expectation)
            assert expectation.shape == (bgen.nsamples, 0, bgen.nalleles[0])
            assert frequency.shape == (0, bgen.nalleles[0])

            # Scenario 2: empty sample selection, unphased two-allele variants.
            good_variants = logical_not(bgen.phased) * (bgen.nalleles == 2)
            expectation = bgen.allele_expectation(
                ([], good_variants), assume_constant_ploidy=constant_ploidy)
            frequency = bgen.allele_frequency(expectation)
            assert expectation.shape == (0, sum(good_variants),
                                         bgen.nalleles[0])
            # We define the freq of something with no samples as 0
            zero_freq = zeros((sum(good_variants), bgen.nalleles[0]))
            assert_equal(frequency, zero_freq)

            # Scenario 3: empty sample and variant selections.
            expectation = bgen.allele_expectation(
                ([], []), assume_constant_ploidy=constant_ploidy)
            frequency = bgen.allele_frequency(expectation)
            assert expectation.shape == (0, 0, bgen.nalleles[0])
            assert frequency.shape == (0, bgen.nalleles[0])
예제 #11
0
def test_read_multiple_returns():
    """``read`` must return consistent arrays for every combination of
    ``return_probabilities`` / ``return_missings`` / ``return_ploidies``
    used here, both for the full data and for a sub-selection.
    """
    filepath = example_filepath2("example.32bits.bgen")
    with open_bgen(filepath, verbose=False) as reader:
        # Reference: everything in one call.
        full, full_missing, full_ploidy = reader.read(
            return_missings=True, return_ploidies=True)

        probs, missing = reader.read(return_missings=True)
        assert np.allclose(full, probs, equal_nan=True)
        assert np.allclose(full_missing, missing, equal_nan=False)

        ploidy = reader.read(return_probabilities=False, return_ploidies=True)
        assert np.allclose(full_ploidy, ploidy, equal_nan=False)

        # Same checks on samples 10:30:2 and variants [11, 9].
        sub_index = (slice(10, 30, 2), [11, 9])

        probs, missing = reader.read(sub_index, return_missings=True)
        assert np.allclose(full[10:30:2, :][:, [11, 9]], probs, equal_nan=True)
        assert np.allclose(
            full_missing[10:30:2, :][:, [11, 9]], missing, equal_nan=False)

        ploidy = reader.read(
            sub_index, return_probabilities=False, return_ploidies=True)
        assert np.allclose(
            full_ploidy[10:30:2, :][:, [11, 9]], ploidy, equal_nan=False)
예제 #12
0
def test_coverage3():
    """Two ``open_bgen`` calls that are each expected to raise ValueError."""
    # Case 1: sample file of the wrong size for the BGEN file.
    with pytest.raises(ValueError), open_bgen(
            example_filepath2("example.bgen"),
            samples_filepath=example_filepath("complex.sample"),
            verbose=False,
    ) as _:
        pass

    # Case 2: complex.bgen opened without ``allow_complex``.
    with pytest.raises(ValueError), open_bgen(
            example_filepath2("complex.bgen"),
            verbose=False,
    ) as _:
        pass
예제 #13
0
def test_read_indexing():
    """Compare ``read`` with NumPy indexing of the fully-read array.

    Covers a single integer, lists, slices, boolean lists, ``None``,
    ``(samples, variants)`` tuples, negative indices, ``np.s_`` and empty
    selections. The reader is opened in a ``with`` block so it is closed
    even when an assertion fails.
    """
    filepath = example_filepath2("example.32bits.bgen")
    with open_bgen(filepath, verbose=False) as bgen2:
        full = bgen2.read()

        # --- variant-only indices ---
        val = bgen2.read(22)
        assert np.allclose(full[:, [22]], val, equal_nan=True)

        val = bgen2.read([22])
        assert np.allclose(full[:, [22]], val, equal_nan=True)

        val = bgen2.read([22, 30])
        assert np.allclose(full[:, [22, 30]], val, equal_nan=True)

        val = bgen2.read(slice(10, 30, 2))
        assert np.allclose(full[:, 10:30:2], val, equal_nan=True)

        bool_list = [i % 2 == 0 for i in range(bgen2.nvariants)]
        val = bgen2.read(bool_list)
        assert np.allclose(full[:, bool_list], val, equal_nan=True)

        # --- (samples, variants) tuples ---
        val = bgen2.read((None, None))
        assert np.allclose(full, val, equal_nan=True)

        val = bgen2.read((22, None))
        assert np.allclose(full[[22], :], val, equal_nan=True)

        val = bgen2.read((22, [11, 9]))
        assert np.allclose(full[[22], :][:, [11, 9]], val, equal_nan=True)

        val = bgen2.read(([22, 30], [11, 9]))
        assert np.allclose(full[[22, 30], :][:, [11, 9]], val, equal_nan=True)

        val = bgen2.read((slice(10, 30, 2), [11, 9]))
        assert np.allclose(full[10:30:2, :][:, [11, 9]], val, equal_nan=True)

        bool_list = [i % 2 == 0 for i in range(bgen2.nsamples)]
        val = bgen2.read((bool_list, [11, 9]))
        assert np.allclose(full[bool_list, :][:, [11, 9]], val, equal_nan=True)

        val = bgen2.read(([-1], [-1]))
        assert np.allclose(full[-1, -1], val, equal_nan=True)

        val = bgen2.read(np.s_[10:30:2, :5])
        assert np.allclose(full[10:30:2, :5, :], val, equal_nan=True)

        # Read no variants
        val, missing, ploidy = bgen2.read(
            [], return_missings=True, return_ploidies=True
        )
        assert val.shape == (bgen2.nsamples, 0, bgen2.max_combinations)
        assert missing.shape == (bgen2.nsamples, 0)
        assert ploidy.shape == (bgen2.nsamples, 0)

        # Read no samples and no variants
        val, missing, ploidy = bgen2.read(
            ([], []), return_missings=True, return_ploidies=True
        )
        assert val.shape == (0, 0, bgen2.max_combinations)
        assert missing.shape == (0, 0)
        assert ploidy.shape == (0, 0)
예제 #14
0
def test_bgen_samples_specify_samples_file():
    """Sample ids must come from the explicitly supplied sample file.

    ``allow_complex=True`` is passed because the other tests in this
    collection open the complex example files that way and expect a
    ValueError when ``complex.bgen`` is opened without it. The reader is
    used as a context manager so that it is closed when the test ends.
    """
    with open_bgen(
            example_filepath2("complex.23bits.bgen"),
            samples_filepath=example_filepath("complex.sample"),
            allow_complex=True,
            verbose=False,
    ) as data:
        samples = ["sample_0", "sample_1", "sample_2", "sample_3"]
        assert all(data.samples == samples)
예제 #15
0
def test_freq():
    """Check the two allele frequencies computed for variant ``RSID_6``."""
    filepath = example_filepath("example.32bits.bgen")
    with open_bgen(filepath, verbose=False) as bgen:
        is_rsid_6 = bgen.rsids == "RSID_6"
        frequency = bgen.allele_frequency(bgen.allele_expectation(is_rsid_6))

        assert_allclose(frequency[0, 0], 229.23103218810434)
        assert_allclose(frequency[0, 1], 270.7689678118956)
예제 #16
0
def test_bgen_samples_not_present():
    """A file opened without any sample file must expose the ids
    ``sample_0`` .. ``sample_3``.
    """
    bgen_path = example_filepath2("complex.23bits.no.samples.bgen")
    expected = [f"sample_{i}" for i in range(4)]

    with open_bgen(bgen_path, allow_complex=True, verbose=False) as data:
        assert all(data.samples == expected)
예제 #17
0
def test_coverage4(tmp_path):
    """Open the example file by bare file name from inside its own directory.

    The working directory is always restored, even if the test fails.
    """
    original_cwd = os.getcwd()
    filepath = example_filepath2("example.32bits.bgen")
    try:
        os.chdir(filepath.parent)
        # Only the file name is passed, so the path is relative to the cwd.
        with open_bgen(filepath.name) as reader:
            assert reader.shape == (500, 199, 3)
    finally:
        os.chdir(original_cwd)
예제 #18
0
def filtered_microarray_snps(region):
    """
    Iterate over a region returning genotypes at SNP loci.

    Returns microarray measured SNPs that have been QCed and phased
    (i.e. the ones in the bgen files) which does not include
    all microarray measured SNPs

    Parameters
    ----------
    region : str
        Region formatted as ``"<chrom>:<start>-<end>"``; both ends are
        inclusive.

    Yields
    ------
    tuple
        ``(genotypes, chrom, pos, alleles, None, None)`` where ``chrom`` is
        the chromosome string parsed from ``region``.
        NOTE(review): earlier documentation described a 5-tuple ending in
        ``locus_filtered`` (always None); the code yields two trailing
        ``None`` values -- confirm the meaning of the last slot with callers.

    genotypes are pairs of indicators:
        0: 1st allele
        1: 2nd allele
    note that bgen files don't distinguish between reference and alt
    alleles. 1st allele may or may not be the reference. (This
    is different than imputed SNPs!)

    Expecting all samples in the input file to have 0 or 1 probs
    so not filtering any calls

    No filtering for samples with rare genotypes
    Loci with only single genotypes remaining are returned - it is up to
    calling code to filter these out

    The bgen file is opened in a ``with`` block, so it is closed when the
    generator is exhausted or closed.
    """
    chrom, posses = region.split(':')
    start, end = (int(bound) for bound in posses.split('-'))
    bgen_fname = f'{ukb}/microarray/ukb_hap_chr{chrom}_v2.bgen'

    with bgen_reader.open_bgen(bgen_fname, allow_complex=True,
                               verbose=False) as bgen:
        for variant_num, pos in enumerate(bgen.positions):
            if pos < start:
                continue
            if pos > end:
                # Stops at the first position past the region; this relies on
                # positions being in ascending order (assumption -- confirm).
                break

            probs, missing, ploidy = bgen.read(variant_num,
                                               return_missings=True,
                                               return_ploidies=True)

            # make sure the record looks as expected
            assert np.all(np.logical_or(probs == 0, probs == 1))
            assert probs.shape[2] == 4
            assert bgen.phased[variant_num]
            assert np.all(ploidy == 2)
            assert not np.any(missing)

            # Since all alleles are biallelic, the genotype can
            # be written as the presence or absence of the second allele
            # for each haplotype for each participant
            reformatted_gts = probs[:, 0, [1, 3]]

            yield (reformatted_gts, chrom, pos, bgen.allele_ids[variant_num],
                   None, None)
예제 #19
0
def test_threads():
    """``read`` must return the expected shape for 1 and 2 threads,
    for both a full and an empty variant selection.
    """
    filepath = example_filepath("example.32bits.bgen")
    indices = [np.s_[:, :], np.s_[:, []]]

    with open_bgen(filepath, verbose=False) as reader:
        for num_threads in (1, 2):
            for index in indices:
                sample_index, variant_index = index
                probs = reader.read(index=index, num_threads=num_threads)
                n_rows = len(reader.samples[sample_index])
                n_cols = len(reader.ids[variant_index])
                assert probs.shape == (n_rows, n_cols, 3)
예제 #20
0
def run(path, batch_size=1000):
    """Read every variant of a BGEN file in batches and report the entry count.

    Prints the number of variants found and, at the end, the total of
    ``shape[0] * shape[1]`` over all batches read.

    Parameters
    ----------
    path
        Passed to ``open_bgen``.
    batch_size : int
        Number of variants requested per ``read`` call.
    """
    entries_read = 0
    with open_bgen(path) as bgen:
        n = bgen.nvariants
        print(f'Found {n} variants')
        for batch_start in range(0, n, batch_size):
            batch = bgen.read(slice(batch_start, batch_start + batch_size))
            n_rows, n_cols = batch.shape[:2]
            entries_read += n_rows * n_cols
    print(f'Number of entries read: {entries_read}')
예제 #21
0
def test_bgen_samples_specify_samples_file():
    """Sample ids must match ``sample_0`` .. ``sample_3`` when a sample
    file is supplied explicitly.
    """
    expected = ["sample_0", "sample_1", "sample_2", "sample_3"]
    open_kwargs = dict(
        samples_filepath=example_filepath("complex.sample"),
        allow_complex=True,
        verbose=False,
    )
    with open_bgen(example_filepath2("complex.23bits.bgen"),
                   **open_kwargs) as data:
        assert all(data.samples == expected)
예제 #22
0
def test_read_dtype_and_order():
    """Check default and explicitly requested dtype / memory order of ``read``."""
    filepath = example_filepath2("example.32bits.bgen")
    with open_bgen(filepath, verbose=False) as reader:
        # Defaults: float64, Fortran-contiguous only.
        default = reader.read()
        assert default.dtype == np.float64
        assert default.flags["F_CONTIGUOUS"]
        assert not default.flags["C_CONTIGUOUS"]

        # Explicit request: float32, C-contiguous only.
        single = reader.read(None, dtype="float32", order="C")
        assert single.dtype == np.float32
        assert single.flags["C_CONTIGUOUS"]
        assert not single.flags["F_CONTIGUOUS"]

        # Values must agree within float32 precision.
        assert np.allclose(default, single, atol=5e-8, equal_nan=True)
예제 #23
0
def test_to_improve_coverage():
    """Check reader metadata, two reads, and the metadata file's mtime
    relative to the BGEN file after the BGEN file is touched.
    """
    filepath = example_filepath2("example.32bits.bgen")

    bgen2 = open_bgen(filepath, verbose=False)  # Creates metadata2.mmm file
    assert_equal(bgen2.ncombinations[-1], 3)
    assert_equal(bgen2.phased[-1], False)

    # (attribute name, expected value of its last element)
    expected_last = [
        ("ids", "SNPID_200"),
        ("rsids", "RSID_200"),
        ("chromosomes", "01"),
        ("positions", 100001),
        ("nalleles", 2),
        ("allele_ids", "A,G"),
        ("ncombinations", 3),
        ("phased", False),
        ("samples", "sample_500"),
    ]
    probabilities = [
        0.97970582847010945215516,
        0.01947019668749305418287,
        0.00082397484239749366197,
    ]

    with open_bgen(filepath) as bgen2:  # Reuses metadata2.mmm file
        assert_equal(str(bgen2), f"open_bgen('{filepath.name}')")
        assert_equal(bgen2.nsamples, 500)
        assert_equal(bgen2.nvariants, 199)
        assert_equal(bgen2.shape, (500, 199, 3))
        for attribute, expected in expected_last:
            assert_equal(getattr(bgen2, attribute)[-1], expected)

        single = bgen2.read((2, 1))
        assert_allclose(single[0, 0, :], probabilities)

        everything = bgen2.read()
        assert_allclose(everything[2, 1, :], probabilities)

    # confirm that out-of-date metadata2 file will be updated
    metadata2 = bgen2._metadata2_path
    del bgen2
    assert os.path.getmtime(metadata2) >= os.path.getmtime(filepath)
    filepath.touch()
    assert os.path.getmtime(metadata2) <= os.path.getmtime(filepath)
    bgen2 = open_bgen(filepath, verbose=False)  # Creates metadata2.mmm file
    del bgen2
    assert os.path.getmtime(metadata2) >= os.path.getmtime(filepath)
예제 #24
0
파일: pgs.py 프로젝트: AlexTISYoung/SNIPar
def compute(pgs, bedfile=None, bgenfile=None, par_gts_f=None, ped=None, sib=False, compute_controls=False, verbose=True):
    """Compute a polygenic score (PGS) for the individuals with observed genotypes and observed/imputed parental genotypes.

    Args:
        pgs : :class:`snipar.pgs`
            the PGS, defined by the weights for a set of SNPs and the alleles of those SNPs
        bedfile : :class:`str`
            path to bed file with observed genotypes
        bgenfile : :class:`str`
            path to bgen file with observed genotypes. If both bedfile and bgenfile are given,
            the SNP ids used for the overlap check come from the bgen file.
        par_gts_f : :class:`str`
            path to HDF5 file with imputed parental genotypes
        ped
            forwarded unchanged to ``get_gts_matrix``
        sib : :class:`bool`
            Compute the PGS for genotyped individuals with at least one genotyped sibling and observed/imputed parental genotypes. Default False.
        compute_controls : :class:`bool`
            Compute polygenic scores for control families (families with observed parental genotypes set to missing). Default False.
        verbose : :class:`bool`
            forwarded unchanged to ``get_gts_matrix``

    Returns:
        pg : :class:`snipar.gtarray`
            Return the polygenic score as a genotype array with columns: individual's PGS, mean of their siblings' PGS, observed/imputed paternal PGS,
            observed/imputed maternal PGS.
            When ``compute_controls`` is True a list of four scores is returned instead (the last one
            computed with a combined 'parental' column).
            Returns None (after printing a message) when no SNP in ``pgs`` is present in the observed genotypes.

    Raises:
        ValueError
            if neither ``bedfile`` nor ``bgenfile`` is provided.

    """
    # Without either input there are no SNP ids to compare against; fail
    # with a clear message instead of an unbound-variable error below.
    if bedfile is None and bgenfile is None:
        raise ValueError('Must provide one of bedfile or bgenfile')
    # Check for SNP overlap
    if bedfile is not None:
        bed = Bed(bedfile, count_A1=True)
        snp_ids = bed.sid
    if bgenfile is not None:
        bgen = open_bgen(bgenfile)
        snp_ids = bgen.ids
        # A single distinct value in ``ids`` cannot identify variants,
        # so fall back to the rsids.
        if np.unique(snp_ids).shape[0] == 1:
            snp_ids = bgen.rsids
    snp_set = set(snp_ids)
    if not any(x in snp_set for x in pgs.snp_ids):
        print('No overlap between variants in weights file and observed genotypes')
        return None
    # Get genotype matrix
    G = get_gts_matrix(bedfile=bedfile, bgenfile=bgenfile, par_gts_f=par_gts_f, ped=ped, snp_ids=pgs.snp_ids, sib=sib, compute_controls=compute_controls, verbose=verbose)
    if sib:
        cols = np.array(['proband', 'sibling', 'paternal', 'maternal'])
    else:
        cols = np.array(['proband', 'paternal', 'maternal'])
    if not compute_controls:
        return pgs.compute(G, cols)
    # With controls, G holds four genotype arrays: the first three use the
    # regular columns, the fourth uses a combined 'parental' column.
    pgs_out = [pgs.compute(x, cols) for x in G[0:3]]
    if sib:
        o_cols = np.array(['proband', 'sibling', 'parental'])
    else:
        o_cols = np.array(['proband', 'parental'])
    pgs_out.append(pgs.compute(G[3], o_cols))
    return pgs_out
예제 #25
0
def test_dosage1():
    """Check the dosage of allele 1 at variant 3 for the first two samples."""
    filepath = example_filepath("example.32bits.bgen")
    with open_bgen(filepath, verbose=False) as bgen:
        expectation = bgen.allele_expectation(3)
        # The dosage is the expectation of the allele chosen as the
        # reference/alternative one -- here the allele in position 1.
        dosage = expectation[..., 1]
        expected_first_two = [1.9618530841455453, 0.009826655967586362]
        assert_allclose(dosage[:2, 0], expected_first_two)
예제 #26
0
def test_open_bgen_complex():
    """Check metadata, probabilities, ploidies and phasing of ``complex.23bits.bgen``."""
    # (variant index, chromosome, id, nalleles, allele_ids, position, rsid)
    expected_variants = [
        (0, "01", "", 2, "A,G", 1, "V1"),
        (7, "01", "", 7, "A,G,GT,GTT,GTTT,GTTTT,GTTTTT", 8, "M8"),
        (-1, "01", "", 2, "A,G", 10, "M10"),
    ]
    # (variant index, expected ploidy of each sample)
    expected_ploidies = [(0, [1, 2, 2, 2]), (-1, [4, 4, 4, 4])]

    filepath = example_filepath2("complex.23bits.bgen")
    with open_bgen(filepath, allow_complex=True, verbose=False) as reader:
        for idx, chrom, var_id, nalleles, alleles, pos, rsid in expected_variants:
            assert_equal(reader.chromosomes[idx], chrom)
            assert_equal(reader.ids[idx], var_id)
            assert_equal(reader.nalleles[idx], nalleles)
            assert_equal(reader.allele_ids[idx], alleles)
            assert_equal(reader.positions[idx], pos)
            assert_equal(reader.rsids[idx], rsid)

        assert_equal(reader.samples[0], "sample_0")
        assert_equal(reader.samples[3], "sample_3")

        # Probabilities of individual (sample, variant) cells.
        cell = reader.read((0, 0))
        assert_allclose(cell[0, 0, :2], [1, 0])
        assert isnan(cell[0, 0, 2])

        cell = reader.read((1, 0))
        assert_allclose(cell[0, 0, :3], [1, 0, 0])

        cell = reader.read((-1, -1))
        assert_allclose(cell[0, 0, :5], [0, 0, 0, 1, 0])

        # Ploidies only, without reading probabilities.
        for idx, per_sample in expected_ploidies:
            ploidy = reader.read(
                idx, return_probabilities=False, return_ploidies=True)
            assert_allclose(ploidy[:, 0], per_sample)

        assert_equal(reader.phased.dtype.name, "bool")
        expected_phased = array(
            [False, True, True, False, True, True, True, True, False, False])
        assert array_equal(reader.phased, expected_phased)
예제 #27
0
 def __init__(
     self,
     filepath: Union[str, Path],
     samples_filepath: Optional[Union[str, Path]] = None,
     allow_complex: bool = False,
     verbose: bool = True,
 ):
     """Open the bgen file and load the sample ids via ``_read_sample``.

     Args:
         filepath: bgen file passed to ``bgen_reader.open_bgen``.
         samples_filepath: passed to ``self._read_sample``.
         allow_complex: forwarded to ``bgen_reader.open_bgen``.
         verbose: forwarded to ``bgen_reader.open_bgen``.

     Raises:
         ValueError: if the number of samples read from the sample file
             differs from the number of samples in the bgen file.
     """
     self._bgen = bgen_reader.open_bgen(
         filepath, allow_complex=allow_complex, verbose=verbose)
     self._samples = self._read_sample(samples_filepath)

     n_in_bgen = self._bgen.samples.shape[0]
     n_in_sample_file = self._samples.shape[0]
     if n_in_bgen != n_in_sample_file:
         raise ValueError(
             'sample file length and bgen file length do not match.')
예제 #28
0
def open_bgen(filename: str, verbose: bool = False) -> bgen_reader.open_bgen:
    """Wrapper of bgen_reader.open_bgen that checks if sample ids make sense.

    If the first sample id is the generic ``'sample_0'``, a warning is
    printed and the file is reopened through ``_bgen`` together with the
    sibling ``.sample`` file (the file name with its last four characters
    replaced by ``'sample'``).

    Args:
        filename: path of the bgen file; anything ``str()`` turns into
            the path is accepted when deriving the sample file name.
        verbose: forwarded to the underlying readers.

    Returns:
        The plain ``bgen_reader.open_bgen`` reader, or a ``_bgen`` instance
        when the sample ids had to be taken from the ``.sample`` file.

    Raises:
        FileNotFoundError: if the sample ids are generic and the sibling
            ``.sample`` file does not exist.
    """
    bgen = bgen_reader.open_bgen(filename, verbose=verbose)
    if bgen.samples[0] != 'sample_0':
        return bgen

    print(
        'WARNING: Sample ids in bgen file are generic. Trying to read the corresponding .sample file ...'
    )
    # This reader is not returned on this path; close it rather than
    # leaving the open file to the garbage collector.
    bgen.close()
    # str() keeps the slicing valid when a pathlib.Path is passed in.
    samples_filepath = str(filename)[:-4] + 'sample'
    if not path.exists(samples_filepath):
        raise FileNotFoundError(f'{samples_filepath} does not exist.')
    return _bgen(filename,
                 verbose=verbose,
                 samples_filepath=samples_filepath)
예제 #29
0
def test_read_max_combinations():
    """Check ``read(max_combinations=...)`` padding and its rejected values."""
    filepath = example_filepath2("example.32bits.bgen")
    with open_bgen(filepath, verbose=False) as reader:
        # main data as only a few missing
        default_nan_fraction = np.mean(np.isnan(reader.read()))
        assert default_nan_fraction < 2.1e-05

        padded = reader.read(max_combinations=5)
        # main data as only a few missing
        padded_nan_fraction = np.mean(np.isnan(padded[:, :, :3]))
        assert padded_nan_fraction < 2.1e-05
        # all the extra are NaN
        assert np.all(np.isnan(padded[:, :, 3:]))

        # Values below 3 must be rejected.
        for too_small in (2, 1, 0):
            with pytest.raises(ValueError):
                reader.read(max_combinations=too_small)
예제 #30
0
def test_open_bgen_variants_info():
    """Check variant metadata, sample ids and three reads of the 32-bit example."""
    # (variant index, chromosome, id, nalleles, allele_ids, position, rsid)
    expected_variants = [
        (0, "01", "SNPID_2", 2, "A,G", 2000, "RSID_2"),
        (7, "01", "SNPID_9", 2, "A,G", 9000, "RSID_9"),
        (-1, "01", "SNPID_200", 2, "A,G", 100001, "RSID_200"),
    ]
    # (sample index, sample id)
    expected_samples = [
        (0, "sample_001"),
        (7, "sample_008"),
        (-1, "sample_500"),
    ]

    filepath = example_filepath2("example.32bits.bgen")
    with open_bgen(filepath, verbose=False) as bgen2:
        for idx, chrom, snp_id, nalleles, alleles, pos, rsid in expected_variants:
            assert_equal(bgen2.chromosomes[idx], chrom)
            assert_equal(bgen2.ids[idx], snp_id)
            assert_equal(bgen2.nalleles[idx], nalleles)
            assert_equal(bgen2.allele_ids[idx], alleles)
            assert_equal(bgen2.positions[idx], pos)
            assert_equal(bgen2.rsids[idx], rsid)

        for idx, sample_id in expected_samples:
            assert_equal(bgen2.samples[idx], sample_id)

        # Sample 0 / variant 0 is entirely missing.
        g = bgen2.read((0, 0))
        assert all(isnan(g[0, 0, :]))

        g = bgen2.read((1, 0))
        a = [0.027802362811705648, 0.00863673794284387, 0.9635608992454505]
        assert_allclose(g[0, 0, :], a)

        b = [
            0.97970582847010945215516,
            0.01947019668749305418287,
            0.00082397484239749366197,
        ]
        g = bgen2.read((2, 1))
        assert_allclose(g[0, 0, :], b)