コード例 #1
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_write_nan_properties(shared_datadir, tmp_path):
    """NaN chromosome / cm_position properties must read back as zero.

    The NaN-containing arrays are supplied twice: once through ``to_bed``
    (written to disk and re-read) and once as in-memory overrides passed
    to ``open_bed``. Both paths are expected to give the same result.
    """
    source_path = shared_datadir / "small.bed"
    with open_bed(source_path) as bed:
        val = bed.read()
        properties = bed.properties

        # Replace chromosome "Y" by 0, switch to float, then turn 0 into NaN.
        chrom_zeroed = bed.chromosome.copy()
        chrom_zeroed[bed.chromosome == "Y"] = 0
        chrom_zeroed = np.array(chrom_zeroed, dtype="float")
        chrom_nan = chrom_zeroed.copy()
        chrom_nan[chrom_nan == 0] = np.nan

        # Same idea for cm_position: small values -> 0 -> NaN.
        cm_zeroed = bed.cm_position.copy()
        cm_zeroed[cm_zeroed < 3000] = 0
        cm_nan = cm_zeroed.copy()
        cm_nan[cm_zeroed == 0] = np.nan

        properties["chromosome"] = chrom_nan
        properties["cm_position"] = cm_nan

    expected_chromosome = ["1.0", "1.0", "5.0", "0"]

    def check(reader):
        # NaN entries are expected to come back as "0" / 0.
        assert np.array_equal(reader.chromosome, expected_chromosome)
        assert np.array_equal(reader.cm_position, cm_zeroed)

    output_file = tmp_path / "nan.bed"
    to_bed(output_file, val, properties=properties)
    with open_bed(output_file) as bed2:
        check(bed2)

    overrides = {"chromosome": chrom_nan, "cm_position": cm_nan}
    with open_bed(source_path, properties=overrides) as bed3:
        check(bed3)
コード例 #2
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_bed_int8(tmp_path, shared_datadir):
    """Read and write int8 data for every engine / memory-order combination.

    Missing values are represented by -127 in the int8 reference array.
    """
    flag_for_order = {"C": "C_CONTIGUOUS", "F": "F_CONTIGUOUS"}
    output = str(tmp_path / "int8.bed")

    with open_bed(shared_datadir / "some_missing.bed") as bed:
        for force_python_only in [False, True]:
            for order in ["F", "C"]:
                val = bed.read(
                    dtype="int8",
                    force_python_only=force_python_only,
                    order=order,
                )
                assert val.dtype == np.int8
                # The result must honour the requested memory layout.
                assert val.flags[flag_for_order[order]]

                # Build the int8 reference: NaN (the only value that is not
                # equal to itself) becomes -127.
                ref_val = reference_val(shared_datadir)
                ref_val[ref_val != ref_val] = -127
                ref_val = ref_val.astype("int8")
                assert np.array_equal(ref_val, val)

                # Round trip through to_bed with both allele-counting modes.
                for count_A1 in [False, True]:
                    to_bed(
                        output,
                        ref_val,
                        count_A1=count_A1,
                        force_python_only=force_python_only,
                    )
                    with open_bed(output, count_A1=count_A1) as bed2:
                        reread = bed2.read(
                            dtype="int8",
                            force_python_only=force_python_only,
                        )
                        assert np.array_equal(reread, ref_val)
コード例 #3
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_c_reader_bed(shared_datadir):
    """Compare reads of ``some_missing.bed`` against the reference values.

    For each engine setting the file is read with ``count_A1=False`` (as
    float32 and as int8) and then with the default counting as float64.
    Readers are opened with ``with`` so they are closed even when an
    assertion fails.
    """
    bed_path = shared_datadir / "some_missing.bed"

    for force_python_only in [False, True]:
        with open_bed(bed_path, count_A1=False) as bed:
            val = bed.read(order="F", force_python_only=force_python_only)
            assert val.dtype == np.float32
            ref_val = reference_val(shared_datadir)
            # With count_A1=False the expected values are 2 - reference.
            ref_val = ref_val * -1 + 2
            assert np.allclose(ref_val,
                               val,
                               rtol=1e-05,
                               atol=1e-05,
                               equal_nan=True)

            # NOTE(review): this read always passes force_python_only=False,
            # regardless of the loop variable - confirm that is intended.
            val = bed.read(order="F", dtype="int8", force_python_only=False)
            assert val.dtype == np.int8
            # NaN (the only value unequal to itself) is -127 in int8.
            ref_val[ref_val != ref_val] = -127
            ref_val = ref_val.astype("int8")
            assert np.all(ref_val == val)

        with open_bed(bed_path) as bed:
            val = bed.read(order="F",
                           dtype="float64",
                           force_python_only=force_python_only)
            ref_val = reference_val(shared_datadir)
            assert np.allclose(ref_val,
                               val,
                               rtol=1e-05,
                               atol=1e-05,
                               equal_nan=True)
コード例 #4
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_iid_sid_count(shared_datadir):
    """Supplying iid_count and/or sid_count must not change ``shape``."""
    bed_path = shared_datadir / "plink_sim_10s_100v_10pmiss.bed"
    iid_count_ref, sid_count_ref = open_bed(bed_path).shape
    expected_shape = (iid_count_ref, sid_count_ref)

    # Only the individual count is given.
    with_iid = open_bed(bed_path, iid_count=iid_count_ref)
    assert expected_shape == with_iid.shape

    # Only the SNP count is given.
    with_sid = open_bed(bed_path, sid_count=sid_count_ref)
    assert expected_shape == with_sid.shape

    # Both counts are given.
    with_both = open_bed(
        bed_path,
        iid_count=iid_count_ref,
        sid_count=sid_count_ref,
    )
    assert expected_shape == with_both.shape
コード例 #5
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_write1_bed_f64cpp(tmp_path, shared_datadir):
    """Write the first 0, 1 and 5 individuals as float64 and re-read them."""
    with open_bed(shared_datadir / "some_missing.bed") as bed:
        for iid_index in [0, 1, 5]:
            leading_rows = np.s_[0:iid_index, :]
            output = str(tmp_path / f"toydata.F64cpp.{iid_index}")
            for force_python_only in [False, True]:
                val = bed.read(
                    leading_rows,
                    order="F",
                    dtype=np.float64,
                    force_python_only=force_python_only,
                )
                assert val.shape == (iid_index, 100)

                to_bed(output, val, count_A1=False)
                reopened = open_bed(output, count_A1=False)
                val2 = reopened.read(dtype="float64")
                assert np.allclose(val, val2, equal_nan=True)
コード例 #6
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_writes_small(tmp_path):
    """Write a tiny 3x4 data set with every property and read it back."""
    val = [[1.0, 0, np.nan, 0], [2, 0, np.nan, 2], [0, 1, 2, 0]]

    # The first six entries describe the 3 individuals, the remaining six
    # describe the 4 SNPs.
    properties = dict(
        fid=["fid1", "fid1", "fid2"],
        iid=["iid1", "iid2", "iid3"],
        father=["iid23", "iid23", "iid22"],
        mother=["iid34", "iid34", "iid33"],
        sex=[1, 2, 0],
        pheno=["red", "red", "blue"],
        chromosome=["1", "1", "5", "Y"],
        sid=["sid1", "sid2", "sid3", "sid4"],
        cm_position=[100.4, 2000.5, 4000.7, 7000.9],
        bp_position=[1, 100, 1000, 1004],
        allele_1=["A", "T", "A", "T"],
        allele_2=["A", "C", "C", "G"],
    )

    output_file = tmp_path / "small.bed"
    to_bed(output_file, val, properties=properties)

    with open_bed(output_file) as bed:
        assert np.allclose(bed.read(), val, equal_nan=True)
        for key, value in bed.properties.items():
            wanted = properties[key]
            # Exact match first; fall back to a tolerance-based comparison.
            assert np.array_equal(value, wanted) or np.allclose(value, wanted)
コード例 #7
0
    def do_mass_univariate_test(
        self,
        bed_file: Optional[Union[str, bed_reader.open_bed]] = None,
        pheno_file: Optional[Union[str, pd.DataFrame]] = None,
        covar_file: Optional[Union[str, pd.DataFrame]] = None,
        pheno_covar_file: Optional[Union[str, pd.DataFrame]] = None,
        phenotype: str = 'WM_sum',
        covariate: Optional[List[str]] = None
    ) -> pd.DataFrame:
        """Run ``mass_univariate_test`` on this object's SNP list.

        The result is stored in ``self.snp_association`` and returned.

        Args:
            bed_file: Path to a bed file, or an already opened
                ``bed_reader.open_bed``. When given it replaces
                ``self.bed_file``; when omitted the stored reader is used.
            pheno_file: Forwarded unchanged to ``mass_univariate_test``.
            covar_file: Forwarded unchanged to ``mass_univariate_test``.
            pheno_covar_file: Forwarded unchanged to
                ``mass_univariate_test``.
            phenotype: Name of the phenotype, forwarded unchanged.
            covariate: Covariate names, forwarded to
                ``mass_univariate_test``. ``None`` selects the default
                set listed in the body.

        Returns:
            Whatever ``mass_univariate_test`` returns (annotated as a
            ``pd.DataFrame``).

        Raises:
            TypeError: ``bed_file`` is neither a ``str`` nor an ``open_bed``.
            ValueError: no ``bed_file`` was given and none is stored.
        """
        # A fresh list per call, so no caller can mutate a shared default.
        if covariate is None:
            covariate = [
                'euro_Anc_PC1', 'euro_Anc_PC2', 'euro_Anc_PC3', 'GA_vol',
                'PMA_vol', 'Gender', 'Intracranial_Imperial'
            ]

        if bed_file is not None:
            if isinstance(bed_file, str):
                self.bed_file = bed_reader.open_bed(bed_file)
            elif isinstance(bed_file, bed_reader.open_bed):
                self.bed_file = bed_file
            else:
                raise TypeError(
                    "bed_file must be a path string or a "
                    f"bed_reader.open_bed, not {type(bed_file).__name__}")
        elif not hasattr(self, 'bed_file'):
            # Previously this case failed on an unrelated type assertion.
            raise ValueError(
                "no bed_file was given and none is stored on this object")

        self.snp_association = mass_univariate_test(
            bed_file=self.bed_file,
            pheno_file=pheno_file,
            covar_file=covar_file,
            pheno_covar_file=pheno_covar_file,
            snps_list=self.snps_list,
            phenotype=phenotype,
            covariate=covariate)

        return self.snp_association
コード例 #8
0
    def _open_bed_if_needed(self):
        """Create ``self._open_bed`` the first time it is required.

        Does nothing when a reader already exists. Otherwise an
        ``open_bed`` is built for ``self.filename``; any iid / sid / pos
        arrays already held on this object are passed in as property
        overrides.
        """
        if self._open_bed is not None:
            return

        # These six properties are always overridden with None.
        properties = dict.fromkeys(
            ("father", "mother", "sex", "pheno", "allele_1", "allele_2"))

        iid = self._original_iid
        if iid is not None:
            # Column 0 is used as the family id, column 1 as the individual id.
            properties["fid"] = iid[:, 0]
            properties["iid"] = iid[:, 1]

        if self._original_sid is not None:
            properties["sid"] = self._original_sid

        pos = self._original_pos
        if pos is not None:
            # Columns are used as chromosome, cm position, bp position.
            properties["chromosome"] = pos[:, 0]
            properties["cm_position"] = pos[:, 1]
            properties["bp_position"] = pos[:, 2]

        self._open_bed = open_bed(
            self.filename,
            properties=properties,
            skip_format_check=self._skip_format_check,
            count_A1=self.count_A1,
            num_threads=self._num_threads,
            fam_filepath=self.fam_filename,
            bim_filepath=self.bim_filename,
        )
コード例 #9
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_sample_file():
    """Smoke test: fetch the ``small.bed`` sample and print its contents."""
    from bed_reader import open_bed, sample_file

    small_bed_path = sample_file("small.bed")
    with open_bed(small_bed_path) as reader:
        # Printing only; the test passes as long as nothing raises.
        print(reader.iid)
        print(reader.sid)
        print(reader.read())
コード例 #10
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_write12(tmp_path):
    """Round-trip random matrices of many shapes, including empty ones.

    For each (row_count, col_count) combination a random 0/1/2/NaN matrix
    is written with ``to_bed``, once with explicit chromosome / position
    properties and once without. The file is then read back, both in full
    and through a strided subset.
    """
    logging.info("starting 'test_writes'")
    np.random.seed(0)
    output_template = str(tmp_path / "writes.{0}.bed")
    id_labels = ["0", "1", "2", "3", "4"]
    sid_labels = ["s0", "s1", "s2", "s3", "s4"]
    i = 0
    for row_count in [0, 5, 2, 1]:
        for col_count in [0, 4, 2, 1]:
            val = np.random.randint(0, 4, size=(row_count, col_count)) * 1.0
            # np.nan, not np.NaN: the alias was removed in NumPy 2.0.
            val[val == 3] = np.nan
            row0 = id_labels[:row_count]
            row1 = id_labels[:row_count]
            col = sid_labels[:col_count]
            for with_positions in [True, False]:
                properties = {"fid": row0, "iid": row1, "sid": col}
                if with_positions:
                    col_prop012 = list(range(5))[:col_count]
                    properties["chromosome"] = col_prop012
                    properties["bp_position"] = col_prop012
                    properties["cm_position"] = col_prop012
                else:
                    col_prop012 = None

                filename = output_template.format(i)
                logging.info(filename)
                i += 1
                to_bed(filename, val, properties=properties)
                for subsetter in [None, np.s_[::2, ::3]]:
                    with open_bed(filename) as bed:
                        val2 = bed.read(index=subsetter,
                                        order="C",
                                        dtype="float32")
                        if subsetter is None:
                            expected = val
                        else:
                            expected = val[subsetter[0], :][:, subsetter[1]]
                        assert np.allclose(val2, expected, equal_nan=True)
                        assert np.array_equal(bed.fid,
                                              np.array(row0, dtype="str"))
                        assert np.array_equal(bed.iid,
                                              np.array(row1, dtype="str"))
                        assert np.array_equal(bed.sid,
                                              np.array(col, dtype="str"))
                        if col_prop012 is not None:
                            assert np.array_equal(
                                bed.chromosome,
                                np.array(col_prop012, dtype="str"))
                            assert np.array_equal(bed.bp_position,
                                                  np.array(col_prop012))
                            assert np.array_equal(bed.cm_position,
                                                  np.array(col_prop012))
                # Best-effort cleanup: only file-system errors are ignored,
                # so unrelated failures are no longer silently swallowed.
                try:
                    os.remove(filename)
                except OSError:
                    pass
    logging.info("done with 'test_writes'")
コード例 #11
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_write1(tmp_path, shared_datadir):
    """Round-trip a bed file and check that invalid values are rejected.

    The round-trip assertions inspect ``bed1`` (the re-opened output).
    Checking ``bed``, the source of ``properties0``, would compare the
    properties with themselves and could never fail.
    """
    in_file = shared_datadir / "plink_sim_10s_100v_10pmiss.bed"
    out_file = tmp_path / "out.bed"
    with open_bed(in_file) as bed:
        val0 = bed.read()
        properties0 = {
            "fid": bed.fid,
            "iid": bed.iid,
            "sid": bed.sid,
            "chromosome": bed.chromosome,
            "cm_position": bed.cm_position,
            "bp_position": bed.bp_position,
        }
        to_bed(out_file, val0, properties=properties0)
        with open_bed(out_file) as bed1:
            assert np.allclose(val0, bed1.read(), equal_nan=True)
            assert np.array_equal(bed1.fid, properties0["fid"])
            assert np.array_equal(bed1.iid, properties0["iid"])
            assert np.array_equal(bed1.sid, properties0["sid"])
            assert np.issubdtype(bed1.sid.dtype, np.str_)
            assert np.array_equal(bed1.chromosome, properties0["chromosome"])
            assert np.allclose(bed1.cm_position, properties0["cm_position"])
            assert np.allclose(bed1.bp_position, properties0["bp_position"])

    # A fractional float value must be rejected by both engines.
    val_float = val0.astype("float")
    val_float[0, 0] = 0.5

    for force_python_only in [False, True]:
        with pytest.raises(ValueError):
            to_bed(
                out_file,
                val_float,
                properties=properties0,
                force_python_only=force_python_only,
            )

    # So must an int8 value of -1.
    val_int8 = val0.astype("int8")
    val_int8[0, 0] = -1
    for force_python_only in [False, True]:
        with pytest.raises(ValueError):
            to_bed(
                out_file,
                val_int8,
                properties=properties0,
                force_python_only=force_python_only,
            )
コード例 #12
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_coverage3(shared_datadir, tmp_path):
    """Cover NaN handling in property overrides and 1-D input to ``to_bed``."""
    # A NaN in the "sex" override is expected to read back as 0.
    with open_bed(shared_datadir / "small.bed",
                  properties={"sex": [1.0, np.nan, 1.0, 2.0]}) as bed:
        assert np.array_equal(bed.sex, np.array([1, 0, 1, 2]))

    # Likewise for "cm_position".
    with open_bed(
            shared_datadir / "small.bed",
            properties={"cm_position": [1000.0, np.nan, 2000.0, 3000.0]},
    ) as bed:
        assert np.array_equal(bed.cm_position, np.array([1000, 0, 2000, 3000]))

    # Named "row" rather than "list" so the builtin is not shadowed.
    row = [1.0, 0, np.nan, 0]
    output_file = tmp_path / "1d.bed"
    # One-dimensional input is rejected ...
    with pytest.raises(ValueError):
        to_bed(output_file, row)
    # ... but the same values as a 1 x 4 float16 array are accepted.
    to_bed(output_file, np.array([row], dtype=np.float16))
    with open_bed(output_file) as bed:
        assert np.allclose(bed.read(), [row], equal_nan=True)
コード例 #13
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_write1_x_x_cpp(tmp_path, shared_datadir):
    """Round-trip float32/float64 data in C and F order for both count modes."""
    source_path = shared_datadir / "some_missing.bed"
    for count_A1 in [False, True]:
        with open_bed(source_path, count_A1=count_A1) as bed:
            for order in ["C", "F"]:
                for dtype in [np.float32, np.float64]:
                    val = bed.read(order=order, dtype=dtype)
                    properties = bed.properties
                    # Force one more missing value into the last row.
                    val[-1, 0] = float("NAN")

                    width = "32" if dtype == np.float32 else "64"
                    file_name = "toydata.{0}{1}.cpp".format(order, width)
                    output = str(tmp_path / file_name)

                    to_bed(output,
                           val,
                           properties=properties,
                           count_A1=count_A1)
                    reopened = open_bed(output, count_A1=count_A1)
                    val2 = reopened.read(dtype=dtype)
                    assert np.allclose(val, val2, equal_nan=True)
コード例 #14
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_coverage2(shared_datadir, tmp_path):
    """Cover the None-property override and several rejected inputs."""
    bed_path = shared_datadir / "plink_sim_10s_100v_10pmiss.bed"

    # A property overridden with None stays None.
    with open_bed(bed_path, properties={"iid": None}) as bed:
        assert bed.iid is None

    # Per-individual properties of different lengths are rejected.
    mismatched = {"iid": [1, 2, 3], "mother": [1, 2]}
    with pytest.raises(ValueError):
        open_bed(bed_path, properties=mismatched)

    target = tmp_path / "ignore"

    # A strided view is neither C- nor F-contiguous and is rejected.
    strided = np.zeros((3, 5))[::2]
    assert not (strided.flags["C_CONTIGUOUS"]
                or strided.flags["F_CONTIGUOUS"])
    with pytest.raises(ValueError):
        to_bed(target, strided)

    # A string-typed array is rejected as well.
    text_val = np.zeros((3, 5), dtype=np.str_)
    with pytest.raises(ValueError):
        to_bed(target, text_val)
コード例 #15
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_zero_files(tmp_path):
    """Round-trip matrices that have zero individuals and/or zero SNPs.

    Each file is written, read, rewritten with a modified property set and
    read once more.
    """
    filename = str(tmp_path / "zero_files.bed")

    for force_python_only in [False, True]:
        for iid_count in [3, 0]:
            for sid_count in [0, 5]:
                for dtype in [np.int8, np.float32, np.float64]:
                    val = np.zeros((iid_count, sid_count), dtype=dtype)
                    if iid_count * sid_count > 0:
                        # Non-empty case: one real value and one missing
                        # value (-127 for int8, NaN for floats).
                        if np.dtype(dtype) == np.int8:
                            missing = -127
                        else:
                            missing = np.nan
                        val[0, 0] = 2
                        val[0, 1] = missing

                    # First write / read cycle.
                    to_bed(filename, val, force_python_only=force_python_only)
                    with open_bed(filename) as bed2:
                        val2 = bed2.read(dtype=dtype)
                        assert np.allclose(val, val2, equal_nan=True)
                        properties2 = bed2.properties
                        for prop in properties2.values():
                            assert len(prop) in {iid_count, sid_count}

                    # Rename the first individual / SNP where one exists,
                    # then write again with the edited properties.
                    if iid_count > 0:
                        properties2["iid"][0] = "iidx"
                    if sid_count > 0:
                        properties2["sid"][0] = "sidx"
                    to_bed(
                        filename,
                        val2,
                        properties=properties2,
                        force_python_only=force_python_only,
                    )

                    # Second read: values and every property must match.
                    with open_bed(filename) as bed3:
                        val3 = bed3.read(dtype=dtype)
                        assert np.allclose(val, val3, equal_nan=True)
                        properties3 = bed3.properties
                        for key2, value_list2 in properties2.items():
                            assert np.array_equal(value_list2,
                                                  properties3[key2])
コード例 #16
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_threads(shared_datadir):
    """int8 reads must match the reference with 1 and with 4 threads."""
    ref_val_float = reference_val(shared_datadir)
    # NaN is the only value unequal to itself; it maps to -127 in int8.
    missing = ref_val_float != ref_val_float
    ref_val_int8 = ref_val_float.astype("int8")
    ref_val_int8[missing] = -127

    bed_path = shared_datadir / "some_missing.bed"
    for num_threads in [1, 4]:
        with open_bed(bed_path, num_threads=num_threads) as bed:
            val = bed.read(dtype="int8")
            assert np.allclose(ref_val_int8, val, equal_nan=True)
コード例 #17
0
def extract_new_bed_file(
    bed_file: Union[str, bed_reader.open_bed],
    snps_list: Optional[List[str]] = None,
    fid_list: Optional[Union[pd.DataFrame, str, List[str]]] = None
) -> bed_reader.open_bed:
    """Restrict a bed reader to selected SNPs and/or selected individuals.

    The genotype matrix is read once and cached on the reader as
    ``bed_file.genotype``. The matrix and the entries of
    ``bed_file.properties_dict`` are then subset in place.

    Args:
        bed_file: An opened ``bed_reader.open_bed`` or a path to a bed file.
        snps_list: SNP ids to keep. ``None`` keeps every SNP.
        fid_list: Family ids to keep. Either a list, a DataFrame with an
            ``FID`` column, or a path to a space-separated table with an
            ``FID`` column. ``None`` keeps every individual.

    Returns:
        The same reader object (or the newly opened one), modified in place.

    Raises:
        TypeError: ``bed_file`` is neither a ``str`` nor an ``open_bed``.
    """
    if isinstance(bed_file, str):
        bed_file = bed_reader.open_bed(bed_file)
    elif not isinstance(bed_file, bed_reader.open_bed):
        # Raised explicitly: an assert would vanish under ``python -O``.
        raise TypeError(
            "bed_file must be a path string or a bed_reader.open_bed, "
            f"not {type(bed_file).__name__}")

    # Normalise fid_list to a plain list (path -> DataFrame -> list).
    if isinstance(fid_list, str):
        fid_list = pd.read_table(fid_list, sep=' ')
    if isinstance(fid_list, pd.DataFrame):
        fid_list = fid_list.FID.to_list()

    # always when reading bed file assign the value to genotype attribute
    if not hasattr(bed_file, 'genotype'):
        bed_file.genotype = bed_file.read()

    if snps_list is not None:  # change the bed_file SNPs and associated
        # A set makes each membership test O(1) instead of a list scan.
        wanted_sids = set(snps_list)
        new_sid = [
            idx for idx, sid in enumerate(bed_file.sid) if sid in wanted_sids
        ]
        bed_file.genotype = bed_file.genotype[:, new_sid]
        # can change the properties dictionary but not the attributes
        for key in ('sid', 'chromosome', 'cm_position', 'bp_position',
                    'allele_1', 'allele_2'):
            bed_file.properties_dict[key] = getattr(bed_file, key)[new_sid]

    if fid_list is not None:  # change the bed_file ID info
        wanted_fids = set(fid_list)
        new_fid = [
            idx for idx, fid in enumerate(bed_file.fid) if fid in wanted_fids
        ]
        for key in ('fid', 'iid', 'father', 'mother', 'sex', 'pheno'):
            bed_file.properties_dict[key] = getattr(bed_file, key)[new_fid]
        bed_file.genotype = bed_file.genotype[new_fid, :]

    return bed_file
コード例 #18
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_index(shared_datadir):
    """Exercise every index form that ``read`` accepts.

    Each form is compared with the equivalent NumPy indexing of the
    reference matrix.
    """
    ref_val_float = reference_val(shared_datadir)

    with open_bed(shared_datadir / "some_missing.bed") as bed:

        def expect(index, expected):
            # Read with the given index and compare, treating NaNs as equal.
            assert np.allclose(expected, bed.read(index), equal_nan=True)

        # No index at all: the whole matrix.
        assert np.allclose(ref_val_float, bed.read(), equal_nan=True)

        # A bare integer selects one SNP column.
        expect(2, ref_val_float[:, [2]])
        expect((2), ref_val_float[:, [2]])
        expect((None, 2), ref_val_float[:, [2]])

        # (individual, SNP) pair of integers.
        expect((1, 2), ref_val_float[[1], [2]])

        # Lists of integers, including negative positions.
        expect([2, -2], ref_val_float[:, [2, -2]])
        expect(([1, -1], [2, -2]), ref_val_float[[1, -1], :][:, [2, -2]])

        # Boolean masks.
        iid_bool = ([False, False, True] * bed.iid_count)[:bed.iid_count]
        sid_bool = ([True, False, True] * bed.sid_count)[:bed.sid_count]
        expect(sid_bool, ref_val_float[:, sid_bool])
        expect((iid_bool, sid_bool),
               ref_val_float[iid_bool, :][:, sid_bool])
        expect((1, sid_bool), ref_val_float[[1], :][:, sid_bool])

        # Slices.
        slicer = np.s_[::2, ::3]
        expect(slicer[1], ref_val_float[:, slicer[1]])
        expect(slicer, ref_val_float[slicer])
        expect((1, slicer[1]), ref_val_float[[1], slicer[1]])
コード例 #19
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_read1(shared_datadir):
    """Spot-check counts, ids, shape and values of the 10 x 100 sample."""
    bed_path = shared_datadir / "plink_sim_10s_100v_10pmiss.bed"
    with open_bed(bed_path) as bed:
        assert bed.iid_count == 10
        assert bed.fid[-1] == "0"
        assert bed.iid[-1] == "9"
        assert bed.shape == (10, 100)

        val = bed.read(dtype="int8")
        # really shouldn't do mean on data where -127 represents missing
        observed_mean = val.mean()
        assert observed_mean == -13.142

        assert bed.chromosome[-1] == "1"
        assert bed.bp_position[-1] == 100
コード例 #20
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_env(shared_datadir):
    """Reads must honour MKL_NUM_THREADS and reject a non-numeric value.

    The test returns immediately on macOS. The environment variable is
    restored to its previous state afterwards.
    """
    if platform.system() == "Darwin":
        return

    key = "MKL_NUM_THREADS"
    bed_path = shared_datadir / "some_missing.bed"
    window = np.s_[:100, :100]
    original_val = os.environ.get(key)
    try:
        # Valid thread counts: the read simply has to succeed.
        for setting in ("1", "10"):
            os.environ[key] = setting
            with open_bed(bed_path) as bed:
                _ = bed.read(window)

        # A non-numeric value must surface as ValueError.
        os.environ[key] = "BADVALUE"
        with pytest.raises(ValueError):
            with open_bed(bed_path) as bed:
                _ = bed.read(window)
    finally:
        if original_val is not None:
            os.environ[key] = original_val
        else:
            # Was unset before the test; make sure it is unset again.
            os.environ.pop(key, None)
コード例 #21
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_fam_bim_filepath(shared_datadir, tmp_path):
    """Write and read with non-standard .bed/.fam/.bim file names."""
    with open_bed(shared_datadir / "small.bed") as bed:
        val = bed.read()
        properties = bed.properties
    output_file = tmp_path / "small.deb"
    fam_file = tmp_path / "small.maf"
    bim_file = tmp_path / "small.mib"
    to_bed(
        output_file,
        val,
        properties=properties,
        fam_filepath=fam_file,
        bim_filepath=bim_file,
    )
    assert output_file.exists() and fam_file.exists() and bim_file.exists()

    with open_bed(output_file, fam_filepath=fam_file,
                  bim_filepath=bim_file) as deb:
        val2 = deb.read()
        properties2 = deb.properties
        assert np.allclose(val, val2, equal_nan=True)
        for key in properties:
            # The comparison result was previously discarded, so a
            # property mismatch could never fail the test.
            assert np.array_equal(properties[key], properties2[key])
コード例 #22
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_noncontig_indexes(shared_datadir):
    """Non-contiguous index arrays: accepted by ``read``, rejected below it.

    ``bed.read`` is called with a strided index array. A direct call to
    ``subset_f64_f64`` with a strided index array is expected to raise
    ``ValueError``.
    """
    with open_bed(shared_datadir / "some_missing.bed") as bed:
        all_iids = np.arange(bed.iid_count)
        assert all_iids.flags["C_CONTIGUOUS"]
        # Taking every second element produces a non-contiguous view.
        strided_iids = all_iids[::2]
        assert not strided_iids.flags["C_CONTIGUOUS"]
        val = bed.read((strided_iids, -2))

        # Repeat the construction over the rows of the value just read.
        all_rows = np.arange(val.shape[0])
        assert all_rows.flags["C_CONTIGUOUS"]
        strided_rows = all_rows[::2]
        assert not strided_rows.flags["C_CONTIGUOUS"]

        val_out = np.zeros((len(strided_rows), 0))
        val_3d = val.reshape(-1, bed.sid_count, 1)
        with pytest.raises(ValueError):
            subset_f64_f64(val_3d, strided_rows, [], val_out, 1)
コード例 #23
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_nones(shared_datadir, tmp_path):
    """Properties overridden with None stay None; writing them must work."""
    properties = dict.fromkeys(
        ("father", "mother", "sex", "pheno", "allele_1", "allele_2"))

    with open_bed(shared_datadir / "small.bed", properties=properties) as bed:
        # A property that was not overridden is still loaded from disk.
        assert np.array_equal(bed.iid, ["iid1", "iid2", "iid3"])
        assert bed.father is None

    # Writing with the same None-valued properties must not raise.
    out_file = tmp_path / "testnones.bed"
    val = [[1.0, 0, np.nan, 0], [2, 0, np.nan, 2], [0, 1, 2, 0]]
    to_bed(out_file, val, properties=properties)
コード例 #24
0
    def _read_fam(basefilename, remove_suffix, add_suffix='fam'):
        """Load the family and individual ids that accompany a bed file.

        The companion file name comes from
        ``SnpReader._name_of_other_file``. The result is a 2-D array with
        one row per individual: column 0 is ``fid``, column 1 is ``iid``.
        """
        famfile = SnpReader._name_of_other_file(basefilename, remove_suffix,
                                                add_suffix)
        logging.info("Loading {0} file {1}".format(add_suffix, famfile))

        # The remaining per-individual properties are overridden with None.
        unwanted = dict.fromkeys(("father", "mother", "sex", "pheno"))

        with open_bed(basefilename,
                      fam_filepath=famfile,
                      properties=unwanted,
                      skip_format_check=True) as bed:
            id_rows = np.array([bed.fid, bed.iid])
        return id_rows.T
コード例 #25
0
    def __init__(
            self,
            snps_list: Union[List[str], pd.DataFrame],
            bed_file: Optional[Union[str,
                                     bed_reader.open_bed]] = None) -> None:
        """Store the SNP list and, optionally, a bed reader.

        Args:
            snps_list: SNP ids as a list, or a DataFrame whose ``SNP``
                column holds them.
            bed_file: Path to a bed file or an opened
                ``bed_reader.open_bed``. When omitted, ``self.bed_file``
                is not set.

        Raises:
            TypeError: ``bed_file`` is given but is neither a ``str`` nor
                an ``open_bed``.
        """
        if isinstance(snps_list, pd.DataFrame):
            self.snps_list = snps_list.SNP.to_list()
        elif isinstance(snps_list, list):
            self.snps_list = snps_list
        # NOTE(review): any other type leaves self.snps_list unset, exactly
        # as before - confirm whether that should be an error.

        self.updated_snps_list = False
        self.updated_bed_file = False

        if bed_file is not None:
            if isinstance(bed_file, str):
                self.bed_file = bed_reader.open_bed(bed_file)
            elif isinstance(bed_file, bed_reader.open_bed):
                self.bed_file = bed_file
            else:
                # Raised explicitly: an assert would vanish under -O.
                raise TypeError(
                    "bed_file must be a path string or a "
                    f"bed_reader.open_bed, not {type(bed_file).__name__}")
コード例 #26
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_respect_read_inputs(shared_datadir):
    """``read`` must honour the requested dtype and memory order."""
    ref_val_float = reference_val(shared_datadir)

    # int8 reference: NaN (unequal to itself) is represented by -127.
    missing = ref_val_float != ref_val_float
    ref_val_int8 = ref_val_float.astype("int8")
    ref_val_int8[missing] = -127

    with open_bed(shared_datadir / "some_missing.bed") as bed:
        for order in ["F", "C"]:
            layout_flag = "F_CONTIGUOUS" if order == "F" else "C_CONTIGUOUS"
            for dtype in [np.int8, np.float32, np.float64]:
                if dtype == np.int8:
                    ref_val = ref_val_int8
                else:
                    ref_val = ref_val_float
                for force_python_only in [True, False]:
                    val = bed.read(order=order,
                                   dtype=dtype,
                                   force_python_only=force_python_only)
                    assert val.dtype == dtype
                    assert val.flags[layout_flag]
                    assert np.allclose(ref_val, val, equal_nan=True)
コード例 #27
0
def get_feature_SNPs(
    feature_SNP_IDs,
    cumulative_SNP_counts,
    output_file_names,
    sample_size,
    bim_SNP_names,
):
    """
    Return the genotypes of every SNP that makes up one genetic feature.

    Purpose
    -------
    Each row in the causal_SNP_IDs file contains one or more SNPs that are
    transformed to compute a single genetic feature. This function returns
    the genotypes of all of those SNPs for one such row.

    Parameters
    ----------
    feature_SNP_IDs: A list of rsIDs from the input plink file (same as the output bim file) that comprise a single feature.
    cumulative_SNP_counts: the cumulative number of SNPs summed from chromosome 1 to chromosome 22 in ascending order.
        Entry ``k`` is used as the global index of the first SNP stored in
        ``output_file_names[k]``, so the array presumably starts with 0
        and ends with the total SNP count - confirm against the caller.
    output_file_names: the names of the output bed files for all chromosomes.
    sample_size: the number of samples that have been simulated.
    bim_SNP_names: a list of SNPs from the output bim file (same as the input bim file).

    Returns
    -------
    A 2D numpy array of shape (sample_size, len(feature_SNP_IDs)) in which
    each column contains the genotypes for one SNP.
    """

    feature_SNPs = np.zeros((sample_size, len(feature_SNP_IDs)))
    for i, SNP_ID in enumerate(feature_SNP_IDs):
        # Global (all-files) position of this SNP.
        index = np.where(SNP_ID == bim_SNP_names)[0][0]
        # Strict "<": a SNP whose global index equals a file's start offset
        # is the FIRST SNP of that file. With "<=" it was attributed to the
        # previous file (or to index -1 when the global index was 0).
        output_file_index = np.min(
            np.where(index < cumulative_SNP_counts)) - 1
        adjusted_SNP_index = index - cumulative_SNP_counts[output_file_index]
        # The context manager closes each reader as soon as its column
        # has been read.
        with open_bed(output_file_names[output_file_index],
                      count_A1=True,
                      num_threads=1) as output_file_reader:
            feature_SNPs[:, [i]] += output_file_reader.read(
                index=adjusted_SNP_index, dtype="float32")

    return feature_SNPs
コード例 #28
0
ファイル: pysnptools.py プロジェクト: ravwojdyla/sgkit-plink
 def __init__(
     self,
     path: PathType,
     shape: Tuple[int, int],
     dtype: Any = np.int8,
     count_A1: bool = True,
 ) -> None:
     """Wrap a bed file as a 3-dimensional array-like object.

     ``shape`` is ``(number of variants, number of samples)``. The exposed
     ``self.shape`` appends a trailing axis of length 2.
     """
     # shape is ordered (variants, samples); open_bed names those counts
     # sid (SNP id) and iid (individual id) respectively.
     variant_count, sample_count = shape
     # The counts are handed to open_bed explicitly. NOTE(review): the
     # original author's comment says the aim is to avoid loading the
     # bim/map/fam files entirely into memory - confirm that passing the
     # counts achieves this.
     self.bed = open_bed(
         path,
         count_A1=count_A1,
         iid_count=sample_count,
         sid_count=variant_count,
         num_threads=None,  # NOTE: Default: Use 'em all!
     )
     self.shape = (variant_count, sample_count, 2)
     self.dtype = dtype
     self.ndim = 3
コード例 #29
0
    def _read_map_or_bim(basefilename, remove_suffix, add_suffix):
        """Load SNP ids and positions from the companion map/bim file.

        Returns ``(sid, pos)``. ``pos`` has one row per SNP with the columns
        chromosome (as float), cm position and bp position. Every zero in
        ``pos`` is replaced by NaN.
        """
        bimfile = SnpReader._name_of_other_file(basefilename, remove_suffix,
                                                add_suffix)
        logging.info("Loading {0} file {1}".format(add_suffix, bimfile))

        # The allele columns are overridden with None.
        unwanted = {"allele_1": None, "allele_2": None}

        with open_bed(basefilename,
                      bim_filepath=bimfile,
                      properties=unwanted,
                      skip_format_check=True) as bed:
            sid = bed.sid
            chromosome = bed.chromosome.astype("float")
            # LATER could copy in batches to use less memory
            pos = np.array([chromosome, bed.cm_position, bed.bp_position]).T
            pos[pos == 0] = np.nan

        return sid, pos
コード例 #30
0
ファイル: test_open_bed.py プロジェクト: fastlmm/bed-reader
def test_overrides(shared_datadir):
    """Check default property values and the ``properties=`` overrides.

    First every per-individual and per-SNP property of ``some_missing.bed``
    is compared with the values saved in ``some_missing.properties.npz``.
    Then a series of overrides is exercised: unknown keys, ``None``, empty
    lists, integer ids, non-numeric sex codes, 2-D ids, and ids whose length
    disagrees with the rest of the file.
    """
    bed_path = shared_datadir / "some_missing.bed"
    property_names = (
        "fid",
        "iid",
        "father",
        "mother",
        "sex",
        "pheno",
        "chromosome",
        "sid",
        "cm_position",
        "bp_position",
        "allele_1",
        "allele_2",
    )

    with open_bed(bed_path) as bed:
        observed = {name: getattr(bed, name) for name in property_names}

    # The expected values were locked in once with:
    #   np.savez(shared_datadir / "some_missing.properties.npz", **observed)
    expected = np.load(shared_datadir / "some_missing.properties.npz")
    for name in property_names:
        assert np.array_equal(expected[name], observed[name])

    sid_count = len(observed["sid"])
    sex_count = len(observed["sex"])

    # An unrecognized property name raises as soon as the file is opened.
    with pytest.raises(KeyError):
        open_bed(bed_path, properties={"unknown": [3, 4, 4]})

    # Overriding with None makes the property read back as None.
    with open_bed(bed_path, properties={"iid": None}) as none_iid_bed:
        assert none_iid_bed.iid is None

    # An empty override yields an empty string array; reading another
    # per-individual property afterwards raises.
    with open_bed(bed_path, properties={"iid": []}) as empty_iid_bed:
        assert np.issubdtype(empty_iid_bed.iid.dtype, np.str_)
        assert len(empty_iid_bed.iid) == 0
        with pytest.raises(ValueError):
            empty_iid_bed.father

    # Integer ids given as a plain list come back as strings.
    with open_bed(
        bed_path, properties={"sid": list(range(sid_count))}
    ) as list_sid_bed:
        assert np.issubdtype(list_sid_bed.sid.dtype, np.str_)
        assert list_sid_bed.sid[0] == "0"

    # Sex must be coded as a number
    with pytest.raises(ValueError):
        open_bed(bed_path, properties={"sex": ["F"] * sex_count})

    # Integer ids given as a NumPy array come back as strings too.
    with open_bed(
        bed_path, properties={"sid": np.array(list(range(sid_count)))}
    ) as array_sid_bed:
        assert np.issubdtype(array_sid_bed.sid.dtype, np.str_)
        assert array_sid_bed.sid[0] == "0"

    # A two-dimensional sid override is rejected.
    paired_ids = np.array([(index, index) for index in range(sid_count)])
    with pytest.raises(ValueError):
        open_bed(bed_path, properties={"sid": paired_ids})

    # With a three-element sid override, reading chromosome raises.
    with open_bed(bed_path, properties={"sid": [1, 2, 3]}) as short_sid_bed:
        with pytest.raises(ValueError):
            short_sid_bed.chromosome