Esempio n. 1
0
 def test_save_discrepant_genotypes(self):
     """Save discrepant genotypes to the default output location.

     After loading the NCBI36 and GRCh37 inputs, exactly one discrepant
     genotype is expected; the saved file must resolve to
     ``output/discrepant_genotypes.csv`` and exist on disk.
     """
     collection = SNPsCollection()
     input_files = ["tests/input/NCBI36.csv", "tests/input/GRCh37.csv"]
     collection.load_snps(input_files)
     assert len(collection.discrepant_genotypes) == 1

     saved_path = collection.save_discrepant_genotypes()
     relative_path = os.path.relpath(saved_path)
     assert relative_path == "output/discrepant_genotypes.csv"
     assert os.path.exists(saved_path)
Esempio n. 2
0
 def test_save_discrepant_snps_specify_file(self):
     """Save discrepant SNPs under an explicitly given file name.

     After loading the NCBI36 and GRCh37 inputs, four discrepant SNPs are
     expected; the saved file must resolve to
     ``output/discrepant_snps.csv`` and exist on disk.
     """
     collection = SNPsCollection()
     input_files = ["tests/input/NCBI36.csv", "tests/input/GRCh37.csv"]
     collection.load_snps(input_files)
     assert len(collection.discrepant_snps) == 4

     saved_path = collection.save_discrepant_snps("discrepant_snps.csv")
     relative_path = os.path.relpath(saved_path)
     assert relative_path == "output/discrepant_snps.csv"
     assert os.path.exists(saved_path)
Esempio n. 3
0
 def test_load_snps_assembly_mismatch(self):
     """Load two files built on different assemblies and check the result.

     Verifies that no per-individual discrepancy files appear under
     ``output/``, that four discrepant positions and one discrepant
     genotype are reported, and that the merged SNPs match the
     ``snps_NCBI36_discrepant_snps`` fixture.
     """
     collection = SNPsCollection()
     collection.load_snps(
         ["tests/input/NCBI36.csv", "tests/input/GRCh37.csv"])

     # Neither discrepancy report should have been written by the load.
     for unexpected_path in (
             "output/ind_discrepant_positions_1.csv",
             "output/ind_discrepant_genotypes_1.csv",
     ):
         assert not os.path.exists(unexpected_path)

     assert len(collection.discrepant_positions) == 4
     assert len(collection.discrepant_genotypes) == 1

     actual_frame = collection.snps
     expected_frame = self.snps_NCBI36_discrepant_snps()
     pd.testing.assert_frame_equal(actual_frame, expected_frame)
Esempio n. 4
0
 def test_source_lineage_file(self):
     """Check that the combined source survives a save/reload round trip.

     The collection starts from a generic file, gains a 23andMe file, and
     is saved; reading the saved file back with ``SNPs`` must give the
     same source string and an identical SNPs frame.
     """
     collection = SNPsCollection("tests/input/GRCh37.csv")
     assert collection.source == "generic"

     collection.load_snps("tests/input/23andme.txt")
     assert collection.source == "generic, 23andMe"

     saved_path = collection.save_snps()
     reloaded = SNPs(saved_path)
     assert reloaded.source == "generic, 23andMe"
     pd.testing.assert_frame_equal(collection.snps, reloaded.snps)
Esempio n. 5
0
 def test_source_lineage_file_gzip(self):
     """Check that the combined source survives a gzip round trip.

     Same scenario as the plain-file lineage test, except the saved file
     is gzip-compressed to ``<saved file>.gz`` before being read back
     with ``SNPs``.
     """
     collection = SNPsCollection("tests/input/GRCh37.csv")
     assert collection.source == "generic"

     collection.load_snps("tests/input/23andme.txt")
     assert collection.source == "generic, 23andMe"

     saved_path = collection.save_snps()
     gzip_path = saved_path + ".gz"

     # Stream the saved file into a gzip member written via atomic_write.
     with open(saved_path, "rb") as plain_in:
         with atomic_write(gzip_path, mode="wb",
                           overwrite=True) as raw_out:
             with gzip.open(raw_out, "wb") as gzip_out:
                 shutil.copyfileobj(plain_in, gzip_out)

     reloaded = SNPs(gzip_path)
     assert reloaded.source == "generic, 23andMe"
     pd.testing.assert_frame_equal(collection.snps, reloaded.snps)
Esempio n. 6
0
 def test___repr__snps_collection(self):
     """An empty collection's repr shows the class name and an empty name."""
     sc = SNPsCollection()
     # Go through the builtin so the test exercises the same path callers
     # use, rather than invoking the dunder method directly.
     assert repr(sc) == "SNPsCollection(name='')"
Esempio n. 7
0
 def test_save_discrepant_snps_exception(self):
     """Saving returns a falsy result when the stored value is a string."""
     collection = SNPsCollection()
     # Overwrite the private attribute with a plain string in place of
     # whatever the collection normally keeps there.
     collection._discrepant_snps = "invalid"
     outcome = collection.save_discrepant_snps()
     assert not outcome
Esempio n. 8
0
 def test_save_discrepant_snps_no_discrepant_snps(self):
     """Saving returns a falsy result when there are no discrepant SNPs."""
     collection = SNPsCollection()
     assert len(collection.discrepant_snps) == 0
     outcome = collection.save_discrepant_snps()
     assert not outcome
Esempio n. 9
0
    def test_merging_files_discrepant_snps(self):
        """Merge two derived files and compare against the expected table.

        ``tests/input/discrepant_snps.csv`` holds, per rsid, the position
        and genotype for two files plus the expected merge outcome. The
        test splits it into two single-file inputs, loads both into a
        ``SNPsCollection``, and checks the reported discrepant positions,
        discrepant genotypes, and merged ``pos`` / ``genotype`` columns.
        """
        # Skip the file's own header row and apply explicit column names;
        # "--" marks a missing value.
        df = pd.read_csv(
            "tests/input/discrepant_snps.csv",
            skiprows=1,
            na_values="--",
            names=[
                "rsid",
                "chrom",
                "pos_file1",
                "pos_file2",
                "genotype_file1",
                "genotype_file2",
                "discrepant_position",
                "discrepant_genotype",
                "expected_position",
                "expected_genotype",
            ],
            index_col=0,
            dtype={
                "chrom": object,
                "pos_file1": np.int64,
                "pos_file2": np.int64,
                "discrepant_position": bool,
                "discrepant_genotype": bool,
            },
        )

        # Project the combined table into one frame per simulated file.
        df1 = df[["chrom", "pos_file1", "genotype_file1"]]
        df2 = df[["chrom", "pos_file2", "genotype_file2"]]

        # Write each frame out under the same three-column header.
        df1.to_csv(
            "tests/input/discrepant_snps1.csv",
            na_rep="--",
            header=["chromosome", "position", "genotype"],
        )

        df2.to_csv(
            "tests/input/discrepant_snps2.csv",
            na_rep="--",
            header=["chromosome", "position", "genotype"],
        )

        sc = SNPsCollection([
            "tests/input/discrepant_snps1.csv",
            "tests/input/discrepant_snps2.csv"
        ])

        expected = df[[
            "chrom",
            "discrepant_position",
            "discrepant_genotype",
            "expected_position",
            "expected_genotype",
        ]]
        expected = expected.rename(columns={
            "expected_position": "pos",
            "expected_genotype": "genotype"
        })

        # Run the expected table through SNPs.sort_snps before comparing
        # it with the collection's frame.
        expected_snps = SNPs()
        expected_snps._snps = expected
        expected_snps.sort_snps()
        expected = expected_snps.snps

        # The flag columns were read as bool, so they serve directly as
        # boolean masks; no comparison against True is needed.
        pd.testing.assert_index_equal(
            sc.discrepant_positions.index,
            expected.loc[expected["discrepant_position"]].index,
        )

        pd.testing.assert_index_equal(
            sc.discrepant_genotypes.index,
            expected.loc[expected["discrepant_genotype"]].index,
        )

        pd.testing.assert_series_equal(sc.snps["pos"], expected["pos"])
        pd.testing.assert_series_equal(sc.snps["genotype"],
                                       expected["genotype"])
Esempio n. 10
0
 def test_load_snps_invalid_file(self):
     """Loading a valid file plus an unparsable one keeps the valid SNPs.

     The merged result must equal the ``snps_GRCh37`` fixture.
     """
     collection = SNPsCollection()
     # Nothing is written inside the block, so this presumably leaves an
     # empty file behind for the loader to reject.
     with atomic_write("tests/input/empty.txt", mode="w", overwrite=True):
         pass
     input_files = ["tests/input/GRCh37.csv", "tests/input/empty.txt"]
     collection.load_snps(input_files)
     pd.testing.assert_frame_equal(collection.snps, self.snps_GRCh37())
Esempio n. 11
0
 def test_load_snps_non_existent_file(self):
     """Loading a valid file plus a missing path keeps the valid SNPs.

     The merged result must equal the ``snps_GRCh37`` fixture.
     """
     collection = SNPsCollection()
     input_files = [
         "tests/input/GRCh37.csv",
         "tests/input/non_existent_file.csv",
     ]
     collection.load_snps(input_files)
     pd.testing.assert_frame_equal(collection.snps, self.snps_GRCh37())
Esempio n. 12
0
 def test_discrepant_snps(self):
     """Loading the NCBI36 and GRCh37 inputs reports four discrepant SNPs."""
     collection = SNPsCollection()
     input_files = ["tests/input/NCBI36.csv", "tests/input/GRCh37.csv"]
     collection.load_snps(input_files)
     assert len(collection.discrepant_snps) == 4
Esempio n. 13
0
 def test_load_snps_None(self):
     """Passing ``None`` to ``load_snps`` raises ``TypeError``."""
     collection = SNPsCollection()
     self.assertRaises(TypeError, collection.load_snps, None)
Esempio n. 14
0
 def test_load_snps_list(self):
     """Loading the same file twice keeps its SNPs and records both sources.

     The merged result must equal the ``snps_GRCh37`` fixture and the
     source string must list ``generic`` twice.
     """
     collection = SNPsCollection()
     input_files = ["tests/input/GRCh37.csv", "tests/input/GRCh37.csv"]
     collection.load_snps(input_files)
     pd.testing.assert_frame_equal(collection.snps, self.snps_GRCh37())
     assert collection.source == "generic, generic"