def test_save_discrepant_genotypes(self):
    """Save discrepant genotypes to the default output file.

    Loading the NCBI36 and GRCh37 fixtures is expected to produce exactly
    one discrepant genotype, which is then written to
    ``output/discrepant_genotypes.csv``.
    """
    input_files = ["tests/input/NCBI36.csv", "tests/input/GRCh37.csv"]
    expected_path = "output/discrepant_genotypes.csv"

    sc = SNPsCollection()
    sc.load_snps(input_files)
    assert len(sc.discrepant_genotypes) == 1

    saved_path = sc.save_discrepant_genotypes()
    assert os.path.relpath(saved_path) == expected_path
    assert os.path.exists(saved_path)
def test_save_discrepant_snps_specify_file(self):
    """Save discrepant SNPs under an explicitly supplied file name.

    The NCBI36 / GRCh37 fixture pair is expected to yield four discrepant
    SNPs; the saved file must land at ``output/discrepant_snps.csv``.
    """
    input_files = ["tests/input/NCBI36.csv", "tests/input/GRCh37.csv"]
    expected_path = "output/discrepant_snps.csv"

    sc = SNPsCollection()
    sc.load_snps(input_files)
    assert len(sc.discrepant_snps) == 4

    saved_path = sc.save_discrepant_snps("discrepant_snps.csv")
    assert os.path.relpath(saved_path) == expected_path
    assert os.path.exists(saved_path)
def test_load_snps_assembly_mismatch(self):
    """Load two files built against different assemblies.

    Checks that no per-individual discrepancy files exist in ``output``,
    that four discrepant positions and one discrepant genotype are
    recorded, and that the merged SNPs match the expected fixture frame.
    """
    sc = SNPsCollection()
    sc.load_snps(["tests/input/NCBI36.csv", "tests/input/GRCh37.csv"])

    # Loading alone must not leave these discrepancy files on disk.
    positions_file = "output/ind_discrepant_positions_1.csv"
    genotypes_file = "output/ind_discrepant_genotypes_1.csv"
    assert not os.path.exists(positions_file)
    assert not os.path.exists(genotypes_file)

    assert len(sc.discrepant_positions) == 4
    assert len(sc.discrepant_genotypes) == 1

    expected_snps = self.snps_NCBI36_discrepant_snps()
    pd.testing.assert_frame_equal(sc.snps, expected_snps)
def test_source_lineage_file(self):
    """Check that the combined source string survives a save / reload.

    The collection starts from a generic file, then loads a 23andMe file;
    the saved output, when read back as ``SNPs``, must report the same
    source string and the same SNPs frame.
    """
    combined_source = "generic, 23andMe"

    sc = SNPsCollection("tests/input/GRCh37.csv")
    assert sc.source == "generic"

    sc.load_snps("tests/input/23andme.txt")
    assert sc.source == combined_source

    saved_path = sc.save_snps()
    reloaded = SNPs(saved_path)
    assert reloaded.source == combined_source
    pd.testing.assert_frame_equal(sc.snps, reloaded.snps)
def test_source_lineage_file_gzip(self):
    """Check that the combined source string survives a gzip round trip.

    The saved SNPs file is gzip-compressed alongside the original (same
    path with a ``.gz`` suffix) and read back as ``SNPs``; the source
    string and SNPs frame must match the collection's.
    """
    combined_source = "generic, 23andMe"

    sc = SNPsCollection("tests/input/GRCh37.csv")
    assert sc.source == "generic"

    sc.load_snps("tests/input/23andme.txt")
    assert sc.source == combined_source

    saved_path = sc.save_snps()
    gz_path = saved_path + ".gz"

    # Copy the saved file's bytes into a gzip stream layered over the
    # atomic_write handle.
    with open(saved_path, "rb") as f_in, atomic_write(
        gz_path, mode="wb", overwrite=True
    ) as f_out, gzip.open(f_out, "wb") as f_gzip:
        shutil.copyfileobj(f_in, f_gzip)

    reloaded = SNPs(gz_path)
    assert reloaded.source == combined_source
    pd.testing.assert_frame_equal(sc.snps, reloaded.snps)
def test___repr__snps_collection(self):
    """Check the ``__repr__`` output of a default-constructed collection."""
    expected_repr = "SNPsCollection(name='')"
    collection = SNPsCollection()
    assert expected_repr == collection.__repr__()
def test_save_discrepant_snps_exception(self):
    """Saving must return a falsy result when the stored data is invalid."""
    collection = SNPsCollection()
    # Replace the internal discrepant-SNPs attribute with a plain string.
    collection._discrepant_snps = "invalid"

    result = collection.save_discrepant_snps()
    assert not result
def test_save_discrepant_snps_no_discrepant_snps(self):
    """Saving must return a falsy result when there are no discrepant SNPs."""
    collection = SNPsCollection()
    assert len(collection.discrepant_snps) == 0

    result = collection.save_discrepant_snps()
    assert not result
def test_merging_files_discrepant_snps(self):
    """Merge two files derived from the discrepant-SNPs fixture.

    The fixture ``tests/input/discrepant_snps.csv`` holds, per rsid, the
    position / genotype for two files plus the expected merge result. The
    test splits it into two input files, loads both into a collection and
    compares discrepant positions, discrepant genotypes, merged positions
    and merged genotypes against the expected columns.
    """
    fixture_columns = [
        "rsid",
        "chrom",
        "pos_file1",
        "pos_file2",
        "genotype_file1",
        "genotype_file2",
        "discrepant_position",
        "discrepant_genotype",
        "expected_position",
        "expected_genotype",
    ]
    fixture_dtypes = {
        "chrom": object,
        "pos_file1": np.int64,
        "pos_file2": np.int64,
        "discrepant_position": bool,
        "discrepant_genotype": bool,
    }
    df = pd.read_csv(
        "tests/input/discrepant_snps.csv",
        skiprows=1,
        na_values="--",
        names=fixture_columns,
        index_col=0,
        dtype=fixture_dtypes,
    )

    # Write one input file per source, each with the generic header names.
    output_header = ["chromosome", "position", "genotype"]
    per_file_columns = (
        (
            "tests/input/discrepant_snps1.csv",
            ["chrom", "pos_file1", "genotype_file1"],
        ),
        (
            "tests/input/discrepant_snps2.csv",
            ["chrom", "pos_file2", "genotype_file2"],
        ),
    )
    for path, columns in per_file_columns:
        df[columns].to_csv(path, na_rep="--", header=output_header)

    sc = SNPsCollection([path for path, _ in per_file_columns])

    # Build the expected frame and sort it through an SNPs object.
    expected_columns = [
        "chrom",
        "discrepant_position",
        "discrepant_genotype",
        "expected_position",
        "expected_genotype",
    ]
    column_renames = {
        "expected_position": "pos",
        "expected_genotype": "genotype",
    }
    expected_snps = SNPs()
    expected_snps._snps = df[expected_columns].rename(columns=column_renames)
    expected_snps.sort_snps()
    expected = expected_snps.snps

    position_mask = expected["discrepant_position"] == True
    genotype_mask = expected["discrepant_genotype"] == True

    pd.testing.assert_index_equal(
        sc.discrepant_positions.index, expected.loc[position_mask].index
    )
    pd.testing.assert_index_equal(
        sc.discrepant_genotypes.index, expected.loc[genotype_mask].index
    )
    pd.testing.assert_series_equal(sc.snps["pos"], expected["pos"])
    pd.testing.assert_series_equal(sc.snps["genotype"], expected["genotype"])
def test_load_snps_invalid_file(self):
    """Load a valid file together with an empty one.

    The resulting SNPs must equal the GRCh37 fixture frame.
    """
    empty_path = "tests/input/empty.txt"
    sc = SNPsCollection()

    # Entering and leaving the context with no writes produces the empty
    # input file.
    with atomic_write(empty_path, mode="w", overwrite=True):
        pass

    sc.load_snps(["tests/input/GRCh37.csv", empty_path])
    expected_snps = self.snps_GRCh37()
    pd.testing.assert_frame_equal(sc.snps, expected_snps)
def test_load_snps_non_existent_file(self):
    """Load a valid file together with a path that does not exist.

    The resulting SNPs must equal the GRCh37 fixture frame.
    """
    input_files = [
        "tests/input/GRCh37.csv",
        "tests/input/non_existent_file.csv",
    ]
    sc = SNPsCollection()
    sc.load_snps(input_files)

    expected_snps = self.snps_GRCh37()
    pd.testing.assert_frame_equal(sc.snps, expected_snps)
def test_discrepant_snps(self):
    """The NCBI36 / GRCh37 fixture pair must yield four discrepant SNPs."""
    input_files = ["tests/input/NCBI36.csv", "tests/input/GRCh37.csv"]
    collection = SNPsCollection()
    collection.load_snps(input_files)

    discrepant_count = len(collection.discrepant_snps)
    assert discrepant_count == 4
def test_load_snps_None(self):
    """Passing ``None`` to ``load_snps`` must raise ``TypeError``."""
    collection = SNPsCollection()
    self.assertRaises(TypeError, collection.load_snps, None)
def test_load_snps_list(self):
    """Load the same file twice via a list.

    The SNPs must equal the GRCh37 fixture frame and the source string
    must list ``generic`` once per loaded file.
    """
    grch37_path = "tests/input/GRCh37.csv"
    sc = SNPsCollection()
    sc.load_snps([grch37_path, grch37_path])

    expected_snps = self.snps_GRCh37()
    pd.testing.assert_frame_equal(sc.snps, expected_snps)
    assert sc.source == "generic, generic"