def test_save_snps_vcf_discrepant_pos(self):
    # Save a VCF after moving two SNPs to positions 0 and 118, then check
    # that exactly those SNPs are reported in ``discrepant_vcf_position`` and
    # that the saved file parses to the generic VCF SNPs without them.
    vcf_snps = SNPs("tests/input/testvcf.vcf")

    resources = Resources()
    resources._reference_sequences["GRCh37"] = {}

    with tempfile.TemporaryDirectory() as tmpdir:
        # register a gzipped copy of the test FASTA as GRCh37 sequence "1"
        fasta_gz = os.path.join(tmpdir, "generic.fa.gz")
        gzip_file("tests/input/generic.fa", fasta_gz)
        resources._reference_sequences["GRCh37"]["1"] = ReferenceSequence(
            ID="1", path=fasta_gz
        )

        # create discrepant SNPs by setting positions outside reference sequence
        vcf_snps._snps.loc["rs1", "pos"] = 0
        vcf_snps._snps.loc["rs17", "pos"] = 118

        saved_path = vcf_snps.save(vcf=True)
        self.assertEqual(
            os.path.relpath(saved_path), f"output{os.sep}vcf_GRCh37.vcf"
        )

    expected_discrepant = self.create_snp_df(
        rsid=["rs1", "rs17"],
        chrom=["1", "1"],
        pos=[0, 118],
        genotype=["AA", np.nan],
    )
    pd.testing.assert_frame_equal(
        vcf_snps.discrepant_vcf_position,
        expected_discrepant,
        check_exact=True,
    )

    # the two tweaked SNPs must not appear in the saved VCF
    remaining = self.generic_snps_vcf().drop(["rs1", "rs17"])
    self.run_parsing_tests_vcf("output/vcf_GRCh37.vcf", snps_df=remaining)
def test_save_snps_phased(self):
    # Round-trip phased VCF data through the default text save format.

    # read phased data
    phased = SNPs("tests/input/testvcf_phased.vcf")

    # save phased data to TSV; the expected path is built with os.sep because
    # os.path.relpath returns the platform's native separator
    saved_path = phased.save()
    self.assertEqual(os.path.relpath(saved_path), f"output{os.sep}vcf_GRCh37.txt")

    # read saved TSV
    self.run_parsing_tests_vcf("output/vcf_GRCh37.txt", phased=True)
def test_save_snps_csv_filename(self):
    # Save with an explicit file name and a comma separator, then parse the
    # written file as a "generic" source.
    source = SNPs("tests/input/generic.csv")
    written = source.save("generic.csv", sep=",")
    expected = f"output{os.sep}generic.csv"
    self.assertEqual(os.path.relpath(written), expected)
    self.run_parsing_tests("output/generic.csv", "generic")
def test_save_snps_vcf_false_positive_build(self):
    # Save a VCF, overwrite its "snps v..." header line with a fixed
    # development version string, and check that the file still parses.
    # NOTE(review): presumably the version text (e.g. "b38" in the commit
    # hash) is meant to look like a build identifier - confirm against the
    # reader's build detection.
    replacement = '##source="vcf; snps v1.2.3.post85.dev0+gb386302; https://pypi.org/project/snps/"\n'

    with tempfile.TemporaryDirectory() as tmpdir1:
        output = os.path.join(tmpdir1, "vcf_GRCh37.vcf")
        vcf_snps = SNPs("tests/input/testvcf.vcf", output_dir=tmpdir1)

        resources = Resources()
        resources._reference_sequences["GRCh37"] = {}

        with tempfile.TemporaryDirectory() as tmpdir2:
            # register a gzipped copy of the test FASTA as GRCh37 sequence "1"
            fasta_gz = os.path.join(tmpdir2, "generic.fa.gz")
            gzip_file("tests/input/generic.fa", fasta_gz)
            resources._reference_sequences["GRCh37"]["1"] = ReferenceSequence(
                ID="1", path=fasta_gz
            )

            self.assertEqual(vcf_snps.save(vcf=True), output)

        # swap every line mentioning "snps v" for the fixed version line
        with open(output, "r") as f:
            rewritten = [replacement if "snps v" in line else line for line in f]

        with open(output, "w") as f:
            f.write("".join(rewritten))

        self.run_parsing_tests_vcf(output)
def test_save_snps_tsv_filename(self):
    # Save under an explicit file name with a tab separator into a temporary
    # output directory, then parse the written file as a "generic" source.
    with tempfile.TemporaryDirectory() as tmpdir:
        expected_path = os.path.join(tmpdir, "generic.tsv")
        source = SNPs("tests/input/generic.tsv", output_dir=tmpdir)

        written = source.save("generic.tsv", sep="\t")

        self.assertEqual(written, expected_path)
        self.run_parsing_tests(expected_path, "generic")
def test_save_snps_phased(self):
    # Round-trip phased VCF data through the default text save format using
    # a temporary output directory.
    with tempfile.TemporaryDirectory() as tmpdir:
        expected_path = os.path.join(tmpdir, "vcf_GRCh37.txt")

        # read phased data
        phased = SNPs("tests/input/testvcf_phased.vcf", output_dir=tmpdir)

        # save phased data to TSV
        written = phased.save()
        self.assertEqual(written, expected_path)

        # read saved TSV
        self.run_parsing_tests_vcf(expected_path, phased=True)
def test_save_source(self):
    # Save GRCh38 data, reload the written file, and check that build,
    # source, and SNP content are as expected after the round trip.
    original = SNPs("tests/input/GRCh38.csv")
    saved_path = original.save()
    self.assertEqual(
        os.path.relpath(saved_path), f"output{os.sep}generic_GRCh38.txt"
    )

    reloaded = SNPs("output/generic_GRCh38.txt")

    self.assertEqual(reloaded.build, 38)
    self.assertTrue(reloaded.build_detected)
    self.assertEqual(reloaded.source, "generic")
    self.assertListEqual(reloaded._source, ["generic"])

    expected_frame = self.snps_GRCh38()
    pd.testing.assert_frame_equal(reloaded.snps, expected_frame, check_exact=True)
def test_save_source(self):
    # Save GRCh38 data into a temporary directory, reload the written file,
    # and check that build, source, and SNP content are as expected.
    with tempfile.TemporaryDirectory() as tmpdir:
        expected_path = os.path.join(tmpdir, "generic_GRCh38.txt")
        original = SNPs("tests/input/GRCh38.csv", output_dir=tmpdir)

        self.assertEqual(original.save(), expected_path)

        reloaded = SNPs(expected_path)

        self.assertEqual(reloaded.build, 38)
        self.assertTrue(reloaded.build_detected)
        self.assertEqual(reloaded.source, "generic")
        self.assertListEqual(reloaded._source, ["generic"])

        expected_frame = self.snps_GRCh38()
        pd.testing.assert_frame_equal(
            reloaded.snps, expected_frame, check_exact=True
        )
def test_source_snps(self):
    # Merge 23andMe data into generic data, save the result into a temporary
    # directory, and check that the combined source survives a reload.
    with tempfile.TemporaryDirectory() as tmpdir:
        s = SNPs("tests/input/GRCh37.csv", output_dir=tmpdir)
        self.assertEqual(s.source, "generic")

        results = s.merge((SNPs("tests/input/23andme.txt"),))
        self.assertEqual(s.source, "generic, 23andMe")
        self.assertListEqual(s._source, ["generic", "23andMe"])

        # keep the merged frame so the reloaded data has something
        # independent to be compared against
        merged_snps = s.snps

        dest = os.path.join(tmpdir, "generic__23andMe_GRCh37.txt")
        self.assertEqual(s.save(), dest)

        reloaded = SNPs(dest)
        self.assertEqual(reloaded.source, "generic, 23andMe")
        self.assertListEqual(reloaded._source, ["generic", "23andMe"])

        # compare the reloaded frame with the merged frame; comparing a frame
        # with itself (as before) could never fail
        pd.testing.assert_frame_equal(reloaded.snps, merged_snps, check_exact=True)

        self.assert_results(results, [{"merged": True}])
def test_source_snps(self):
    # Merge 23andMe data into generic data, save the result, and check that
    # the combined source survives a reload of the written file.
    s = SNPs("tests/input/GRCh37.csv")
    self.assertEqual(s.source, "generic")

    results = s.merge((SNPs("tests/input/23andme.txt"),))
    self.assertEqual(s.source, "generic, 23andMe")
    self.assertListEqual(s._source, ["generic", "23andMe"])

    # keep the merged frame so the reloaded data has something independent
    # to be compared against
    merged_snps = s.snps

    self.assertEqual(
        os.path.relpath(s.save()), f"output{os.sep}generic__23andMe_GRCh37.txt"
    )

    reloaded = SNPs("output/generic__23andMe_GRCh37.txt")
    self.assertEqual(reloaded.source, "generic, 23andMe")
    self.assertListEqual(reloaded._source, ["generic", "23andMe"])

    # compare the reloaded frame with the merged frame; comparing a frame
    # with itself (as before) could never fail
    pd.testing.assert_frame_equal(reloaded.snps, merged_snps, check_exact=True)

    self.assert_results(results, [{"merged": True}])
def test_save_snps_false_positive_build(self):
    # Save generic data, overwrite the "snps v..." header line with a fixed
    # development version string, and check that the file still parses.
    # NOTE(review): presumably the version text (e.g. "b38" in the commit
    # hash) is meant to look like a build identifier - confirm against the
    # reader's build detection.
    replacement = "# Generated by snps v1.2.3.post85.dev0+gb386302, https://pypi.org/project/snps/\n"

    snps = SNPs("tests/input/generic.csv")

    # os.path.relpath returns native separators, so build the expected path
    # with os.path.join rather than hard-coding "/"
    output = os.path.join("output", "generic_GRCh37.txt")
    self.assertEqual(os.path.relpath(snps.save()), output)

    # swap every line mentioning "snps v" for the fixed version line
    with open(output, "r") as f:
        rewritten = [replacement if "snps v" in line else line for line in f]

    with open(output, "w") as f:
        f.write("".join(rewritten))

    self.run_parsing_tests(output, "generic")
def test_save_snps_vcf(self):
    # Save VCF input back out as VCF (expected under ``output``) and parse
    # the written file.
    vcf_snps = SNPs("tests/input/testvcf.vcf")

    resources = Resources()
    resources._reference_sequences["GRCh37"] = {}

    with tempfile.TemporaryDirectory() as tmpdir:
        # register a gzipped copy of the test FASTA as GRCh37 sequence "1"
        fasta_gz = os.path.join(tmpdir, "generic.fa.gz")
        gzip_file("tests/input/generic.fa", fasta_gz)
        resources._reference_sequences["GRCh37"]["1"] = ReferenceSequence(
            ID="1", path=fasta_gz
        )

        saved_path = vcf_snps.save(vcf=True)
        self.assertEqual(
            os.path.relpath(saved_path), f"output{os.sep}vcf_GRCh37.vcf"
        )

    self.run_parsing_tests_vcf("output/vcf_GRCh37.vcf")
def test_save_snps_false_positive_build(self):
    # Save generic data into a temporary directory, overwrite the
    # "snps v..." header line with a fixed development version string, and
    # check that the file still parses.
    # NOTE(review): presumably the version text (e.g. "b38" in the commit
    # hash) is meant to look like a build identifier - confirm against the
    # reader's build detection.
    replacement = "# Generated by snps v1.2.3.post85.dev0+gb386302, https://pypi.org/project/snps/\n"

    with tempfile.TemporaryDirectory() as tmpdir:
        output = os.path.join(tmpdir, "generic_GRCh37.txt")
        source = SNPs("tests/input/generic.csv", output_dir=tmpdir)

        self.assertEqual(source.save(), output)

        # swap every line mentioning "snps v" for the fixed version line
        with open(output, "r") as f:
            rewritten = [replacement if "snps v" in line else line for line in f]

        with open(output, "w") as f:
            f.write("".join(rewritten))

        self.run_parsing_tests(output, "generic")
def test_save_snps_vcf(self):
    # Save VCF input back out as VCF into a temporary output directory and
    # parse the written file.
    with tempfile.TemporaryDirectory() as tmpdir1:
        output = os.path.join(tmpdir1, "vcf_GRCh37.vcf")
        vcf_snps = SNPs("tests/input/testvcf.vcf", output_dir=tmpdir1)

        resources = Resources()
        resources._reference_sequences["GRCh37"] = {}

        with tempfile.TemporaryDirectory() as tmpdir2:
            # register a gzipped copy of the test FASTA as GRCh37 sequence "1"
            fasta_gz = os.path.join(tmpdir2, "generic.fa.gz")
            gzip_file("tests/input/generic.fa", fasta_gz)
            resources._reference_sequences["GRCh37"]["1"] = ReferenceSequence(
                ID="1", path=fasta_gz
            )

            self.assertEqual(vcf_snps.save(vcf=True), output)

        self.run_parsing_tests_vcf(output)
def test_save_snps_vcf_phased(self):
    # Round-trip phased VCF data through the VCF save format.

    # read phased data
    phased = SNPs("tests/input/testvcf_phased.vcf")

    # setup resource to use test FASTA reference sequence
    resources = Resources()
    resources._reference_sequences["GRCh37"] = {}

    with tempfile.TemporaryDirectory() as tmpdir:
        fasta_gz = os.path.join(tmpdir, "generic.fa.gz")
        gzip_file("tests/input/generic.fa", fasta_gz)
        resources._reference_sequences["GRCh37"]["1"] = ReferenceSequence(
            ID="1", path=fasta_gz
        )

        # save phased data to VCF
        saved_path = phased.save(vcf=True)
        self.assertEqual(
            os.path.relpath(saved_path), f"output{os.sep}vcf_GRCh37.vcf"
        )

    # read saved VCF
    self.run_parsing_tests_vcf("output/vcf_GRCh37.vcf", phased=True)
def test_save_snps_vcf_discrepant_pos(self):
    # Save a VCF into a temporary directory after moving two SNPs to
    # positions 0 and 118, then check that exactly those SNPs are reported
    # in ``discrepant_vcf_position`` and are missing from the saved file.
    with tempfile.TemporaryDirectory() as tmpdir1:
        output = os.path.join(tmpdir1, "vcf_GRCh37.vcf")
        vcf_snps = SNPs("tests/input/testvcf.vcf", output_dir=tmpdir1)

        resources = Resources()
        resources._reference_sequences["GRCh37"] = {}

        with tempfile.TemporaryDirectory() as tmpdir2:
            # register a gzipped copy of the test FASTA as GRCh37 sequence "1"
            fasta_gz = os.path.join(tmpdir2, "generic.fa.gz")
            gzip_file("tests/input/generic.fa", fasta_gz)
            resources._reference_sequences["GRCh37"]["1"] = ReferenceSequence(
                ID="1", path=fasta_gz
            )

            # create discrepant SNPs by setting positions outside reference sequence
            vcf_snps._snps.loc["rs1", "pos"] = 0
            vcf_snps._snps.loc["rs17", "pos"] = 118

            # ensure "pos" has the right dtype after the manual tweaking
            vcf_snps._snps = vcf_snps._snps.astype({"pos": np.uint32})

            self.assertEqual(vcf_snps.save(vcf=True), output)

        expected_discrepant = self.create_snp_df(
            rsid=["rs1", "rs17"],
            chrom=["1", "1"],
            pos=[0, 118],
            genotype=["AA", np.nan],
        )
        pd.testing.assert_frame_equal(
            vcf_snps.discrepant_vcf_position,
            expected_discrepant,
            check_exact=True,
        )

        # the two tweaked SNPs must not appear in the saved VCF
        remaining = self.generic_snps_vcf().drop(["rs1", "rs17"])
        self.run_parsing_tests_vcf(output, snps_df=remaining)
def test_save_buffer_binary(self):
    # Save into an in-memory binary buffer and check that what is read back
    # from it starts with the "# Generated by snps" header.
    source = SNPs("tests/input/generic.csv")
    buffer = io.BytesIO()

    source.save(buffer)

    content = buffer.read()
    self.assertTrue(content.startswith(b"# Generated by snps"))
def test_save_snps_csv(self):
    # Save generic data with a comma separator under the default file name,
    # then parse the written file as a "generic" source.
    snps = SNPs("tests/input/generic.csv")

    # os.path.relpath returns native separators, so the expected path is
    # built with os.sep rather than a hard-coded "/"
    saved_path = snps.save(sep=",")
    self.assertEqual(os.path.relpath(saved_path), f"output{os.sep}generic_GRCh37.csv")

    self.run_parsing_tests("output/generic_GRCh37.csv", "generic")
def test_save_buffer(self):
    # Save into an in-memory text buffer and check that what is read back
    # from it starts with the "# Generated by snps" header.
    source = SNPs("tests/input/generic.csv")
    buffer = io.StringIO()

    source.save(buffer)

    content = buffer.read()
    self.assertTrue(content.startswith("# Generated by snps"))
def test_save_snps(self):
    # Save generic data under the default file name into a temporary output
    # directory, then parse the written file as a "generic" source.
    with tempfile.TemporaryDirectory() as tmpdir:
        expected_path = os.path.join(tmpdir, "generic_GRCh37.txt")
        source = SNPs("tests/input/generic.csv", output_dir=tmpdir)

        written = source.save()

        self.assertEqual(written, expected_path)
        self.run_parsing_tests(expected_path, "generic")
def test_save_no_snps(self):
    # Saving a SNPs object created without any input returns a falsy value.
    without_input = SNPs()
    result = without_input.save()
    self.assertFalse(result)
def test_save_no_snps_vcf(self):
    # Saving a SNPs object created without any input as VCF returns a falsy
    # value.
    without_input = SNPs()
    result = without_input.save(vcf=True)
    self.assertFalse(result)
def test_save_snps_specify_file(self):
    # Save under a caller-chosen file name, then parse the written file as a
    # "generic" source.
    source = SNPs("tests/input/generic.csv")
    written = source.save("snps.csv")
    expected = f"output{os.sep}snps.csv"
    self.assertEqual(os.path.relpath(written), expected)
    self.run_parsing_tests("output/snps.csv", "generic")
def test_save_snps(self):
    # Save generic data under the default file name, then parse the written
    # file as a "generic" source.
    source = SNPs("tests/input/generic.csv")
    written = source.save()
    expected = f"output{os.sep}generic_GRCh37.txt"
    self.assertEqual(os.path.relpath(written), expected)
    self.run_parsing_tests("output/generic_GRCh37.txt", "generic")