Esempio n. 1
0
    def test_save_snps_vcf_discrepant_pos(self):
        """Save a VCF after moving two SNPs to discrepant positions.

        The moved SNPs must show up in ``discrepant_vcf_position`` and the
        saved file is checked against the generic VCF SNPs without them.
        """
        snps = SNPs("tests/input/testvcf.vcf")

        resources = Resources()
        resources._reference_sequences["GRCh37"] = {}

        with tempfile.TemporaryDirectory() as workdir:
            # compress the test FASTA and register it as chromosome 1
            fasta_gz = os.path.join(workdir, "generic.fa.gz")
            gzip_file("tests/input/generic.fa", fasta_gz)
            resources._reference_sequences["GRCh37"]["1"] = ReferenceSequence(
                ID="1", path=fasta_gz
            )

            # create discrepant SNPs by setting positions outside reference sequence
            for rsid, position in (("rs1", 0), ("rs17", 118)):
                snps._snps.loc[rsid, "pos"] = position

            saved_path = os.path.relpath(snps.save(vcf=True))
            self.assertEqual(saved_path, f"output{os.sep}vcf_GRCh37.vcf")

        expected_discrepant = self.create_snp_df(
            rsid=["rs1", "rs17"],
            chrom=["1", "1"],
            pos=[0, 118],
            genotype=["AA", np.nan],
        )
        pd.testing.assert_frame_equal(
            snps.discrepant_vcf_position, expected_discrepant, check_exact=True
        )

        remaining = self.generic_snps_vcf().drop(["rs1", "rs17"])
        self.run_parsing_tests_vcf("output/vcf_GRCh37.vcf", snps_df=remaining)
Esempio n. 2
0
 def test_save_snps_phased(self):
     """Save phased VCF data as TSV and parse the saved file back as phased."""
     # read phased data
     s = SNPs("tests/input/testvcf_phased.vcf")
     # save phased data to TSV; relpath() returns the platform's separator, so
     # the expected path is built with os.sep instead of a hard-coded "/"
     self.assertEqual(os.path.relpath(s.save()), f"output{os.sep}vcf_GRCh37.txt")
     # read saved TSV
     self.run_parsing_tests_vcf("output/vcf_GRCh37.txt", phased=True)
Esempio n. 3
0
 def test_save_snps_csv_filename(self):
     """Save to an explicitly named CSV file and parse the result."""
     source = SNPs("tests/input/generic.csv")
     saved_path = os.path.relpath(source.save("generic.csv", sep=","))
     self.assertEqual(saved_path, f"output{os.sep}generic.csv")
     self.run_parsing_tests("output/generic.csv", "generic")
Esempio n. 4
0
    def test_save_snps_vcf_false_positive_build(self):
        """Save a VCF, swap in a fixed ``##source`` line, and parse it again.

        The line containing "snps v" is replaced with one carrying a dev
        version string; the rewritten file must still pass the VCF parsing
        tests.
        NOTE(review): presumably this guards against the build being
        misdetected from the version hash ("...gb386302") - confirm.
        """
        source_line = '##source="vcf; snps v1.2.3.post85.dev0+gb386302; https://pypi.org/project/snps/"\n'

        with tempfile.TemporaryDirectory() as tmpdir1:
            snps = SNPs("tests/input/testvcf.vcf", output_dir=tmpdir1)

            r = Resources()
            r._reference_sequences["GRCh37"] = {}

            output = os.path.join(tmpdir1, "vcf_GRCh37.vcf")
            with tempfile.TemporaryDirectory() as tmpdir2:
                dest = os.path.join(tmpdir2, "generic.fa.gz")
                gzip_file("tests/input/generic.fa", dest)

                seq = ReferenceSequence(ID="1", path=dest)

                r._reference_sequences["GRCh37"]["1"] = seq

                self.assertEqual(snps.save(vcf=True), output)

                # Iterate the file lazily and collect the lines, instead of
                # growing a string with += over readlines().
                with open(output, "r") as f:
                    lines = [
                        source_line if "snps v" in line else line for line in f
                    ]

                with open(output, "w") as f:
                    f.write("".join(lines))

            self.run_parsing_tests_vcf(output)
Esempio n. 5
0
 def test_save_snps_tsv_filename(self):
     """Save to an explicitly named TSV file in a temp dir and parse it."""
     with tempfile.TemporaryDirectory() as workdir:
         expected_path = os.path.join(workdir, "generic.tsv")
         source = SNPs("tests/input/generic.tsv", output_dir=workdir)
         saved_path = source.save("generic.tsv", sep="\t")
         self.assertEqual(saved_path, expected_path)
         self.run_parsing_tests(expected_path, "generic")
Esempio n. 6
0
 def test_save_snps_phased(self):
     """Save phased VCF data as TSV in a temp dir and parse it back as phased."""
     with tempfile.TemporaryDirectory() as workdir:
         expected_path = os.path.join(workdir, "vcf_GRCh37.txt")
         # load the phased input, directing output to the temp dir
         phased = SNPs("tests/input/testvcf_phased.vcf", output_dir=workdir)
         # the default save writes a TSV at the expected location
         saved_path = phased.save()
         self.assertEqual(saved_path, expected_path)
         # the saved TSV must parse as phased data
         self.run_parsing_tests_vcf(expected_path, phased=True)
Esempio n. 7
0
 def test_save_source(self):
     """Save GRCh38 data and confirm build and source after reloading."""
     original = SNPs("tests/input/GRCh38.csv")
     saved_path = os.path.relpath(original.save())
     self.assertEqual(saved_path, f"output{os.sep}generic_GRCh38.txt")

     reloaded = SNPs("output/generic_GRCh38.txt")
     self.assertEqual(reloaded.build, 38)
     self.assertTrue(reloaded.build_detected)
     self.assertEqual(reloaded.source, "generic")
     self.assertListEqual(reloaded._source, ["generic"])
     pd.testing.assert_frame_equal(
         reloaded.snps, self.snps_GRCh38(), check_exact=True
     )
Esempio n. 8
0
 def test_save_source(self):
     """Save GRCh38 data to a temp dir and confirm build and source on reload."""
     with tempfile.TemporaryDirectory() as workdir:
         expected_path = os.path.join(workdir, "generic_GRCh38.txt")
         original = SNPs("tests/input/GRCh38.csv", output_dir=workdir)
         self.assertEqual(original.save(), expected_path)

         reloaded = SNPs(expected_path)
         self.assertEqual(reloaded.build, 38)
         self.assertTrue(reloaded.build_detected)
         self.assertEqual(reloaded.source, "generic")
         self.assertListEqual(reloaded._source, ["generic"])
         pd.testing.assert_frame_equal(
             reloaded.snps, self.snps_GRCh38(), check_exact=True
         )
Esempio n. 9
0
 def test_source_snps(self):
     """Merge two sources, save, and confirm the reloaded data matches.

     Checks that the combined source is reported before saving and after
     reloading, and that the reloaded SNPs equal the merged SNPs.
     """
     with tempfile.TemporaryDirectory() as tmpdir:
         merged = SNPs("tests/input/GRCh37.csv", output_dir=tmpdir)
         self.assertEqual(merged.source, "generic")
         results = merged.merge((SNPs("tests/input/23andme.txt"),))
         self.assertEqual(merged.source, "generic, 23andMe")
         self.assertListEqual(merged._source, ["generic", "23andMe"])
         dest = os.path.join(tmpdir, "generic__23andMe_GRCh37.txt")
         self.assertEqual(merged.save(), dest)
         reloaded = SNPs(dest)
         self.assertEqual(reloaded.source, "generic, 23andMe")
         self.assertListEqual(reloaded._source, ["generic", "23andMe"])
         # Compare the reloaded frame against the merged one; comparing a
         # frame with itself can never fail.
         pd.testing.assert_frame_equal(reloaded.snps, merged.snps, check_exact=True)
         self.assert_results(results, [{"merged": True}])
Esempio n. 10
0
 def test_source_snps(self):
     """Merge two sources, save, and confirm the reloaded data matches.

     Checks that the combined source is reported before saving and after
     reloading, and that the reloaded SNPs equal the merged SNPs.
     """
     merged = SNPs("tests/input/GRCh37.csv")
     self.assertEqual(merged.source, "generic")
     results = merged.merge((SNPs("tests/input/23andme.txt"),))
     self.assertEqual(merged.source, "generic, 23andMe")
     self.assertListEqual(merged._source, ["generic", "23andMe"])
     self.assertEqual(
         os.path.relpath(merged.save()),
         f"output{os.sep}generic__23andMe_GRCh37.txt",
     )
     reloaded = SNPs("output/generic__23andMe_GRCh37.txt")
     self.assertEqual(reloaded.source, "generic, 23andMe")
     self.assertListEqual(reloaded._source, ["generic", "23andMe"])
     # Compare the reloaded frame against the merged one; comparing a frame
     # with itself can never fail.
     pd.testing.assert_frame_equal(reloaded.snps, merged.snps, check_exact=True)
     self.assert_results(results, [{"merged": True}])
Esempio n. 11
0
    def test_save_snps_false_positive_build(self):
        """Save, swap in a fixed "Generated by" line, and parse the file again.

        The line containing "snps v" is replaced with one carrying a dev
        version string; the rewritten file must still pass the parsing tests.
        NOTE(review): presumably this guards against the build being
        misdetected from the version hash ("...gb386302") - confirm.
        """
        generated_by = "# Generated by snps v1.2.3.post85.dev0+gb386302, https://pypi.org/project/snps/\n"

        snps = SNPs("tests/input/generic.csv")
        # relpath() returns the platform's separator, so build the expected
        # path with os.path.join instead of a hard-coded "/".
        output = os.path.join("output", "generic_GRCh37.txt")
        self.assertEqual(os.path.relpath(snps.save()), output)

        # Iterate the file lazily and collect the lines, instead of growing a
        # string with += over readlines().
        with open(output, "r") as f:
            lines = [generated_by if "snps v" in line else line for line in f]

        with open(output, "w") as f:
            f.write("".join(lines))

        self.run_parsing_tests(output, "generic")
Esempio n. 12
0
    def test_save_snps_vcf(self):
        """Save as VCF using the test reference sequence and parse the output."""
        snps = SNPs("tests/input/testvcf.vcf")

        resources = Resources()
        resources._reference_sequences["GRCh37"] = {}

        with tempfile.TemporaryDirectory() as workdir:
            # compress the test FASTA and register it as chromosome 1
            fasta_gz = os.path.join(workdir, "generic.fa.gz")
            gzip_file("tests/input/generic.fa", fasta_gz)
            resources._reference_sequences["GRCh37"]["1"] = ReferenceSequence(
                ID="1", path=fasta_gz
            )

            saved_path = os.path.relpath(snps.save(vcf=True))
            self.assertEqual(saved_path, f"output{os.sep}vcf_GRCh37.vcf")

        self.run_parsing_tests_vcf("output/vcf_GRCh37.vcf")
Esempio n. 13
0
    def test_save_snps_false_positive_build(self):
        """Save, swap in a fixed "Generated by" line, and parse the file again.

        The line containing "snps v" is replaced with one carrying a dev
        version string; the rewritten file must still pass the parsing tests.
        NOTE(review): presumably this guards against the build being
        misdetected from the version hash ("...gb386302") - confirm.
        """
        generated_by = "# Generated by snps v1.2.3.post85.dev0+gb386302, https://pypi.org/project/snps/\n"

        with tempfile.TemporaryDirectory() as tmpdir:
            snps = SNPs("tests/input/generic.csv", output_dir=tmpdir)
            output = os.path.join(tmpdir, "generic_GRCh37.txt")
            self.assertEqual(snps.save(), output)

            # Iterate the file lazily and collect the lines, instead of
            # growing a string with += over readlines().
            with open(output, "r") as f:
                lines = [
                    generated_by if "snps v" in line else line for line in f
                ]

            with open(output, "w") as f:
                f.write("".join(lines))

            self.run_parsing_tests(output, "generic")
Esempio n. 14
0
    def test_save_snps_vcf(self):
        """Save as VCF into a temp dir and parse the output."""
        with tempfile.TemporaryDirectory() as out_dir:
            snps = SNPs("tests/input/testvcf.vcf", output_dir=out_dir)

            resources = Resources()
            resources._reference_sequences["GRCh37"] = {}

            vcf_path = os.path.join(out_dir, "vcf_GRCh37.vcf")
            with tempfile.TemporaryDirectory() as ref_dir:
                # compress the test FASTA and register it as chromosome 1
                fasta_gz = os.path.join(ref_dir, "generic.fa.gz")
                gzip_file("tests/input/generic.fa", fasta_gz)
                resources._reference_sequences["GRCh37"]["1"] = ReferenceSequence(
                    ID="1", path=fasta_gz
                )

                saved_path = snps.save(vcf=True)
                self.assertEqual(saved_path, vcf_path)

            self.run_parsing_tests_vcf(vcf_path)
Esempio n. 15
0
    def test_save_snps_vcf_phased(self):
        """Save phased data as VCF and parse the saved file back as phased."""
        # load the phased input
        phased = SNPs("tests/input/testvcf_phased.vcf")

        # point the GRCh37 resources at the test FASTA reference sequence
        resources = Resources()
        resources._reference_sequences["GRCh37"] = {}

        with tempfile.TemporaryDirectory() as workdir:
            fasta_gz = os.path.join(workdir, "generic.fa.gz")
            gzip_file("tests/input/generic.fa", fasta_gz)
            resources._reference_sequences["GRCh37"]["1"] = ReferenceSequence(
                ID="1", path=fasta_gz
            )

            # write the phased data out as VCF
            saved_path = os.path.relpath(phased.save(vcf=True))
            self.assertEqual(saved_path, f"output{os.sep}vcf_GRCh37.vcf")

        # the saved VCF must parse as phased data
        self.run_parsing_tests_vcf("output/vcf_GRCh37.vcf", phased=True)
Esempio n. 16
0
    def test_save_snps_vcf_discrepant_pos(self):
        """Save a VCF into a temp dir after moving two SNPs to discrepant positions.

        The moved SNPs must show up in ``discrepant_vcf_position`` and the
        saved file is checked against the generic VCF SNPs without them.
        """
        with tempfile.TemporaryDirectory() as out_dir:
            snps = SNPs("tests/input/testvcf.vcf", output_dir=out_dir)

            resources = Resources()
            resources._reference_sequences["GRCh37"] = {}

            vcf_path = os.path.join(out_dir, "vcf_GRCh37.vcf")
            with tempfile.TemporaryDirectory() as ref_dir:
                # compress the test FASTA and register it as chromosome 1
                fasta_gz = os.path.join(ref_dir, "generic.fa.gz")
                gzip_file("tests/input/generic.fa", fasta_gz)
                resources._reference_sequences["GRCh37"]["1"] = ReferenceSequence(
                    ID="1", path=fasta_gz
                )

                # create discrepant SNPs by setting positions outside reference sequence
                for rsid, position in (("rs1", 0), ("rs17", 118)):
                    snps._snps.loc[rsid, "pos"] = position

                # ensure "pos" is uint32 again after the manual tweaking
                snps._snps = snps._snps.astype({"pos": np.uint32})

                saved_path = snps.save(vcf=True)
                self.assertEqual(saved_path, vcf_path)

            expected_discrepant = self.create_snp_df(
                rsid=["rs1", "rs17"],
                chrom=["1", "1"],
                pos=[0, 118],
                genotype=["AA", np.nan],
            )
            pd.testing.assert_frame_equal(
                snps.discrepant_vcf_position, expected_discrepant, check_exact=True
            )

            remaining = self.generic_snps_vcf().drop(["rs1", "rs17"])
            self.run_parsing_tests_vcf(vcf_path, snps_df=remaining)
Esempio n. 17
0
 def test_save_buffer_binary(self):
     """Save into a binary buffer and check the leading header bytes."""
     snps = SNPs("tests/input/generic.csv")
     buffer = io.BytesIO()
     snps.save(buffer)
     # NOTE(review): read() starts at the buffer's current position, so this
     # appears to rely on save() leaving the buffer rewound - confirm.
     contents = buffer.read()
     self.assertTrue(contents.startswith(b"# Generated by snps"))
Esempio n. 18
0
 def test_save_snps_csv(self):
     """Save as CSV under the default name and parse the result."""
     snps = SNPs("tests/input/generic.csv")
     # relpath() returns the platform's separator, so the expected path is
     # built with os.sep instead of a hard-coded "/".
     self.assertEqual(os.path.relpath(snps.save(sep=",")),
                      f"output{os.sep}generic_GRCh37.csv")
     self.run_parsing_tests("output/generic_GRCh37.csv", "generic")
Esempio n. 19
0
 def test_save_buffer(self):
     """Save into a text buffer and check the leading header text."""
     snps = SNPs("tests/input/generic.csv")
     buffer = io.StringIO()
     snps.save(buffer)
     # NOTE(review): read() starts at the buffer's current position, so this
     # appears to rely on save() leaving the buffer rewound - confirm.
     contents = buffer.read()
     self.assertTrue(contents.startswith("# Generated by snps"))
Esempio n. 20
0
 def test_save_snps(self):
     """Save with default arguments into a temp dir and parse the result."""
     with tempfile.TemporaryDirectory() as workdir:
         expected_path = os.path.join(workdir, "generic_GRCh37.txt")
         source = SNPs("tests/input/generic.csv", output_dir=workdir)
         saved_path = source.save()
         self.assertEqual(saved_path, expected_path)
         self.run_parsing_tests(expected_path, "generic")
Esempio n. 21
0
 def test_save_no_snps(self):
     """Saving an SNPs object created without input returns a falsy value."""
     empty = SNPs()
     outcome = empty.save()
     self.assertFalse(outcome)
Esempio n. 22
0
 def test_save_no_snps_vcf(self):
     """Saving as VCF from an SNPs object created without input returns a falsy value."""
     empty = SNPs()
     outcome = empty.save(vcf=True)
     self.assertFalse(outcome)
Esempio n. 23
0
 def test_save_snps_specify_file(self):
     """Save to a caller-specified filename and parse the result."""
     source = SNPs("tests/input/generic.csv")
     saved_path = os.path.relpath(source.save("snps.csv"))
     self.assertEqual(saved_path, f"output{os.sep}snps.csv")
     self.run_parsing_tests("output/snps.csv", "generic")
Esempio n. 24
0
 def test_save_snps(self):
     """Save with default arguments and parse the result."""
     source = SNPs("tests/input/generic.csv")
     saved_path = os.path.relpath(source.save())
     self.assertEqual(saved_path, f"output{os.sep}generic_GRCh37.txt")
     self.run_parsing_tests("output/generic_GRCh37.txt", "generic")