def f():
    """Save the generic CSV fixture with a comma separator and re-parse it.

    Takes no arguments; ``self`` is resolved from the enclosing scope and
    provides ``assertEqual`` and ``run_parsing_tests``.
    """
    # Expected location of the saved file, relative to the working directory.
    expected_path = f"output{os.sep}generic_GRCh37.csv"

    saved_path = SNPs("tests/input/generic.csv").save_snps(sep=",")
    self.assertEqual(os.path.relpath(saved_path), expected_path)

    # The written file must parse like the original "generic" source.
    self.run_parsing_tests("output/generic_GRCh37.csv", "generic")
def test_save_snps_vcf_phased(self):
    """Round-trip phased VCF data through ``save_snps(vcf=True)``.

    Reads a phased VCF fixture, registers a gzipped copy of the test FASTA
    as the GRCh37 reference sequence for chromosome "1", saves the SNPs as
    VCF, and checks that the re-read file is still phased and matches
    ``self.generic_snps_vcf()``.
    """
    # read phased data
    s = SNPs("tests/input/testvcf_phased.vcf")

    # setup resource to use test FASTA reference sequence
    # NOTE(review): presumably Resources() shares state with the instance
    # used by save_snps - confirm against the Resources implementation.
    r = Resources()
    r._reference_sequences["GRCh37"] = {}
    with open("tests/input/generic.fa", "rb") as f_in:
        with atomic_write(
            "tests/input/generic.fa.gz", mode="wb", overwrite=True
        ) as f_out:
            with gzip.open(f_out, "wb") as f_gzip:
                shutil.copyfileobj(f_in, f_gzip)

    seq = ReferenceSequence(ID="1", path="tests/input/generic.fa.gz")
    r._reference_sequences["GRCh37"]["1"] = seq

    # save phased data to VCF; build the expected path with the platform
    # separator because os.path.relpath returns an OS-normalized path
    expected_path = os.path.join("output", "vcf_GRCh37.vcf")
    assert os.path.relpath(s.save_snps(vcf=True)) == expected_path

    # read saved VCF
    s = SNPs(expected_path)
    assert s.phased
    pd.testing.assert_frame_equal(s.snps, self.generic_snps_vcf())
def f():
    """Save the generic CSV fixture into a temporary directory and re-parse it.

    Takes no arguments; ``self`` is resolved from the enclosing scope and
    provides ``assertEqual`` and ``run_parsing_tests``.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        loaded = SNPs("tests/input/generic.csv", output_dir=tmpdir)
        dest = os.path.join(tmpdir, "generic_GRCh37.csv")

        # save_snps is expected to return exactly the path inside tmpdir.
        saved_path = loaded.save_snps(sep=",")
        self.assertEqual(saved_path, dest)

        # Parse while the temporary directory still exists.
        self.run_parsing_tests(dest, "generic")
def test_save_snps_csv_phased(self):
    """Round-trip phased VCF data through the default ``save_snps()`` output.

    Reads a phased VCF fixture, saves it with default arguments, and checks
    that the re-read CSV is still phased and matches
    ``self.generic_snps_vcf()``.
    """
    # read phased data
    s = SNPs("tests/input/testvcf_phased.vcf")

    # save phased data to CSV; build the expected path with the platform
    # separator because os.path.relpath returns an OS-normalized path
    expected_path = os.path.join("output", "vcf_GRCh37.csv")
    assert os.path.relpath(s.save_snps()) == expected_path

    # read saved CSV
    s = SNPs(expected_path)
    assert s.phased
    pd.testing.assert_frame_equal(s.snps, self.generic_snps_vcf())
"""
Validate input VCF files & remap them to GRCh37.

depends on:
> python 3
> argparse==1.4.0
> snps==0.4.0
> io
"""
import argparse
from snps import SNPs
import io

# Command-line interface: an input VCF path and an output basename.
parser = argparse.ArgumentParser(description="Remap VCF files to GRCh37")
parser.add_argument("-i", "--input_file", help="Input VCF file")
parser.add_argument("-o", "--output_file", help="Output VCF file basename")

# Work with the parsed arguments as a plain dict.
args = vars(parser.parse_args())
input_file = args["input_file"]
output_file = args["output_file"]

# The ".vcf" extension is appended to the user-supplied basename.
output_file_name = f"{output_file}.vcf"

# read & validate input file
snps = SNPs(input_file)

# remap SNPs if reference genome is not GRCh37
# (only when a build was actually detected for the input)
if snps.build_detected and snps.build != 37:
    snps.remap_snps(37)

# save to file as a tab-separated VCF, without the header
saved_snps = snps.save_snps(
    output_file_name,
    sep="\t",
    header=False,
    vcf=True,
)
) parser.add_argument( '-t', '--input_target', help= 'Input BIM file (a combination of all BIM files, transformed into a 23andme-like format' ) parser.add_argument( '-b', '--input_base', help='Input base file, transformed into a 23andme-like format') args = vars(parser.parse_args()) # Args to variable input_target = args['input_target'] input_base = args['input_base'] ############################################################################### # Detect builds and update the base's build if it does not match the target's # ############################################################################### target = SNPs(input_target, output_dir='.') base = SNPs(input_base, output_dir='.') if base.build != target.build: base.remap_snps(target.build) updated_base = base.save_snps("new_base_coordinates.txt", sep="\t", header=True)
class TestSnps(BaseSNPsTestCase):
    """Tests for ``SNPs`` properties, build detection, saving and deduplication.

    ``setUp`` loads the same chromosomes fixture from a path, from raw bytes,
    and from zip- and gzip-compressed bytes so the buffer-based readers can be
    compared against the path-based one.
    """

    def setUp(self):
        """Create the ``SNPs`` fixtures shared by the tests below."""
        self.snps_GRCh38 = SNPs("tests/input/GRCh38.csv")
        self.snps = SNPs("tests/input/chromosomes.csv")
        self.snps_only_detect_source = SNPs(
            "tests/input/chromosomes.csv", only_detect_source=True
        )
        self.snps_none = SNPs(None)

        # Raw (uncompressed) bytes buffer.
        with open("tests/input/chromosomes.csv", "r") as f:
            self.snps_buffer = SNPs(f.read().encode("utf-8"))

        # Zip-compressed bytes buffer: write a temporary archive, read it
        # back as bytes, then delete the archive.
        with atomic_write(
            "tests/input/chromosomes.csv.zip", mode="wb", overwrite=True
        ) as f:
            with zipfile.ZipFile(f, "w") as f_zip:
                f_zip.write(
                    "tests/input/chromosomes.csv", arcname="chromosomes.csv"
                )

        with open("tests/input/chromosomes.csv.zip", "rb") as f:
            data = f.read()
            self.snps_buffer_zip = SNPs(data)
        os.remove("tests/input/chromosomes.csv.zip")

        # Gzip-compressed bytes buffer, built the same way.
        with open("tests/input/chromosomes.csv", "rb") as f_in:
            with atomic_write(
                "tests/input/chromosomes.csv.gz", mode="wb", overwrite=True
            ) as f_out:
                with gzip.open(f_out, "wb") as f_gzip:
                    shutil.copyfileobj(f_in, f_gzip)

        with open("tests/input/chromosomes.csv.gz", "rb") as f:
            data = f.read()
            self.snps_buffer_gz = SNPs(data)
        os.remove("tests/input/chromosomes.csv.gz")

    def snps_discrepant_pos(self):
        """Return a one-row SNP frame for rs3094315 at position 1."""
        return self.create_snp_df(
            rsid=["rs3094315"], chrom=["1"], pos=[1], genotype=["AA"]
        )

    def test_assembly(self):
        assert self.snps_GRCh38.assembly == "GRCh38"

    def test_assembly_no_snps(self):
        assert self.snps_none.assembly == ""

    def test_snp_buffer_zip(self):
        assert self.snps_buffer_zip.snp_count == 6

    def test_snp_buffer_gz(self):
        assert self.snps_buffer_gz.snp_count == 6

    def test_snp_buffer(self):
        assert self.snps_buffer.snp_count == 6

    def test_snp_count(self):
        assert self.snps.snp_count == 6

    def test_snp_count_no_snps(self):
        assert self.snps_none.snp_count == 0

    def test_chromosomes(self):
        assert self.snps.chromosomes == ["1", "2", "3", "5", "PAR", "MT"]

    def test_chromosomes_no_snps(self):
        assert self.snps_none.chromosomes == []

    def test_chromosomes_summary(self):
        assert self.snps.chromosomes_summary == "1-3, 5, PAR, MT"

    def test_chromosomes_summary_no_snps(self):
        assert self.snps_none.chromosomes_summary == ""

    def test_build_no_snps(self):
        assert not self.snps_none.build

    def test_build_detected_no_snps(self):
        assert not self.snps_none.build_detected

    def test_build_detected_PAR_snps(self):
        # Runs only when DOWNLOADS_ENABLED is unset/empty or equal to "true".
        if (
            not os.getenv("DOWNLOADS_ENABLED")
            or os.getenv("DOWNLOADS_ENABLED") == "true"
        ):
            snps = SNPs("tests/input/GRCh37_PAR.csv")
            assert snps.build == 37
            assert snps.build_detected

    def test_sex_no_snps(self):
        assert self.snps_none.sex == ""

    def test_sex_Male_Y_chrom(self):
        s = self.simulate_snps(
            chrom="Y", pos_start=1, pos_max=59373566, pos_step=10000
        )
        file = s.save_snps()
        snps = SNPs(file)
        assert snps.sex == "Male"

    def test_get_summary(self):
        assert self.snps_GRCh38.get_summary() == {
            "source": "generic",
            "assembly": "GRCh38",
            "build": 38,
            "build_detected": True,
            "snp_count": 4,
            "chromosomes": "1, 3",
            "sex": "",
        }

    def test_get_summary_no_snps(self):
        assert not self.snps_none.get_summary()

    def test_is_valid_True(self):
        assert self.snps_GRCh38.is_valid()

    def test_is_valid_False(self):
        assert not self.snps_none.is_valid()

    def test__read_raw_data(self):
        assert self.snps_none.snps.empty
        assert self.snps_none.source == ""

    def test__lookup_build_with_snp_pos_None(self):
        snps = SNPs()
        snps._snps = self.snps_discrepant_pos()
        assert not snps.detect_build()

    def test_get_assembly_None(self):
        snps = SNPs()
        # Compare by value: `is ""` tests object identity, which depends on
        # string interning and raises a SyntaxWarning on Python 3.8+.
        assert snps.get_assembly() == ""

    def test_save_snps_source(self):
        # os.path.relpath returns an OS-normalized path, so build the
        # expected value with the platform separator.
        expected_path = os.path.join("output", "generic_GRCh38.csv")
        assert os.path.relpath(self.snps_GRCh38.save_snps()) == expected_path
        snps = SNPs(expected_path)
        pd.testing.assert_frame_equal(snps.snps, self.snps_GRCh38.snps)

    def test_save_snps_buffer(self):
        out = io.StringIO()
        self.snps.save_snps(out)
        # NOTE(review): read() without an explicit seek(0) only sees content
        # if save_snps rewinds the buffer - confirm against the writer.
        assert out.read().startswith("# Generated by snps")

    def test_snps_only_detect_source(self):
        assert self.snps_only_detect_source.source == "generic"

    def test_duplicate_rsids(self):
        snps = SNPs("tests/input/duplicate_rsids.csv")
        result = self.create_snp_df(
            rsid=["rs1"], chrom=["1"], pos=[101], genotype=["AA"]
        )
        duplicate_snps = self.create_snp_df(
            rsid=["rs1", "rs1"],
            chrom=["1", "1"],
            pos=[102, 103],
            genotype=["CC", "GG"],
        )
        pd.testing.assert_frame_equal(snps.snps, result)
        pd.testing.assert_frame_equal(snps.duplicate_snps, duplicate_snps)

    def test_deduplicate_false(self):
        snps = SNPs("tests/input/duplicate_rsids.csv", deduplicate=False)
        result = self.create_snp_df(
            rsid=["rs1", "rs1", "rs1"],
            chrom=["1", "1", "1"],
            pos=[101, 102, 103],
            genotype=["AA", "CC", "GG"],
        )
        pd.testing.assert_frame_equal(snps.snps, result)
def test_save_snps_no_snps(self):
    """Saving an ``SNPs`` object created without input yields a falsy result."""
    result = SNPs().save_snps()
    assert not result
def test_save_snps_specify_file(self):
    """Save to an explicit filename and verify the file round-trips.

    The returned path, made relative to the working directory, must be
    ``output/snps.csv`` (with the platform's separator), and the re-read
    data must equal ``self.snps_GRCh37()``.
    """
    s = SNPs("tests/input/GRCh37.csv")

    # os.path.relpath returns an OS-normalized path, so build the expected
    # value with the platform separator instead of hard-coding "/".
    expected_path = os.path.join("output", "snps.csv")
    assert os.path.relpath(s.save_snps("snps.csv")) == expected_path

    s_saved = SNPs(expected_path)
    pd.testing.assert_frame_equal(s_saved.snps, self.snps_GRCh37())
def test_save_snps(self):
    """Save with default arguments and verify the file round-trips.

    The returned path, made relative to the working directory, must be
    ``output/generic_GRCh37.csv`` (with the platform's separator), and the
    re-read data must equal ``self.snps_GRCh37()``.
    """
    snps = SNPs("tests/input/GRCh37.csv")

    # os.path.relpath returns an OS-normalized path, so build the expected
    # value with the platform separator instead of hard-coding "/".
    expected_path = os.path.join("output", "generic_GRCh37.csv")
    assert os.path.relpath(snps.save_snps()) == expected_path

    s_saved = SNPs(expected_path)
    pd.testing.assert_frame_equal(s_saved.snps, self.snps_GRCh37())