예제 #1
0
 def test_remap_snps_invalid_assembly(self):
     """Check that asking for an unknown assembly (-1) remaps nothing.

     The build and assembly must stay at 37 / "GRCh37", no chromosome may be
     reported as remapped, and two must be reported as not remapped.
     """
     snps_obj = SNPs("tests/input/GRCh37.csv")
     remapped, not_remapped = snps_obj.remap_snps(-1)

     # Build metadata is untouched by the rejected request.
     assert snps_obj.build == 37
     assert snps_obj.assembly == "GRCh37"

     # Nothing was remapped; both chromosomes are listed as skipped.
     assert len(remapped) == 0
     assert len(not_remapped) == 2
예제 #2
0
 def test_remap_snps_37_to_37(self):
     """Check that remapping build-37 data to build 37 changes nothing.

     No chromosome may be reported as remapped, two must be reported as not
     remapped, and the SNP table must equal the GRCh37 reference frame.
     """
     snps_obj = SNPs("tests/input/GRCh37.csv")
     remapped, not_remapped = snps_obj.remap_snps(37)

     assert snps_obj.build == 37
     assert snps_obj.assembly == "GRCh37"
     assert len(remapped) == 0
     assert len(not_remapped) == 2

     # The data itself must still match the build-37 reference frame.
     actual = snps_obj.snps
     expected = self.snps_GRCh37()
     pd.testing.assert_frame_equal(actual, expected)
예제 #3
0
 def test_remap_snps_37_to_36(self):
     """Check remapping of the GRCh37 fixture to build 36.

     Afterwards the object must report build 36 / "NCBI36", list two
     chromosomes as remapped and none as skipped, and hold a SNP table equal
     to the NCBI36 reference frame.
     """
     snps_obj = SNPs("tests/input/GRCh37.csv")
     remapped, not_remapped = snps_obj.remap_snps(36)

     assert snps_obj.build == 36
     assert snps_obj.assembly == "NCBI36"
     assert len(remapped) == 2
     assert len(not_remapped) == 0

     actual = snps_obj.snps
     expected = self.snps_NCBI36()
     pd.testing.assert_frame_equal(actual, expected)
예제 #4
0
 def test_remap_snps_36_to_37_multiprocessing(self):
     """Check remapping of the NCBI36 fixture to build 37 with parallelize=True.

     Afterwards the object must report build 37 / "GRCh37", list two
     chromosomes as remapped and none as skipped, and hold a SNP table equal
     to the GRCh37 reference frame.
     """
     # The only difference from a plain 36 -> 37 remap is the parallelize flag.
     snps_obj = SNPs("tests/input/NCBI36.csv", parallelize=True)
     remapped, not_remapped = snps_obj.remap_snps(37)

     assert snps_obj.build == 37
     assert snps_obj.assembly == "GRCh37"
     assert len(remapped) == 2
     assert len(not_remapped) == 0

     actual = snps_obj.snps
     expected = self.snps_GRCh37()
     pd.testing.assert_frame_equal(actual, expected)
예제 #5
0
 def f2():
     """Remap the NCBI36 fixture to build 37 and verify the outcome.

     Asserts build 37 / "GRCh37", two remapped chromosomes, none skipped, and
     an exact match of the SNP table against the GRCh37 reference frame.
     """
     # NOTE(review): `self` is not a parameter of f2; presumably f2 is nested
     # inside a test method and closes over that method's `self` - confirm.
     snps_obj = SNPs("tests/input/NCBI36.csv")
     remapped, not_remapped = snps_obj.remap_snps(37)

     self.assertEqual(snps_obj.build, 37)
     self.assertEqual(snps_obj.assembly, "GRCh37")
     self.assertEqual(len(remapped), 2)
     self.assertEqual(len(not_remapped), 0)

     actual = snps_obj.snps
     expected = self.snps_GRCh37()
     pd.testing.assert_frame_equal(actual, expected, check_exact=True)
예제 #6
0
 def test_remap_snps_37_to_38_with_PAR_SNP(self):
     """Check remapping of the GRCh37 PAR fixture to build 38.

     The checks only run when the DOWNLOADS_ENABLED environment variable is
     unset, empty, or equal to "true"; otherwise the test returns without
     asserting anything.
     """
     downloads_flag = os.getenv("DOWNLOADS_ENABLED")
     if downloads_flag and downloads_flag != "true":
         return

     snps_obj = SNPs("tests/input/GRCh37_PAR.csv")
     assert snps_obj.snp_count == 3

     remapped, not_remapped = snps_obj.remap_snps(38)

     assert snps_obj.build == 38
     assert snps_obj.assembly == "GRCh38"
     assert len(remapped) == 2
     assert len(not_remapped) == 1

     # One of the three input SNPs is gone after the remap.
     assert snps_obj.snp_count == 2

     actual = snps_obj.snps
     expected = self.snps_GRCh38_PAR()
     pd.testing.assert_frame_equal(actual, expected)
예제 #7
0
#! /usr/bin/env python
"""
Validate input VCF files & remap them to GRCh37.
depends on:
> python 3
> argparse==1.4.0
> snps==0.4.0
> io
"""

import argparse
from snps import SNPs
import io

parser = argparse.ArgumentParser(description='Remap VCF files to GRCh37')
# Both options are mandatory. If -i were omitted, SNPs() below would be handed
# None; if -o were omitted, the output name would be formatted as the literal
# "None.vcf". Let argparse reject such invocations with a usage error instead.
parser.add_argument('-i', '--input_file', required=True,
                    help='Input VCF file')
parser.add_argument('-o', '--output_file', required=True,
                    help='Output VCF file basename')
args = vars(parser.parse_args())
input_file = args['input_file']
output_file = args['output_file']
# The -o value is a basename; the ".vcf" extension is appended here.
output_file_name = f"{output_file}.vcf"

# read & validate input file
snps = SNPs(input_file)

# remap SNPs only when a build was detected and it is not already build 37
if snps.build_detected and snps.build != 37:
    snps.remap_snps(37)

# save to file: tab-separated VCF output, written without a header
saved_snps = snps.save_snps(output_file_name, sep="\t", header=False, vcf=True)
예제 #8
0
)
parser.add_argument(
    "-t",
    "--input_target",
    help="Input BIM file (a combination of all BIM files, transformed into a 23andme-like format",
)
parser.add_argument(
    "-b",
    "--input_base",
    help="Input base file, transformed into a 23andme-like format",
)

args = vars(parser.parse_args())

# Pull the two input paths out of the parsed arguments.
input_target = args["input_target"]
input_base = args["input_base"]

# ---------------------------------------------------------------------------
# Load both files. When their builds differ, remap the base to the target's
# build and save the remapped base to "new_base_coordinates.txt".
# ---------------------------------------------------------------------------

target = SNPs(input_target, output_dir=".")
base = SNPs(input_base, output_dir=".")

if base.build != target.build:
    base.remap_snps(target.build)
    # Tab-separated output, written with a header.
    updated_base = base.save_snps("new_base_coordinates.txt", sep="\t", header=True)
예제 #9
0
 def test_remap_snps_no_snps(self):
     """Check that remapping an SNPs object built without input does nothing.

     The build must stay falsy and both returned chromosome collections must
     be empty.
     """
     empty = SNPs()
     remapped, not_remapped = empty.remap_snps(38)

     assert not empty.build
     assert len(remapped) == 0
     assert len(not_remapped) == 0