def test_make_data_directory(self): new_dir = os.path.join(TestChromosome.TEST_DATA_DIR, "test") self.assertFalse(os.path.isdir(new_dir)) os.environ['DATA_DIR_VARIABLE'] = new_dir get_data_directory() self.assertTrue(os.path.isdir(new_dir)) os.rmdir(new_dir)
def test_make_data_directory(self): new_dir = os.path.join(TestChromosome.TEST_DATA_DIR, "test") self.assertFalse(os.path.isdir(new_dir)) os.environ[DATA_DIR_VARIABLE] = new_dir get_data_directory() self.assertTrue(os.path.isdir(new_dir)) os.rmdir(new_dir)
def test_get_data_directory(self): data_dir = get_data_directory() self.assertEqual(TestChromosome.TEST_DATA_DIR, data_dir)
def test_file_names(self): for accession in BUILD38_ACCESSIONS.values(): fasta = os.path.join(get_data_directory(), str(accession) + ".fa") self.assertTrue(os.path.isfile(fasta), fasta)
class TestBuild37(TestCase): GRCH37_PATH = os.path.join(get_data_directory(), 'homo_sapiens_GRCh37') def test_file_count(self): file_count = len( fnmatch.filter(os.listdir(TestBuild37.GRCH37_PATH), '*.fa')) self.assertEqual(file_count, 25) def test_file_names(self): for name in BUILD37_CHROMOSOMES.keys(): fasta = os.path.join(TestBuild37.GRCH37_PATH, "chr" + str(name) + ".fa") self.assertTrue(os.path.isfile(fasta)) # all test sequences were extracted from https://genome.ucsc.edu/ using the # chromosome browser tool def test_chr_start_sequences(self): test_str = "N" * 20 for name in BUILD37_CHROMOSOMES.keys(): # these chromosomes do not have telomeres if name == 'MT' or name == '17': continue seq = Chromosome(name).sequence(0, 20) self.assertEqual(seq, test_str) def test_chr1_sequence(self): expected_seq = "AATCTAAAAAACTGTCAGAT" seq = Chromosome(1).sequence(243400000, 243400020) self.assertEqual(expected_seq, seq) def test_chr2_sequence(self): expected_seq = "tgtccacgcgcggatgtcgt" seq = Chromosome(2).sequence(237513040, 237513060) self.assertEqual(expected_seq, seq) def test_chr3_sequence(self): expected_seq = "ctctttcgcccaggctggag" seq = Chromosome(3).sequence(190352536, 190352556) self.assertEqual(expected_seq, seq) def test_chr4_sequence(self): expected_seq = "ttggagccaaggtctcactc" seq = Chromosome(4).sequence(184622015, 184622035) self.assertEqual(expected_seq, seq) def test_chr5_sequence(self): expected_seq = "CTTTACTCCACTCATATTCT" seq = Chromosome(5).sequence(158879589, 158879609) self.assertEqual(expected_seq, seq) def test_chr6_sequence(self): expected_seq = "AGGTGGTAGCCCAGTGGTGC" seq = Chromosome(6).sequence(158882594, 158882614) self.assertEqual(expected_seq, seq) def test_chr7_sequence(self): expected_seq = "CTTGCTCTCATCCTCCGGGT" seq = Chromosome(7).sequence(158896447, 158896467) self.assertEqual(expected_seq, seq) def test_chr8_sequence(self): expected_seq = "CTGTCTCCACTGCAGGGCTC" seq = Chromosome(8).sequence(139508913, 139508933) self.assertEqual(expected_seq, seq) def test_chr9_sequence(self): expected_seq = "GAGGAGAACATTTGCCTGCA" seq = Chromosome(9).sequence(140705912, 140705932) self.assertEqual(expected_seq, seq) def test_chr10_sequence(self): expected_seq = "TCTGCAGGGGGCGGAGGAAA" seq = Chromosome(10).sequence(121086020, 121086040) self.assertEqual(expected_seq, seq) def test_chr11_sequence(self): expected_seq = "CTGAGGGTGGCGCTCTCCCC" seq = Chromosome(11).sequence(132812820, 132812840) self.assertEqual(expected_seq, seq) def test_chr12_sequence(self): expected_seq = "CCTCATGCCCAGTTCTACGT" seq = Chromosome(12).sequence(132824462, 132824482) self.assertEqual(expected_seq, seq) def test_chr13_sequence(self): expected_seq = "GAAAAGAATTCAAAGAACAC" seq = Chromosome(13).sequence(113086756, 113086776) self.assertEqual(expected_seq, seq) def test_chr14_sequence(self): expected_seq = "GCAACGGGGTGGTCATCCAC" seq = Chromosome(14).sequence(105204712, 105204732) self.assertEqual(expected_seq, seq) def test_chr15_sequence(self): expected_seq = "ttcaatcactgatacccttt" seq = Chromosome(15).sequence(99921491, 99921511) self.assertEqual(expected_seq, seq) def test_chr16_sequence(self): expected_seq = "CTTTCAGCACAGGGCTGTGA" seq = Chromosome(16).sequence(89862313, 89862333) self.assertEqual(expected_seq, seq) def test_chr17_sequence(self): expected_seq = "TGGAGCTGGAGCCACAGGTC" seq = Chromosome(17).sequence(80014178, 80014198) self.assertEqual(expected_seq, seq) def test_chr18_sequence(self): expected_seq = "CGAACACTTCGTTGTCCTCT" seq = Chromosome(18).sequence(74778253, 74778273) self.assertEqual(expected_seq, seq) def test_chr19_sequence(self): expected_seq = "GGCTGGTTAAACTCGGGGTC" seq = Chromosome(19).sequence(55798374, 55798394) self.assertEqual(expected_seq, seq) def test_chr20_sequence(self): expected_seq = "CTGCCCAAGTGCTCCTGGAG" seq = Chromosome(20).sequence(55803284, 55803304) self.assertEqual(expected_seq, seq) def test_chr21_sequence(self): expected_seq = "GGCTGGTGTGGCACATGATG" seq = Chromosome(21).sequence(46074515, 46074535) self.assertEqual(expected_seq, seq) def test_chr22_sequence(self): expected_seq = "AGACGCCGCCCCTGTTCATG" seq = Chromosome(22).sequence(50552076, 50552096) self.assertEqual(expected_seq, seq) def test_chrX_sequence(self): expected_seq = "GCAAGCAGCAGGATGGGGCC" seq = Chromosome("X").sequence(152811545, 152811565) self.assertEqual(expected_seq, seq) def test_chrY_sequence(self): expected_seq = "CTGAACGTGCTGAGTTACAG" seq = Chromosome("Y").sequence(25325643, 25325663) self.assertEqual(expected_seq, seq) def test_chrMT_sequence(self): expected_seq = "TATTGTACGGTACCATAAAT" seq = Chromosome("MT").sequence(16121, 16141) self.assertEqual(expected_seq, seq)
class TestBuild38(TestCase): GRCH38_PATH = os.path.join(get_data_directory(), 'homo_sapiens_GRCh38') def test_file_count(self): file_count = len(fnmatch.filter(os.listdir(TestBuild38.GRCH38_PATH), '*.fa')) self.assertEqual(file_count, 25) def test_file_names(self): for name in BUILD38_CHROMOSOMES.keys(): fasta = os.path.join(TestBuild38.GRCH38_PATH, "chr" + str(name) + ".fa") self.assertTrue(os.path.isfile(fasta)) # all test sequences were extracted from https://genome.ucsc.edu/ using the # chromosome browser tool def test_chr_start_sequences(self): test_str = "N" * 20 for name in BUILD38_CHROMOSOMES.keys(): # these chromosomes do not have telomeres if name == 'MT'or name == '17': continue seq = Chromosome(name, assembly=BUILD38).sequence(0, 20) self.assertEqual(seq, test_str) def test_chr1_sequence(self): expected_seq = "ACAGGAAAAAGATAGCATTC" seq = Chromosome(1, assembly=BUILD38).sequence(243415701, 243415721) self.assertEqual(expected_seq, seq) def test_chr2_sequence(self): expected_seq = "GCTGGGCCTGAACTGATATC" seq = Chromosome(2, assembly=BUILD38).sequence(237518537, 237518557) self.assertEqual(expected_seq, seq) def test_chr3_sequence(self): expected_seq = "GCTGAAGTCATCGATGTGAG" seq = Chromosome(3, assembly=BUILD38).sequence(175256410, 175256430) self.assertEqual(expected_seq, seq) def test_chr4_sequence(self): expected_seq = "CTGtttctgaccacagcctc" seq = Chromosome(4, assembly=BUILD38).sequence(184624738, 184624758) self.assertEqual(expected_seq, seq) def test_chr5_sequence(self): expected_seq = "CTGTCAATTATCACTGGATC" seq = Chromosome(5, assembly=BUILD38).sequence(159073395, 159073415) self.assertEqual(expected_seq, seq) def test_chr6_sequence(self): expected_seq = "GATGCACGCTGCTGTTTTAT" seq = Chromosome(6, assembly=BUILD38).sequence(155144605, 155144625) self.assertEqual(expected_seq, seq) def test_chr7_sequence(self): expected_seq = "GAGCTGGTGGGGAGTAACCC" seq = Chromosome(7, assembly=BUILD38).sequence(154446213, 154446233) self.assertEqual(expected_seq, seq) def test_chr8_sequence(self): expected_seq = "atcgtggcgtgttctgcagg" seq = Chromosome(8, assembly=BUILD38).sequence(132447200, 132447220) self.assertEqual(expected_seq, seq) def test_chr9_sequence(self): expected_seq = "GAACCCTCTCATCGTCAAGG" seq = Chromosome(9, assembly=BUILD38).sequence(132410447, 132410467) self.assertEqual(expected_seq, seq) def test_chr10_sequence(self): expected_seq = "TTCAGGTTCCTTTGCAGCTC" seq = Chromosome(10, assembly=BUILD38).sequence(122849420, 122849440) self.assertEqual(expected_seq, seq) def test_chr11_sequence(self): expected_seq = "TTTTTAAATGAGTATCCTGG" seq = Chromosome(11, assembly=BUILD38).sequence(122850195, 122850215) self.assertEqual(expected_seq, seq) def test_chr12_sequence(self): expected_seq = "CATCCCCAGTTTCCCGCGGG" seq = Chromosome(12, assembly=BUILD38).sequence(122850834, 122850854) self.assertEqual(expected_seq, seq) def test_chr13_sequence(self): expected_seq = "CCCCCCGAAAAGGGCAAAGG" seq = Chromosome(13, assembly=BUILD38).sequence(113089709, 113089729) self.assertEqual(expected_seq, seq) def test_chr14_sequence(self): expected_seq = "CCCATGTAGTCCAGGTCAGA" seq = Chromosome(14, assembly=BUILD38).sequence(100353686, 100353706) self.assertEqual(expected_seq, seq) def test_chr15_sequence(self): expected_seq = "attaaaatcatccaatttcc" seq = Chromosome(15, assembly=BUILD38).sequence(86987986, 86988006) self.assertEqual(expected_seq, seq) def test_chr16_sequence(self): expected_seq = "TTTCAAGCCACAGTCGAGGA" seq = Chromosome(16, assembly=BUILD38).sequence(83670789, 83670809) self.assertEqual(expected_seq, seq) def test_chr17_sequence(self): expected_seq = "aaacatcatctctaccaaaa" seq = Chromosome(17, assembly=BUILD38).sequence(80014178, 80014198) self.assertEqual(expected_seq, seq) def test_chr18_sequence(self): expected_seq = "TGCAAAGAGAAATCCTTgga" seq = Chromosome(18, assembly=BUILD38).sequence(67834418, 67834438) self.assertEqual(expected_seq, seq) def test_chr19_sequence(self): expected_seq = "CTGGGCTGCAGAATCGCTGG" seq = Chromosome(19, assembly=BUILD38).sequence(45500047, 45500067) self.assertEqual(expected_seq, seq) def test_chr20_sequence(self): expected_seq = "ATGAGATGGACCAAACGCCC" seq = Chromosome(20, assembly=BUILD38).sequence(59743106, 59743126) self.assertEqual(expected_seq, seq) def test_chr21_sequence(self): expected_seq = "GGCCCCCCCGGACCACCAGG" seq = Chromosome(21, assembly=BUILD38).sequence(45497642, 45497662) self.assertEqual(expected_seq, seq) def test_chr22_sequence(self): expected_seq = "CTTTTCATTAACTGGATAAA" seq = Chromosome(22, assembly=BUILD38).sequence(43711474, 43711494) self.assertEqual(expected_seq, seq) def test_chrX_sequence(self): expected_seq = "GGACAACACCtgttaggggc" seq = Chromosome("X", assembly=BUILD38).sequence(152811545, 152811565) self.assertEqual(expected_seq, seq) def test_chrY_sequence(self): expected_seq = "CAGACCTTCTGCAGTGCACC" seq = Chromosome("Y", assembly=BUILD38).sequence(25325643, 25325663) self.assertEqual(expected_seq, seq) def test_chrMT_sequence(self): expected_seq = "ATTGTACGGTACCATAAATA" seq = Chromosome("MT", assembly=BUILD38).sequence(16121, 16141) self.assertEqual(expected_seq, seq)
def test_file_names(self): for accession in BUILD37_ACCESSIONS.values(): fasta = os.path.join(get_data_directory(), str(accession) + ".fa") self.assertTrue(os.path.isfile(fasta), fasta)