def test_write_withallrefpos_matrix(self):
    """Send every test position to the withallrefpos writer and verify its output.

    The writer is given the temporary directory, the contig name and the
    sample identifiers.  The test then expects exactly one file,
    ``TestContig_withallrefpos.tsv``, inside ``<tmpdir>/withallrefpos.tsv``
    containing the header followed by one row per position.
    """
    expected_files = ['TestContig_withallrefpos.tsv']
    expected_lines = (
        '\t'.join(write_matrix.get_header('withallrefpos', identifiers)) + '\n',
        'TestContig::1 A C G T R Y K M S W B D H V N . a c g t r y k m s w b d h v n 2 0 1 -19/4 3/4 4/4 5 6 7 8 0 9 TestContig 1 True True YYYYNNNNNNNNNNNNYYYYNNNNNNNNNNN YYYYYYYYYYYYNNNNNNNNNNNN NNNNNNNNNNNNYYYYYYYYYYYY 1NNNNNNNNNNNNNNNNNNNNNNN\n',
        'TestContig::2 A C G T R Y K M S W B D H V N . a c g t r y k m s w b d h v n 2 0 1 -19/4 3/4 4/4 5 6 7 8 0 9 TestContig 2 True True YYYYNNNNNNNNNNNNYYYYNNNNNNNNNNN YYYYYYYYYYYYNNNNNNNNNNNN NNNNNNNNNNNNYYYYYYYYYYYY 1NNNNNNNNNNNNNNNNNNNNNN1\n',
        'TestContig::3 A C G T R Y K M S W B D H V N . a c g t r y k m s w b d h v n 2 0 1 -19/4 3/4 4/4 5 6 7 8 0 9 TestContig 3 True True YYYYNNNNNNNNNNNNYYYYNNNNNNNNNNN YYYYYYYYYYYYNNNNNNNNNNNN NNNNNNNNNNNNYYYYYYYYYYYY 1NNNNNNNNNNNNNNNNNNNNNN2\n'
    )
    with TemporaryDirectory() as tmpdir:
        withallrefpos_dir = os.path.join(tmpdir, 'withallrefpos.tsv')
        writer = write_matrix.write_withallrefpos_matrix(tmpdir, contig_name, identifiers)
        # The initial send(None) starts the writer before any position is sent.
        writer.send(None)
        for position in self.positions:
            writer.send(position)
        writer.close()

        # The file was created.
        self.assertListEqual(expected_files, os.listdir(withallrefpos_dir))

        with open(os.path.join(withallrefpos_dir, expected_files[0])) as handle:
            # zip_longest pads the shorter side with None, so a row missing
            # from the file fails just like an unexpected extra row does.
            # A plain zip would stop early and let a truncated file pass.
            for expected_line, line in itertools.zip_longest(expected_lines, handle):
                self.assertEqual(expected_line, line)
            # The file does not contain any unexpected rows.
            self.assertEqual([], handle.readlines())

        # The output directory still holds only the expected file.
        self.assertListEqual(expected_files, os.listdir(withallrefpos_dir))
def test_write_bestsnp_vcf(self):
    """Send every test position to the bestsnp VCF writer and verify its output.

    The test expects a single file, ``TestContig_bestsnp.vcf``, directly in
    the temporary directory.  Its lines must be ``self.metadata``, the
    tab-joined VCF header, and one record for position 3.
    """
    vcf_name = 'TestContig_bestsnp.vcf'
    expected_files = [vcf_name]
    header_columns = write_matrix.get_header('vcf', identifiers)
    expected_lines = (
        self.metadata,
        '\t'.join(header_columns) + '\n',
        'TestContig 3 . A C,G,T,R,Y,K,M,S,W,B,D,H,V,N,.,a,c,g,t,r,y,k,m,s,w,b,d,h,v,n . PASS AN=31;NS=3 GT:FT\n',
    )

    with TemporaryDirectory() as workdir:
        vcf_writer = write_matrix.write_bestsnp_vcf(workdir, contig_name, identifiers, self.metadata)
        # The initial send(None) starts the writer before any position is sent.
        vcf_writer.send(None)
        for pos in self.positions:
            vcf_writer.send(pos)
        vcf_writer.close()

        # Exactly the expected file exists.
        self.assertListEqual(expected_files, os.listdir(workdir))

        vcf_path = os.path.join(workdir, vcf_name)
        with open(vcf_path) as vcf_file:
            # Compare row by row; zip_longest pads with None so that a
            # length mismatch in either direction is reported.
            for wanted, actual in itertools.zip_longest(expected_lines, vcf_file):
                self.assertEqual(wanted, actual)
            # Nothing is left unread in the file.
            self.assertEqual([], vcf_file.readlines())

        # Reading back did not leave anything else in the directory.
        self.assertListEqual(expected_files, os.listdir(workdir))
def test_write_bestsnp_matrix(self):
    """Send every test position to the bestsnp matrix writer and verify its output.

    The header identifiers are the ``name`` of the first member of each
    entry in ``sample_groups``.  The test expects a single file,
    ``TestContig_bestsnp.tsv``, inside ``<tmpdir>/bestsnp.tsv`` holding the
    header and one row for position 3.
    """
    sample_names = tuple(group[0].name for group in sample_groups)
    matrix_name = 'TestContig_bestsnp.tsv'
    expected_files = [matrix_name]
    header_columns = write_matrix.get_header('best_snp', sample_names)
    expected_lines = (
        '\t'.join(header_columns) + '\n',
        'TestContig::3 A C G T R Y K M S W B D H V N . a c g t r y k m s w b d h v n 2 0 1 -19/4 3/4 4/4 5 6 7 8 0 9 TestContig 3 True True 1NNNNNNNNNNNNNNNNNNNNNN2\n',
    )

    with TemporaryDirectory() as workdir:
        bestsnp_dir = os.path.join(workdir, 'bestsnp.tsv')
        matrix_writer = write_matrix.write_bestsnp_matrix(workdir, contig_name, sample_groups)
        # The initial send(None) starts the writer before any position is sent.
        matrix_writer.send(None)
        for pos in self.positions:
            matrix_writer.send(pos)
        matrix_writer.close()

        # Exactly the expected file exists in the output directory.
        self.assertListEqual(expected_files, os.listdir(bestsnp_dir))

        matrix_path = os.path.join(bestsnp_dir, matrix_name)
        with open(matrix_path) as matrix_file:
            # zip_longest pads the shorter side with None, so both missing
            # and surplus rows show up as a failed comparison.  A separate
            # readlines() check is not needed: the handle is already fully
            # consumed when this loop ends.
            for wanted, actual in itertools.zip_longest(expected_lines, matrix_file):
                self.assertEqual(wanted, actual)

        # Reading back did not leave anything else in the output directory.
        self.assertListEqual(expected_files, os.listdir(bestsnp_dir))
def test_write_missingdata_matrix(self):
    """Send every test position to the missingdata writer and verify its output.

    The test expects a single file, ``TestContig_missingdata.tsv``, inside
    ``<tmpdir>/missingdata.tsv`` holding the header and one row for
    position 2.
    """
    matrix_name = 'TestContig_missingdata.tsv'
    expected_files = [matrix_name]
    header_columns = write_matrix.get_header('missingdata', identifiers)
    expected_lines = (
        '\t'.join(header_columns) + '\n',
        'TestContig::2 A C G T R Y K M S W B D H V N . a c g t r y k m s w b d h v n 2 0 1 7/30 3/30 4/30 5 6 7 8 0 9 TestContig 2 True True YYYYNNNNNNNNNNNNYYYYNNNNNNNNNNN YYYYYYYYYYYYNNNNNNNNNNNN NNNNNNNNNNNNYYYYYYYYYYYY 1NNNNNNNNNNNNNNNNNNNNNN1\n',
    )

    with TemporaryDirectory() as workdir:
        missingdata_dir = os.path.join(workdir, 'missingdata.tsv')
        matrix_writer = write_matrix.write_missingdata_matrix(workdir, contig_name, identifiers)
        # The initial send(None) starts the writer before any position is sent.
        matrix_writer.send(None)
        for pos in self.positions:
            matrix_writer.send(pos)
        matrix_writer.close()

        # Exactly the expected file exists in the output directory.
        self.assertListEqual(expected_files, os.listdir(missingdata_dir))

        matrix_path = os.path.join(missingdata_dir, matrix_name)
        with open(matrix_path) as matrix_file:
            # zip_longest pads the shorter side with None, so both missing
            # and surplus rows show up as a failed comparison.  A separate
            # readlines() check is not needed: the handle is already fully
            # consumed when this loop ends.
            for wanted, actual in itertools.zip_longest(expected_lines, matrix_file):
                self.assertEqual(wanted, actual)

        # Reading back did not leave anything else in the output directory.
        self.assertListEqual(expected_files, os.listdir(missingdata_dir))
def test_withallrefpos(self):
    """The 'withallrefpos' header is the expected tuple of column names.

    The expected tuple has the locus/reference columns first, then the four
    sample columns, then the per-position statistic columns.
    """
    leading_columns = ('LocusID', 'Reference')
    sample_columns = (
        'sample1::aligner,snpcaller',
        'sample2::aligner1,snpcaller',
        'sample2::aligner2',
        'sample3::aligner,snpcaller',
    )
    trailing_columns = (
        '#SNPcall',
        '#Indelcall',
        '#Refcall',
        '#CallWasMade',
        '#PassedDepthFilter',
        '#PassedProportionFilter',
        '#A',
        '#C',
        '#G',
        '#T',
        '#Indel',
        '#NXdegen',
        'Contig',
        'Position',
        'InDupRegion',
        'SampleConsensus',
        'Pattern',
        'Pattern#',
    )
    expected = leading_columns + sample_columns + trailing_columns

    actual = write_matrix.get_header('withallrefpos', identifiers)

    self.assertEqual(expected, actual)
def test_vcf(self):
    """The 'vcf' header is the fixed VCF columns followed by the sample columns."""
    fixed_columns = (
        '#CHROM',
        'POS',
        'ID',
        'REF',
        'ALT',
        'QUAL',
        'FILTER',
        'INFO',
        'FORMAT',
    )
    sample_columns = (
        'sample1::aligner,snpcaller',
        'sample2::aligner1,snpcaller',
        'sample2::aligner2',
        'sample3::aligner,snpcaller',
    )
    expected = fixed_columns + sample_columns

    actual = write_matrix.get_header('vcf', identifiers)

    self.assertEqual(expected, actual)
def test_write_general_stats(self):
    """Drive ``write_general_stats`` with the test positions and inspect its file.

    The writer is given the full path of ``general_stats.tsv`` inside the
    temporary directory together with a ``Counter`` of contig statistics.

    NOTE(review): the expected rows are ``self.metadata``, the 'vcf' header
    and an empty string, which looks borrowed from the VCF test.  Confirm
    they describe what ``write_general_stats`` really emits.

    NOTE(review): rows are compared with ``zip``, which stops at the shorter
    input, so expected rows that are absent from the file go unreported.
    Confirm whether that leniency is intentional.
    """
    stats_name = 'general_stats.tsv'
    expected_files = [stats_name]
    header_columns = write_matrix.get_header('vcf', identifiers)
    expected_lines = (
        self.metadata,
        '\t'.join(header_columns) + '\n',
        '',
    )
    contig_stats = Counter(
        {
            'reference_clean': 8,
            'all_passed_proportion': 8,
            'quality_breadth': 8,
            'Contig': 'ContigWithFilePositionOffset',
            'reference_length': 8,
            'best_snps': 0,
            'all_passed_coverage': 8,
            'reference_duplicated': 0,
            'all_passed_consensus': 8,
            'all_called': 8,
            'any_snps': 0,
        }
    )

    with TemporaryDirectory() as workdir:
        stats_writer = write_matrix.write_general_stats(os.path.join(workdir, stats_name), contig_stats)
        # The initial send(None) starts the writer before any position is sent.
        stats_writer.send(None)
        for pos in self.positions:
            stats_writer.send(pos)
        stats_writer.close()

        # Exactly the expected file exists.
        self.assertListEqual(expected_files, os.listdir(workdir))

        stats_path = os.path.join(workdir, stats_name)
        with open(stats_path) as stats_file:
            # Pairwise comparison over the rows both sides have in common.
            for wanted, actual in zip(expected_lines, stats_file):
                self.assertEqual(wanted, actual)
            # Nothing is left unread in the file.
            self.assertEqual([], stats_file.readlines())

        # Reading back did not leave anything else in the directory.
        self.assertListEqual(expected_files, os.listdir(workdir))
def test_undefined_header(self):
    """Requesting a header type that is not defined must raise ``ValueError``."""
    # Callable form of assertRaises: get_header is invoked with the given
    # arguments and the test fails unless it raises ValueError.
    self.assertRaises(ValueError, write_matrix.get_header, 'undefined', identifiers)