def test_init(self):
    '''Summary gathers its filenames from fofn and/or the filenames option'''
    fofn = os.path.join(data_dir, 'summary_test_init.fofn')

    # Names read from the fofn file only
    from_fofn = summary.Summary('out', fofn=fofn)
    self.assertEqual(from_fofn.filenames, ['file1', 'file2'])

    # Names given directly only
    from_list = summary.Summary('out', filenames=['file42'])
    self.assertEqual(from_list.filenames, ['file42'])

    # Both sources: directly given names come before those from the fofn
    from_both = summary.Summary('out', fofn=fofn, filenames=['file42'])
    self.assertEqual(from_both.filenames, ['file42', 'file1', 'file2'])
def test_load_file(self):
    '''_load_file returns a dict of gene name -> list of parsed line dicts'''
    s = summary.Summary('out', filenames=['spam', 'eggs'])
    infile = os.path.join(data_dir, 'summary_test_load_file.in.tsv')

    # One list of tab-separated fields per expected input line
    rows = [
        [
            'gene1', '27', '42', '1', '822', '822', '100.0',
            '.', '.', '.', '.', '.', '.',
            'gene1.scaffold.1', '1490',
            '.', '.', '.', '.', '.', '.',
        ],
        [
            'gene2', '15', '44', '2', '780', '780', '100.0',
            '.', '.', '.', '.', '.', '.',
            'gene2.scaffold.2', '1124',
            '.', '.', '.', '.', '.', '.',
        ],
        [
            'gene2', '15', '46', '2', '780', '770', '99.0',
            '.', '.', '.', '.', '.', '.',
            'gene2.scaffold.3', '1097',
            '.', '.', '.', '.', '.', '.',
        ],
        [
            'gene3', '187', '48', '3', '750', '750', '98.93',
            'SNP', 'SYN', '.', '318', '318', 'C',
            'gene3.scaffold.1', '1047',
            '319', '319', 'G', '.', '.', '.',
        ],
    ]

    parsed = []
    for fields in rows:
        parsed.append(s._line2dict('\t'.join(fields)))

    # The two gene2 lines are expected to be grouped under one key
    expected = {
        'gene1': [parsed[0]],
        'gene2': [parsed[1], parsed[2]],
        'gene3': [parsed[3]],
    }

    got = s._load_file(infile)
    self.assertEqual(expected, got)
def test_line2dict(self):
    '''_line2dict turns one tab-separated line into a typed dict'''
    s = summary.Summary('out', filenames=['spam', 'eggs'])

    fields = [
        'gene1', '187', '42', '3', '750', '750', '98.93',
        'SNP', 'SYN', '.', '66', '66', 'A',
        'gene1.scaffold.1', '1047',
        '67', '67', 'C', '42', 'A', '22,20',
    ]
    line = '\t'.join(fields)

    # Numeric columns are expected as int/float, flag as a flag.Flag,
    # everything else stays a string
    expected = {
        'gene': 'gene1',
        'flag': flag.Flag(187),
        'reads': 42,
        'cluster': '3',
        'gene_len': 750,
        'assembled': 750,
        'pc_ident': 98.93,
        'var_type': 'SNP',
        'var_effect': 'SYN',
        'new_aa': '.',
        'gene_start': 66,
        'gene_end': 66,
        'gene_nt': 'A',
        'scaffold': 'gene1.scaffold.1',
        'scaff_len': 1047,
        'scaff_start': 67,
        'scaff_end': 67,
        'scaff_nt': 'C',
        'read_depth': 42,
        'alt_bases': 'A',
        'ref_alt_depth': '22,20',
    }

    got = s._line2dict(line)
    self.assertEqual(got, expected)
def test_whole_run(self):
    '''Test whole run to check csv ok (skip making tree)

    Runs Summary on two input files and compares the resulting csv with the
    expected one, ignoring the first column. The output files are removed
    whether or not the test passes.
    '''
    tmp_out = 'tmp.summary_test_whole_run.out'
    csv_out = tmp_out + '.csv'
    phandango_out = tmp_out + '.phandango.csv'
    infiles = [
        os.path.join(data_dir, 'summary_test_whole_run.in.1.tsv'),
        os.path.join(data_dir, 'summary_test_whole_run.in.2.tsv'),
    ]
    expected_file = os.path.join(data_dir, 'summary_test_whole_run.out.csv')

    def all_but_first_column(path):
        # Everything after the first comma of each line
        with open(path) as f:
            return [line.rstrip().split(',', maxsplit=1)[1] for line in f]

    s = summary.Summary(
        tmp_out,
        filenames=infiles,
        make_phandango_tree=False,
        show_var_groups=True,
        show_known_vars=True,
        show_novel_vars=True
    )

    try:
        s.run()

        # we don't know the full path of the input files, so check all
        # the other columns
        expected = all_but_first_column(expected_file)
        got = all_but_first_column(csv_out)
        self.assertEqual(expected, got)

        # The run is also expected to write the phandango csv
        self.assertTrue(os.path.exists(phandango_out))
    finally:
        # Clean up even when an assertion above fails; either file may be
        # missing if run() raised part-way through
        for path in (csv_out, phandango_out):
            if os.path.exists(path):
                os.unlink(path)
def test_to_matrix_all_cols(self):
    '''Test _to_matrix all columns

    Input files are supplied through a temporary fofn file whose first line
    also carries the name 'sample1'; the expected matrix uses that name for
    the first row and the file path for the second.
    '''
    infiles = [
        os.path.join(data_dir, 'summary_to_matrix.1.tsv'),
        os.path.join(data_dir, 'summary_to_matrix.2.tsv')
    ]
    fofn = 'tmp.summary_to_matrix_all_cols'
    with open(fofn, 'w') as f:
        print(infiles[0], 'sample1', file=f)
        print(infiles[1], file=f)

    try:
        s = summary.Summary('out', fofn=fofn, show_var_groups=True, show_known_vars=True, show_novel_vars=True)
    finally:
        # The fofn is only needed by the constructor; remove it even if
        # construction fails so no temporary file is left behind
        os.unlink(fofn)

    # NOTE(review): 90 is passed straight to _load_input_files; presumably an
    # identity cutoff -- confirm against summary.Summary
    s.samples = summary.Summary._load_input_files(s.filenames, 90)
    s._gather_unfiltered_output_data()
    got_phandango_header, got_csv_header, got_matrix = summary.Summary._to_matrix(
        s.filenames, s.all_data, s.all_potential_columns, s.cluster_columns)

    expected_phandango_header = [
        'name',
        'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2',
        'noncoding1.pct_id:c1', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1',
        'noncoding1.id1:o1', 'noncoding1.id1.%:c2',
        'noncoding1.id3:o1', 'noncoding1.id3.%:c2',
        'noncoding1.14GT:o1', 'noncoding1.14GT.%:c2',
        'noncoding1.14T:o1', 'noncoding1.14T.%:c2',
        'noncoding1.6G:o1', 'noncoding1.6G.%:c2',
        'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3',
        'noncoding2.pct_id:c1', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1',
        'noncoding2.id2:o1', 'noncoding2.id2.%:c2',
        'noncoding2.42T:o1', 'noncoding2.42T.%:c2',
        'noncoding2.52GT:o1', 'noncoding2.52GT.%:c2',
        'presence_absence1.assembled:o1', 'presence_absence1.match:o1',
        'presence_absence1.ref_seq:o4', 'presence_absence1.pct_id:c1',
        'presence_absence1.known_var:o1', 'presence_absence1.novel_var:o1',
        'presence_absence1.A10V:o1',
    ]

    expected_csv_header = [
        'name',
        'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq',
        'noncoding1.pct_id', 'noncoding1.known_var', 'noncoding1.novel_var',
        'noncoding1.id1', 'noncoding1.id1.%',
        'noncoding1.id3', 'noncoding1.id3.%',
        'noncoding1.14GT', 'noncoding1.14GT.%',
        'noncoding1.14T', 'noncoding1.14T.%',
        'noncoding1.6G', 'noncoding1.6G.%',
        'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq',
        'noncoding2.pct_id', 'noncoding2.known_var', 'noncoding2.novel_var',
        'noncoding2.id2', 'noncoding2.id2.%',
        'noncoding2.42T', 'noncoding2.42T.%',
        'noncoding2.52GT', 'noncoding2.52GT.%',
        'presence_absence1.assembled', 'presence_absence1.match',
        'presence_absence1.ref_seq', 'presence_absence1.pct_id',
        'presence_absence1.known_var', 'presence_absence1.novel_var',
        'presence_absence1.A10V',
    ]

    expected_matrix = [
        [
            'sample1',
            'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no',
            'yes', 100.0, 'no', 'NA',
            'no', 'NA', 'yes', 100.0, 'no', 'NA',
            'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no',
            'yes_multi_het', 'NA',
            'yes', 100.0, 'het', 40.0,
            'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes',
            'yes',
        ],
        [
            infiles[1],
            'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no',
            'het', 80.0, 'yes', 100.0,
            'het', 80.0, 'no', 'NA', 'yes', 100.0,
            'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no',
            'het', 40.0,
            'no', 'NA', 'het', 40.0,
            'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes',
            'yes',
        ],
    ]

    self.assertEqual(expected_phandango_header, got_phandango_header)
    self.assertEqual(expected_csv_header, got_csv_header)
    self.assertEqual(expected_matrix, got_matrix)
def test_to_matrix_assembled_only(self):
    '''_to_matrix output when cluster_cols is restricted to "assembled"'''
    infiles = [
        os.path.join(data_dir, 'summary_to_matrix.%d.tsv' % i)
        for i in (1, 2)
    ]

    # Expected: one "assembled" column per cluster, nothing else
    want_phandango_header = [
        'name',
        'noncoding1.assembled:o1',
        'noncoding2.assembled:o1',
        'presence_absence1.assembled:o1',
    ]
    want_csv_header = [
        'name',
        'noncoding1.assembled',
        'noncoding2.assembled',
        'presence_absence1.assembled',
    ]
    want_matrix = [
        [infiles[0], 'yes', 'yes', 'yes'],
        [infiles[1], 'yes', 'yes', 'yes'],
    ]

    s = summary.Summary('out', filenames=infiles, cluster_cols='assembled')
    s.samples = summary.Summary._load_input_files(s.filenames, 90)
    s._gather_unfiltered_output_data()
    phandango_header, csv_header, matrix = summary.Summary._to_matrix(
        s.filenames,
        s.all_data,
        s.all_potential_columns,
        s.cluster_columns,
    )

    self.assertEqual(want_phandango_header, phandango_header)
    self.assertEqual(want_csv_header, csv_header)
    self.assertEqual(want_matrix, matrix)
def test_to_matrix_with_groups(self):
    '''_to_matrix output when variant group columns are switched on'''
    infiles = [
        os.path.join(data_dir, 'summary_to_matrix.1.tsv'),
        os.path.join(data_dir, 'summary_to_matrix.2.tsv'),
    ]

    # Expected values: summary columns for every cluster plus the
    # id1/id3/id2 group columns for the noncoding clusters
    want_phandango_header = [
        'name',
        'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2',
        'noncoding1.pct_id:c1', 'noncoding1.ctg_cov:c3',
        'noncoding1.known_var:o1', 'noncoding1.novel_var:o1',
        'noncoding1.id1:o1', 'noncoding1.id1.%:c2',
        'noncoding1.id3:o1', 'noncoding1.id3.%:c2',
        'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3',
        'noncoding2.pct_id:c1', 'noncoding2.ctg_cov:c3',
        'noncoding2.known_var:o1', 'noncoding2.novel_var:o1',
        'noncoding2.id2:o1', 'noncoding2.id2.%:c2',
        'presence_absence1.assembled:o1', 'presence_absence1.match:o1',
        'presence_absence1.ref_seq:o4', 'presence_absence1.pct_id:c1',
        'presence_absence1.ctg_cov:c3',
        'presence_absence1.known_var:o1', 'presence_absence1.novel_var:o1',
    ]

    want_csv_header = [
        'name',
        'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq',
        'noncoding1.pct_id', 'noncoding1.ctg_cov',
        'noncoding1.known_var', 'noncoding1.novel_var',
        'noncoding1.id1', 'noncoding1.id1.%',
        'noncoding1.id3', 'noncoding1.id3.%',
        'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq',
        'noncoding2.pct_id', 'noncoding2.ctg_cov',
        'noncoding2.known_var', 'noncoding2.novel_var',
        'noncoding2.id2', 'noncoding2.id2.%',
        'presence_absence1.assembled', 'presence_absence1.match',
        'presence_absence1.ref_seq', 'presence_absence1.pct_id',
        'presence_absence1.ctg_cov',
        'presence_absence1.known_var', 'presence_absence1.novel_var',
    ]

    want_matrix = [
        [
            infiles[0],
            'yes', 'yes', 'noncoding_ref1', '98.33', '10.0', 'yes', 'no',
            'yes', 100.0, 'no', 'NA',
            'yes', 'yes', 'noncoding_ref2', '98.33', '10.0', 'yes', 'no',
            'yes_multi_het', 'NA',
            'yes', 'yes', 'presence_absence_ref1', '98.96', '20.1', 'no', 'yes',
        ],
        [
            infiles[1],
            'yes', 'yes', 'noncoding_ref1', '98.33', '50.1', 'yes', 'no',
            'het', 80.0, 'yes', 100.0,
            'yes', 'yes', 'noncoding_ref2', '98.33', '10.0', 'yes', 'no',
            'het', 40.0,
            'yes', 'yes', 'presence_absence1', '98.96', '51.1', 'no', 'yes',
        ],
    ]

    s = summary.Summary('out', filenames=infiles, show_var_groups=True)
    s.samples = summary.Summary._load_input_files(s.filenames, 90)
    s._gather_unfiltered_output_data()
    phandango_header, csv_header, matrix = summary.Summary._to_matrix(
        s.filenames,
        s.all_data,
        s.all_potential_columns,
        s.cluster_columns,
    )

    self.assertEqual(want_phandango_header, phandango_header)
    self.assertEqual(want_csv_header, csv_header)
    self.assertEqual(want_matrix, matrix)
def test_write_tsv(self):
    '''Test _write_tsv

    Writes a small rows_out table and compares the file byte-for-byte with
    the expected tsv. The temporary file is removed whether or not the
    comparison succeeds.
    '''
    tmp_out = 'tmp.out.tsv'
    s = summary.Summary(tmp_out, filenames=['spam', 'eggs'])
    s.rows_out = [
        ['filename', 'gene1', 'gene3'],
        ['file2', 1, 3],
        ['file3', 2, 4],
    ]
    expected = os.path.join(data_dir, 'summary_test_write_tsv.out.tsv')

    try:
        s._write_tsv()
        # shallow=False forces a content comparison rather than os.stat only
        self.assertTrue(filecmp.cmp(tmp_out, expected, shallow=False))
    finally:
        # Do not leave the temporary file behind on failure; it may not
        # exist at all if _write_tsv raised
        if os.path.exists(tmp_out):
            os.unlink(tmp_out)
def test_gather_output_rows(self):
    '''_gather_output_rows fills rows_out with a header plus one row per file'''
    infiles = [
        os.path.join(data_dir, 'summary_test_gather_output_rows.in.%d.tsv' % i)
        for i in (1, 2)
    ]

    header = ['filename', 'gene1', 'gene2', 'gene3']
    expected = [
        header,
        [infiles[0], 3, 2, 0],
        [infiles[1], 3, 0, 3],
    ]

    s = summary.Summary('out', filenames=infiles)
    s._gather_output_rows()
    self.assertEqual(expected, s.rows_out)
def test_to_summary_number(self):
    '''_to_summary_number maps a list of line dicts to a summary number'''
    s = summary.Summary('out', filenames=['spam', 'eggs'])

    # (flag value, expected summary number), all with pc_ident of 99
    flag_to_number = [
        (0, 0),
        (64, 0),
        (7, 1),
        (259, 1),
        (15, 2),
        (27, 3),
    ]

    for flag_value, expected_number in flag_to_number:
        records = [{'flag': flag.Flag(flag_value), 'assembled': 42, 'pc_ident': 99}]
        self.assertEqual(s._to_summary_number(records), expected_number)

    # Same flag as the last case above (27 -> 3), but with pc_ident of 89
    # the expected number is 0
    low_identity = [{'flag': flag.Flag(27), 'assembled': 42, 'pc_ident': 89}]
    self.assertEqual(s._to_summary_number(low_identity), 0)
def test_filter_output_rows(self):
    '''_filter_output_rows drops the all-zero row and the all-zero column'''
    s = summary.Summary('out', filenames=['spam', 'eggs'])

    # file1 is zero for every gene; gene2 is zero for every file
    unfiltered = [
        ['filename', 'gene1', 'gene2', 'gene3'],
        ['file1', 0, 0, 0],
        ['file2', 1, 0, 3],
        ['file3', 2, 0, 4],
    ]
    filtered = [
        ['filename', 'gene1', 'gene3'],
        ['file2', 1, 3],
        ['file3', 2, 4],
    ]

    s.rows_out = unfiltered
    s._filter_output_rows()
    self.assertEqual(s.rows_out, filtered)
def test_to_matrix_cluster_only(self):
    '''_to_matrix output with default options: cluster summary columns only'''
    infiles = [
        os.path.join(data_dir, 'summary_to_matrix.1.tsv'),
        os.path.join(data_dir, 'summary_to_matrix.2.tsv'),
    ]

    # Expected values: six summary columns per cluster, no group or
    # variant columns
    want_phandango_header = [
        'name',
        'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2',
        'noncoding1.pct_id:c1', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1',
        'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3',
        'noncoding2.pct_id:c1', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1',
        'presence_absence1.assembled:o1', 'presence_absence1.match:o1',
        'presence_absence1.ref_seq:o4', 'presence_absence1.pct_id:c1',
        'presence_absence1.known_var:o1', 'presence_absence1.novel_var:o1',
    ]

    want_csv_header = [
        'name',
        'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq',
        'noncoding1.pct_id', 'noncoding1.known_var', 'noncoding1.novel_var',
        'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq',
        'noncoding2.pct_id', 'noncoding2.known_var', 'noncoding2.novel_var',
        'presence_absence1.assembled', 'presence_absence1.match',
        'presence_absence1.ref_seq', 'presence_absence1.pct_id',
        'presence_absence1.known_var', 'presence_absence1.novel_var',
    ]

    want_matrix = [
        [
            infiles[0],
            'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no',
            'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no',
            'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes',
        ],
        [
            infiles[1],
            'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no',
            'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no',
            'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes',
        ],
    ]

    s = summary.Summary('out', filenames=infiles)
    s.samples = summary.Summary._load_input_files(s.filenames, 90)
    s._gather_unfiltered_output_data()
    phandango_header, csv_header, matrix = summary.Summary._to_matrix(
        s.filenames,
        s.all_data,
        s.all_potential_columns,
        s.cluster_columns,
    )

    self.assertEqual(want_phandango_header, phandango_header)
    self.assertEqual(want_csv_header, csv_header)
    self.assertEqual(want_matrix, matrix)
def test_to_matrix_with_vars(self):
    '''_to_matrix output with known and novel variant columns switched on'''
    infiles = [
        os.path.join(data_dir, 'summary_to_matrix.1.tsv'),
        os.path.join(data_dir, 'summary_to_matrix.2.tsv'),
    ]

    # Expected values: summary columns followed by per-variant columns
    # (no group columns, since show_var_groups is not set)
    want_phandango_header = [
        'name',
        'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2',
        'noncoding1.pct_id:c1', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1',
        'noncoding1.14GT:o1', 'noncoding1.14GT.%:c2',
        'noncoding1.14T:o1', 'noncoding1.14T.%:c2',
        'noncoding1.6G:o1', 'noncoding1.6G.%:c2',
        'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3',
        'noncoding2.pct_id:c1', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1',
        'noncoding2.42T:o1', 'noncoding2.42T.%:c2',
        'noncoding2.52GT:o1', 'noncoding2.52GT.%:c2',
        'presence_absence1.assembled:o1', 'presence_absence1.match:o1',
        'presence_absence1.ref_seq:o4', 'presence_absence1.pct_id:c1',
        'presence_absence1.known_var:o1', 'presence_absence1.novel_var:o1',
        'presence_absence1.A10V:o1',
    ]

    want_csv_header = [
        'name',
        'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq',
        'noncoding1.pct_id', 'noncoding1.known_var', 'noncoding1.novel_var',
        'noncoding1.14GT', 'noncoding1.14GT.%',
        'noncoding1.14T', 'noncoding1.14T.%',
        'noncoding1.6G', 'noncoding1.6G.%',
        'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq',
        'noncoding2.pct_id', 'noncoding2.known_var', 'noncoding2.novel_var',
        'noncoding2.42T', 'noncoding2.42T.%',
        'noncoding2.52GT', 'noncoding2.52GT.%',
        'presence_absence1.assembled', 'presence_absence1.match',
        'presence_absence1.ref_seq', 'presence_absence1.pct_id',
        'presence_absence1.known_var', 'presence_absence1.novel_var',
        'presence_absence1.A10V',
    ]

    want_matrix = [
        [
            infiles[0],
            'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no',
            'no', 'NA', 'yes', 100.0, 'no', 'NA',
            'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no',
            'yes', 100.0, 'het', 40.0,
            'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes',
            'yes',
        ],
        [
            infiles[1],
            'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no',
            'het', 80.0, 'no', 'NA', 'yes', 100.0,
            'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no',
            'no', 'NA', 'het', 40.0,
            'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes',
            'yes',
        ],
    ]

    s = summary.Summary('out', filenames=infiles, show_known_vars=True, show_novel_vars=True)
    s.samples = summary.Summary._load_input_files(s.filenames, 90)
    s._gather_unfiltered_output_data()
    phandango_header, csv_header, matrix = summary.Summary._to_matrix(
        s.filenames,
        s.all_data,
        s.all_potential_columns,
        s.cluster_columns,
    )

    self.assertEqual(want_phandango_header, phandango_header)
    self.assertEqual(want_csv_header, csv_header)
    self.assertEqual(want_matrix, matrix)
def test_gather_unfiltered_output_data(self):
    '''_gather_unfiltered_output_data under progressively more options

    Starts from the default options (summary columns only) and then switches
    on variant groups, known variants and novel variants in turn, extending
    the expected data before each check.
    '''
    infiles = [
        os.path.join(data_dir, 'summary_gather_unfiltered_output_data.in.1.tsv'),
        os.path.join(data_dir, 'summary_gather_unfiltered_output_data.in.2.tsv'),
    ]

    expected_all = {
        infiles[0]: {
            'noncoding1': {
                'summary': {
                    'assembled': 'yes',
                    'known_var': 'yes',
                    'match': 'yes',
                    'novel_var': 'no',
                    'pct_id': '98.33',
                    'ref_seq': 'noncoding_ref1',
                },
                'groups': {},
                'vars': {},
            },
            'noncoding2': {
                'summary': {
                    'assembled': 'yes',
                    'known_var': 'yes',
                    'match': 'yes',
                    'novel_var': 'no',
                    'pct_id': '98.33',
                    'ref_seq': 'noncoding_ref2',
                },
                'groups': {},
                'vars': {},
            },
            'presence_absence1': {
                'summary': {
                    'assembled': 'yes',
                    'known_var': 'no',
                    'match': 'yes',
                    'novel_var': 'yes',
                    'pct_id': '98.96',
                    'ref_seq': 'presence_absence_ref1',
                },
                'groups': {},
                'vars': {},
            },
            'presence_absence2': {
                'summary': {
                    'assembled': 'partial',
                    'known_var': 'no',
                    'match': 'no',
                    'novel_var': 'yes',
                    'pct_id': '99.1',
                    'ref_seq': 'presence_absence_ref2',
                },
                'groups': {},
                'vars': {},
            },
        },
        infiles[1]: {
            'noncoding1': {
                'summary': {
                    'assembled': 'yes',
                    'known_var': 'yes',
                    'match': 'yes',
                    'novel_var': 'no',
                    'pct_id': '98.33',
                    'ref_seq': 'noncoding_ref1',
                },
                'groups': {},
                'vars': {},
            },
            'noncoding2': {
                'summary': {
                    'assembled': 'yes',
                    'known_var': 'yes',
                    'match': 'yes',
                    'novel_var': 'no',
                    'pct_id': '98.33',
                    'ref_seq': 'noncoding_ref2',
                },
                'groups': {},
                'vars': {},
            },
            'presence_absence1': {
                'summary': {
                    'assembled': 'yes',
                    'known_var': 'no',
                    'match': 'yes',
                    'novel_var': 'yes',
                    'pct_id': '98.96',
                    'ref_seq': 'presence_absence1',
                },
                'groups': {},
                'vars': {},
            },
        },
    }

    # Every cluster starts with the same six summary columns and no
    # group/variant columns; each cluster gets its own fresh sets
    expected_potential_cols = {
        cluster: {
            'summary': {'assembled', 'known_var', 'match', 'novel_var', 'pct_id', 'ref_seq'},
            'groups': set(),
            'vars': set(),
        }
        for cluster in ('noncoding1', 'noncoding2', 'presence_absence1', 'presence_absence2')
    }

    self.maxDiff = None

    def check(**options):
        # Build a Summary with the given options, gather its data and
        # compare against the current state of the expected structures
        s = summary.Summary('out', filenames=infiles, **options)
        s.samples = summary.Summary._load_input_files(s.filenames, 90)
        s._gather_unfiltered_output_data()
        self.assertEqual(expected_potential_cols, s.all_potential_columns)
        self.assertEqual(expected_all, s.all_data)

    first = expected_all[infiles[0]]
    second = expected_all[infiles[1]]

    # Stage 1: default options
    check()

    # Stage 2: variant groups
    expected_potential_cols['noncoding1']['groups'] = {'id3', 'id1', 'id1.%', 'id3.%'}
    expected_potential_cols['noncoding2']['groups'] = {'id2.%', 'id2'}
    first['noncoding1']['groups'] = {'id1': 'yes', 'id1.%': 100.0}
    first['noncoding2']['groups'] = {'id2': 'yes_multi_het', 'id2.%': 'NA'}
    second['noncoding1']['groups'] = {'id1': 'het', 'id1.%': 80.0, 'id3': 'yes', 'id3.%': 100.0}
    second['noncoding2']['groups'] = {'id2': 'het', 'id2.%': 40.0}
    check(show_var_groups=True)

    # Stage 3: plus known variants
    expected_potential_cols['noncoding1']['vars'] = {'14T', '14T.%', '14GT', '14GT.%', '6G', '6G.%'}
    expected_potential_cols['noncoding2']['vars'] = {'52GT', '52GT.%', '42T', '42T.%'}
    first['noncoding1']['vars'] = {'14T': 'yes', '14T.%': 100.0}
    first['noncoding2']['vars'] = {'42T': 'yes', '42T.%': 100.0, '52GT': 'het', '52GT.%': 40.0}
    second['noncoding1']['vars'] = {'14GT': 'het', '14GT.%': 80.0, '6G': 'yes', '6G.%': 100.0}
    second['noncoding2']['vars'] = {'52GT': 'het', '52GT.%': 40.0}
    check(show_var_groups=True, show_known_vars=True)

    # Stage 4: plus novel variants
    expected_potential_cols['presence_absence1']['vars'] = {'A10V'}
    expected_potential_cols['presence_absence2']['vars'] = {'V175L'}
    first['presence_absence1']['vars'] = {'A10V': 'yes'}
    first['presence_absence2']['vars'] = {'V175L': 'yes'}
    second['presence_absence1']['vars'] = {'A10V': 'yes'}
    check(show_var_groups=True, show_known_vars=True, show_novel_vars=True)