def test_merge_header_definitions_no_conflicting_headers(self):
    """Merge two headers whose FORMAT IDs do not overlap.

    The merged result is expected to contain both IDs, each mapped to the
    single file name it was read under.
    """
    first_file_lines = [
        '##FORMAT=<ID=NS,Number=1,Type=Float,Description="Number samples">\n',
        '#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample1 Sample2\n',
    ]
    second_file_lines = [
        '##FORMAT=<ID=DP,Number=2,Type=Float,Description="Total Depth">\n',
        '#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample3\n',
    ]

    first_reader = vcf.Reader(fsock=iter(first_file_lines))
    second_reader = vcf.Reader(fsock=iter(second_file_lines))
    all_headers = [
        self._get_vcf_header_from_reader(first_reader, 'file1'),
        self._get_vcf_header_from_reader(second_reader, 'file2'),
    ]

    test_pipeline = TestPipeline()
    header_pcoll = test_pipeline | Create(all_headers)
    merged = (
        header_pcoll
        | 'MergeDefinitions' >> merge_header_definitions.MergeDefinitions())

    # Private attribute is set directly to build the expected value.
    expected_definitions = VcfHeaderDefinitions()
    expected_definitions._formats = {
        'NS': {Definition(1, 'Float'): ['file1']},
        'DP': {Definition(2, 'Float'): ['file2']},
    }

    assert_that(merged, equal_to([expected_definitions]))
    test_pipeline.run()
def test_merge_different_id(self):
    """Merge definitions whose INFO and FORMAT IDs are all distinct.

    After the merge, the main definitions are expected to hold the IDs from
    both inputs, each attributed to its own file.
    """
    primary_lines = [
        '##INFO=<ID=NS,Number=1,Type=Float,Description="Number samples">\n',
        '##FORMAT=<ID=DP,Number=3,Type=Character,Description="Num samples">\n',
        '#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample1 Sample2\n',
    ]
    other_lines = [
        '##INFO=<ID=NK,Number=1,Type=Float,Description="Number samples2">\n',
        '##FORMAT=<ID=DL,Number=3,Type=Character,Description="Num samples2">\n',
        '#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample3\n',
    ]
    definitions_merger = DefinitionsMerger()
    main_defs = self._create_definitions_from_lines(primary_lines, 'file1')
    secondary_defs = self._create_definitions_from_lines(other_lines, 'file2')

    # merge() is checked through its effect on the first argument.
    definitions_merger.merge(main_defs, secondary_defs)

    float_once = Definition(1, 'Float')
    char_thrice = Definition(3, 'Character')
    self.assertDictEqual(
        {'NS': {float_once: ['file1']}, 'NK': {float_once: ['file2']}},
        main_defs.infos)
    self.assertDictEqual(
        {'DP': {char_thrice: ['file1']}, 'DL': {char_thrice: ['file2']}},
        main_defs.formats)
def test_report_multiple_files(self):
    """Report a conflict where one definition is shared by two files.

    The expected report lists 'file1' and 'file2' on separate rows for the
    Float definition, followed by a row for the Integer definition.
    """
    delimiter = preprocess_reporter._DELIMITER

    definitions = VcfHeaderDefinitions()
    definitions._infos = {
        'NS': {
            Definition(1, 'Float'): ['file1', 'file2'],
            Definition(1, 'Integer'): ['file3'],
        }
    }

    resolved_infos = OrderedDict()
    resolved_infos['NS'] = createInfo(
        'NS', 1, 'Float', 'Number samples', None, None)
    resolved = VcfHeader(infos=resolved_infos)

    first_row = delimiter.join(
        ['NS', 'INFO', 'num=1 type=Float', 'file1', 'num=1 type=Float\n'])
    # Continuation rows carry blank placeholders in the repeated columns.
    second_row = delimiter.join([' ', ' ', ' ', 'file2', ' \n'])
    third_row = delimiter.join(
        [' ', ' ', 'num=1 type=Integer', 'file3', ' \n'])

    expected_report = [
        preprocess_reporter._InconsistencyType.HEADER_CONFLICTS + '\n',
        preprocess_reporter._HeaderLine.CONFLICTS_HEADER + '\n',
        first_row,
        second_row,
        third_row,
        '\n',
    ]
    self._generate_report_and_assert_contents_equal(
        expected_report, definitions, resolved)
def test_report_conflicted_and_inferred_headers(self):
    """Report both a header conflict section and an inferred-header section.

    The INFO field 'NS' has two definitions from different files, and the
    FORMAT field 'DP' is passed as an inferred header.
    """
    delimiter = preprocess_reporter._DELIMITER

    definitions = VcfHeaderDefinitions()
    definitions._infos = {
        'NS': {
            Definition(1, 'Float'): ['file1'],
            Definition(1, 'Integer'): ['file2'],
        }
    }

    info_fields = OrderedDict()
    info_fields['NS'] = createInfo(
        'NS', 1, 'Float', 'Number samples', None, None)
    format_fields = OrderedDict()
    format_fields['DP'] = createFormat('DP', 2, 'Float', 'Total Depth')

    # The same formats mapping is handed to both header objects.
    resolved = VcfHeader(infos=info_fields, formats=format_fields)
    inferred = VcfHeader(formats=format_fields)

    conflict_section = [
        preprocess_reporter._InconsistencyType.HEADER_CONFLICTS + '\n',
        preprocess_reporter._HeaderLine.CONFLICTS_HEADER + '\n',
        delimiter.join(
            ['NS', 'INFO', 'num=1 type=Float', 'file1', 'num=1 type=Float\n']),
        delimiter.join([' ', ' ', 'num=1 type=Integer', 'file2', ' \n']),
        '\n',
    ]
    inferred_section = [
        preprocess_reporter._InconsistencyType.INFERRED_HEADERS + '\n',
        preprocess_reporter._HeaderLine.INFERRED_FIELD_HEADER + '\n',
        delimiter.join(['DP', 'FORMAT', 'num=2 type=Float\n']),
        '\n',
    ]

    self._generate_report_and_assert_contents_equal(
        conflict_section + inferred_section, definitions, resolved, inferred)
def test_merge_header_definitions_save_five_copies(self):
    """Merge six headers sharing one definition plus one conflicting header.

    Six file names are fed in for the Float definition, but the expected
    result lists only 'file1' through 'file5' for it.
    """
    float_lines = [
        '##INFO=<ID=NS,Number=1,Type=Float,Description="Number samples">\n',
        '#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample1 Sample2\n',
    ]
    integer_lines = [
        '##INFO=<ID=NS,Number=1,Type=Integer,Description="Number samples">\n',
        '#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample3\n',
    ]
    float_reader = vcf.Reader(fsock=iter(float_lines))
    integer_reader = vcf.Reader(fsock=iter(integer_lines))

    # The same reader is reused for every one of the six file names.
    repeated_names = ['file1', 'file2', 'file3', 'file4', 'file5', 'file6']
    all_headers = [
        self._get_vcf_header_from_reader(float_reader, name)
        for name in repeated_names
    ]
    all_headers.append(
        self._get_vcf_header_from_reader(integer_reader, 'file7'))

    test_pipeline = TestPipeline()
    header_pcoll = test_pipeline | Create(all_headers)
    merged = (
        header_pcoll
        | 'MergeDefinitions' >> merge_header_definitions.MergeDefinitions())

    expected_definitions = VcfHeaderDefinitions()
    expected_definitions._infos = {
        'NS': {
            Definition(1, 'Float'): [
                'file1', 'file2', 'file3', 'file4', 'file5'],
            Definition(1, 'Integer'): ['file7'],
        }
    }

    assert_that(merged, equal_to([expected_definitions]))
    test_pipeline.run()
def test_create_definitions_multi(self):
    """Build definitions from a header with two INFO and two FORMAT lines.

    The IDs 'NS' and 'DP' appear in both sections with different numbers,
    and each section is checked separately.
    """
    header_lines = [
        '##INFO=<ID=NS,Number=1,Type=Integer,Description="Number samples">\n',
        '##INFO=<ID=DP,Number=2,Type=Float,Description="Number samples">\n',
        '##FORMAT=<ID=NS,Number=3,Type=Integer,Description="Number samples">\n',
        '##FORMAT=<ID=DP,Number=4,Type=Float,Description="Number samples">\n',
        '#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample1 Sample2\n',
    ]
    reader = vcf.Reader(fsock=iter(header_lines))
    vcf_header = self._get_vcf_header_from_reader(reader, 'file1')

    definitions = VcfHeaderDefinitions(vcf_header)

    infos_wanted = {
        'NS': {Definition(1, 'Integer'): ['file1']},
        'DP': {Definition(2, 'Float'): ['file1']},
    }
    formats_wanted = {
        'NS': {Definition(3, 'Integer'): ['file1']},
        'DP': {Definition(4, 'Float'): ['file1']},
    }
    self.assertDictEqual(definitions.infos, infos_wanted)
    self.assertDictEqual(definitions.formats, formats_wanted)
def test_report_no_conflicts(self):
    """Report when every ID has exactly one definition.

    The expected report is the 'No Header Conflicts Found.' line followed
    by a blank line.
    """
    definitions = VcfHeaderDefinitions()
    definitions._infos = {'NS': {Definition(1, 'Float'): ['file1']}}
    definitions._formats = {'NS': {Definition(1, 'Float'): ['file2']}}

    resolved_infos = OrderedDict()
    resolved_infos['NS'] = Info(
        'NS', 1, 'Integer', 'Number samples', None, None)
    resolved_formats = OrderedDict()
    resolved_formats['NS'] = Format('NS', 1, 'Float', 'Number samples')
    resolved = VcfHeader(infos=resolved_infos, formats=resolved_formats)

    expected_report = ['No Header Conflicts Found.\n', '\n']
    self._generate_report_and_assert_contents_equal(
        expected_report, definitions, resolved)
def test_report_no_resolved_headers(self):
    """Report a conflict when no resolved headers are passed in.

    The first conflict row is expected to end with 'Not resolved.' in place
    of a resolved definition.
    """
    delimiter = preprocess_reporter._DELIMITER

    definitions = VcfHeaderDefinitions()
    definitions._infos = {
        'NS': {
            Definition(1, 'Float'): ['file1'],
            Definition(1, 'Integer'): ['file2'],
        }
    }

    float_row = delimiter.join(
        ['NS', 'INFO', 'num=1 type=Float', 'file1', 'Not resolved.\n'])
    integer_row = delimiter.join(
        [' ', ' ', 'num=1 type=Integer', 'file2', ' \n'])
    expected_report = [
        preprocess_reporter._InconsistencyType.HEADER_CONFLICTS + '\n',
        preprocess_reporter._HeaderLine.CONFLICTS_HEADER + '\n',
        float_row,
        integer_row,
        '\n',
    ]

    self._generate_report_and_assert_contents_equal(
        expected_report, definitions)
def test_create_definitions_with_info(self):
    """Build definitions from a header that has one INFO line and no FORMAT.

    The INFO mapping should hold the single 'NS' definition and the FORMAT
    mapping should be empty.
    """
    header_lines = [
        '##INFO=<ID=NS,Number=1,Type=Float,Description="Number samples">\n',
        '#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample1 Sample2\n',
    ]
    vcf_header = self._get_header_from_lines(header_lines, 'file1')

    definitions = VcfHeaderDefinitions(vcf_header)

    self.assertDictEqual(
        definitions.infos, {'NS': {Definition(1, 'Float'): ['file1']}})
    self.assertDictEqual(definitions.formats, {})
def test_merge_header_definitions_one_header(self):
    """Run the merge transform over a single header.

    The output is expected to be one definitions object holding the lone
    INFO field 'NS' attributed to 'file1'.
    """
    header_lines = [
        '##INFO=<ID=NS,Number=1,Type=Integer,Description="Number samples">\n',
        '#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample1 Sample2\n',
    ]
    reader = vcf.Reader(fsock=iter(header_lines))
    single_header = self._get_vcf_header_from_reader(reader, 'file1')

    test_pipeline = TestPipeline()
    header_pcoll = test_pipeline | Create([single_header])
    merged = (
        header_pcoll
        | 'MergeDefinitions' >> merge_header_definitions.MergeDefinitions())

    expected_definitions = VcfHeaderDefinitions()
    expected_definitions._infos = {
        'NS': {Definition(1, 'Integer'): ['file1']}
    }

    assert_that(merged, equal_to([expected_definitions]))
    test_pipeline.run()