def test_report_conflicted_and_inferred_headers(self):
    """Report contains both a conflicts section and an undefined section.

    INFO field `NS` is registered with two definitions (Float in `file1`,
    Integer in `file2`), and FORMAT field `DP` is supplied through the
    inferred headers. The expected report lists one row per conflicting
    definition followed by one row for the inferred field.
    """
    header_definitions = merge_header_definitions.VcfHeaderDefinitions()
    ns_definitions = {
        Definition(1, 'Float'): ['file1'],
        Definition(1, 'Integer'): ['file2'],
    }
    header_definitions._infos = {'NS': ns_definitions}

    ns_info = Info('NS', 1, 'Float', 'Number samples', None, None)
    infos = OrderedDict([('NS', ns_info)])
    dp_format = Format('DP', 2, 'Float', 'Total Depth')
    formats = OrderedDict([('DP', dp_format)])
    # The same `formats` mapping backs both the resolved and inferred headers.
    resolved_headers = VcfHeader(infos=infos, formats=formats)
    inferred_headers = VcfHeader(formats=formats)

    delimiter = preprocess_reporter._DELIMITER
    # The second conflict row uses ' ' placeholders for its first two and
    # last columns.
    conflict_rows = [
        delimiter.join(
            ['NS', 'INFO', 'num=1 type=Float', 'file1', 'num=1 type=Float\n']),
        delimiter.join(
            [' ', ' ', 'num=1 type=Integer', 'file2', ' \n']),
    ]
    undefined_rows = [
        delimiter.join(['DP', 'FORMAT', 'num=2 type=Float\n']),
    ]

    expected = (
        [
            preprocess_reporter._InconsistencyType.HEADER_CONFLICTS + '\n',
            preprocess_reporter._HeaderLine.CONFLICTS_HEADER + '\n',
        ]
        + conflict_rows
        + [
            '\n',
            preprocess_reporter._InconsistencyType.UNDEFINED_HEADERS + '\n',
            preprocess_reporter._HeaderLine.UNDEFINED_FIELD_HEADER + '\n',
        ]
        + undefined_rows
        + ['\n']
    )
    self._generate_report_and_assert_contents_equal(
        expected, header_definitions, resolved_headers, inferred_headers)
def test_report_multiple_files(self):
    """One definition shared by several files yields one row per file.

    `NS` is defined as Float in `file1` and `file2` and as Integer in
    `file3`. The expected report has three rows: the row for `file2`
    leaves the definition column as a ' ' placeholder.
    """
    header_definitions = merge_header_definitions.VcfHeaderDefinitions()
    ns_definitions = {
        Definition(1, 'Float'): ['file1', 'file2'],
        Definition(1, 'Integer'): ['file3'],
    }
    header_definitions._infos = {'NS': ns_definitions}

    ns_info = Info('NS', 1, 'Float', 'Number samples', None, None)
    infos = OrderedDict([('NS', ns_info)])
    resolved_headers = VcfHeader(infos=infos)

    delimiter = preprocess_reporter._DELIMITER
    conflict_rows = [
        delimiter.join(
            ['NS', 'INFO', 'num=1 type=Float', 'file1', 'num=1 type=Float\n']),
        delimiter.join(
            [' ', ' ', ' ', 'file2', ' \n']),
        delimiter.join(
            [' ', ' ', 'num=1 type=Integer', 'file3', ' \n']),
    ]

    expected = (
        [
            preprocess_reporter._InconsistencyType.HEADER_CONFLICTS + '\n',
            preprocess_reporter._HeaderLine.CONFLICTS_HEADER + '\n',
        ]
        + conflict_rows
        + ['\n']
    )
    self._generate_report_and_assert_contents_equal(
        expected, header_definitions, resolved_headers)
def _get_header_from_reader(vcf_reader, file_name=None):
    """Build a `VcfHeader` from the header fields exposed by `vcf_reader`.

    Args:
      vcf_reader: Object providing `infos`, `filters`, `alts`, `formats`
        and `contigs` attributes.
      file_name: Optional value forwarded to `VcfHeader` as `file_name`.

    Returns:
      A `VcfHeader` populated from the reader's attributes.
    """
    # NOTE(review): this source contains a second definition of this name
    # that takes `file_path` and also forwards `samples`; confirm which
    # variant is intended if both end up in the same module.
    header_fields = {
        'infos': vcf_reader.infos,
        'filters': vcf_reader.filters,
        'alts': vcf_reader.alts,
        'formats': vcf_reader.formats,
        'contigs': vcf_reader.contigs,
        'file_name': file_name,
    }
    return VcfHeader(**header_fields)
def _get_header_from_reader(vcf_reader, file_path=None):
    """Build a `VcfHeader` from the header fields exposed by `vcf_reader`.

    Args:
      vcf_reader: Object providing `infos`, `filters`, `alts`, `formats`,
        `contigs` and `samples` attributes.
      file_path: Optional value forwarded to `VcfHeader` as `file_path`.

    Returns:
      A `VcfHeader` populated from the reader's attributes.
    """
    header_fields = {
        'infos': vcf_reader.infos,
        'filters': vcf_reader.filters,
        'alts': vcf_reader.alts,
        'formats': vcf_reader.formats,
        'contigs': vcf_reader.contigs,
        'samples': vcf_reader.samples,
        'file_path': file_path,
    }
    return VcfHeader(**header_fields)
def test_report_no_inconsistencies(self):
    """Empty inputs yield the three 'No ... Found.' messages.

    No definitions are registered, the inferred header is empty and the
    malformed-record list is empty, so each report section is expected to
    contain only its "nothing found" line followed by a blank line.
    """
    header_definitions = merge_header_definitions.VcfHeaderDefinitions()
    inferred_headers = VcfHeader()

    section_messages = (
        'No Header Conflicts Found.\n',
        'No Undefined Headers Found.\n',
        'No Malformed Records Found.\n',
    )
    # Every section message is followed by a blank separator line.
    expected = []
    for message in section_messages:
        expected.append(message)
        expected.append('\n')

    self._generate_report_and_assert_contents_equal(
        expected,
        header_definitions,
        inferred_headers=inferred_headers,
        malformed_records=[])
def test_report_no_conflicts(self):
    """A single definition per field yields the no-conflicts message.

    `NS` appears once among the INFO definitions and once among the FORMAT
    definitions, each with exactly one `Definition`, so the expected report
    is only the "No Header Conflicts Found." line and a blank line.
    """
    header_definitions = merge_header_definitions.VcfHeaderDefinitions()
    info_definitions = {Definition(1, 'Float'): ['file1']}
    header_definitions._infos = {'NS': info_definitions}
    format_definitions = {Definition(1, 'Float'): ['file2']}
    header_definitions._formats = {'NS': format_definitions}

    ns_info = Info('NS', 1, 'Integer', 'Number samples', None, None)
    infos = OrderedDict([('NS', ns_info)])
    ns_format = Format('NS', 1, 'Float', 'Number samples')
    formats = OrderedDict([('NS', ns_format)])
    resolved_headers = VcfHeader(infos=infos, formats=formats)

    expected = [
        'No Header Conflicts Found.\n',
        '\n',
    ]
    self._generate_report_and_assert_contents_equal(
        expected, header_definitions, resolved_headers)
def test_report_inferred_headers_only(self):
    """Only inferred headers: no conflicts, one undefined-field row.

    No definitions are registered; FORMAT field `DP` is supplied through
    the inferred headers. The expected report is the no-conflicts message
    followed by the undefined-headers section listing `DP`.
    """
    header_definitions = merge_header_definitions.VcfHeaderDefinitions()
    dp_format = Format('DP', 2, 'Float', 'Total Depth')
    formats = OrderedDict([('DP', dp_format)])
    inferred_headers = VcfHeader(formats=formats)

    delimiter = preprocess_reporter._DELIMITER
    undefined_row = delimiter.join(['DP', 'FORMAT', 'num=2 type=Float\n'])

    expected = [
        'No Header Conflicts Found.\n',
        '\n',
        preprocess_reporter._InconsistencyType.UNDEFINED_HEADERS + '\n',
        preprocess_reporter._HeaderLine.UNDEFINED_FIELD_HEADER + '\n',
        undefined_row,
        '\n',
    ]
    self._generate_report_and_assert_contents_equal(
        expected,
        header_definitions,
        inferred_headers=inferred_headers)
def _get_vcf_header_from_lines(lines, file_name=None):
    """Build a `VcfHeader` from the leading header lines of `lines`.

    A `libcbcf.VariantHeader` is seeded with a `##fileformat=VCFv4.0` line,
    then each stripped line starting with '#' is added to it. Reading stops
    at the first line that does not start with '#', or at the first line
    starting with `LAST_HEADER_LINE_PREFIX`, which is kept (stripped) as
    the samples line instead of being added to the header.

    Args:
      lines: Iterable of strings to scan.
      file_name: Optional value forwarded to `VcfHeader` as `file_path`.

    Returns:
      A `VcfHeader` built from the parsed header. Its `samples` is the
      stripped last header line, or `LAST_HEADER_LINE_PREFIX` itself when
      no such line was seen.
    """
    variant_header = libcbcf.VariantHeader()
    samples_line = LAST_HEADER_LINE_PREFIX
    variant_header.add_line('##fileformat=VCFv4.0')
    for raw_line in lines:
        if not raw_line.startswith('#'):
            # First non-header line: nothing more to read.
            break
        stripped_line = raw_line.strip()
        if raw_line.startswith(LAST_HEADER_LINE_PREFIX):
            samples_line = stripped_line
            break
        variant_header.add_line(stripped_line)
    return VcfHeader(
        infos=variant_header.info,
        filters=variant_header.filters,
        alts=variant_header.alts,
        formats=variant_header.formats,
        contigs=variant_header.contigs,
        samples=samples_line,
        file_path=file_name,
    )
def test_report_malformed_records(self):
    """A malformed record is listed in its own report section.

    With no definitions and an empty inferred header, the first two
    sections are expected to show their "nothing found" messages. The
    malformed-records section is expected to hold one row built from the
    three values given to the `MalformedVcfRecord`.
    """
    header_definitions = merge_header_definitions.VcfHeaderDefinitions()
    inferred_headers = VcfHeader()
    malformed_record = vcfio.MalformedVcfRecord(
        'file1', 'rs6054257 G A 29 PASS', 'Invalid literal')
    records = [malformed_record]

    delimiter = preprocess_reporter._DELIMITER
    malformed_row = delimiter.join(
        ['file1', 'rs6054257 G A 29 PASS', 'Invalid literal\n'])

    expected = [
        'No Header Conflicts Found.\n',
        '\n',
        'No Undefined Headers Found.\n',
        '\n',
        preprocess_reporter._InconsistencyType.MALFORMED_RECORDS + '\n',
        preprocess_reporter._HeaderLine.MALFORMED_RECORDS_HEADER + '\n',
        malformed_row,
        '\n',
    ]
    self._generate_report_and_assert_contents_equal(
        expected,
        header_definitions,
        inferred_headers=inferred_headers,
        malformed_records=records)