def test_infer_annotation_pipeline(self):
    """Check type inference for the `CSQ` annotation field over two variants.

    Runs `InferHeaderFields` with `infer_headers=False` and only `CSQ` listed
    in `annotation_fields_to_infer`, then expects a single header whose infos
    are exactly the three `CSQ_*_TYPE` entries built below.
    """
    anno_fields = ['CSQ']
    header = self._get_sample_header_fields(with_annotation=True)

    # Both variants start from the same sample and differ only in their CSQ
    # annotation strings.
    variant1 = self._get_sample_variant_1()
    variant1.info['CSQ'] = [
        'A|1|100|1.2',
        'A|2|101|1.3',
        'A|12|start|0',
        'TT|13|end|7',
    ]
    variant2 = self._get_sample_variant_1()
    variant2.info['CSQ'] = [
        'A|1|100|',
        'A|2|101|',
        'A|1.2|102|0',
        'TT|1.3|103|7',
    ]

    desc = 'Inferred type field for annotation {}.'
    expected = vcf_header_io.VcfHeader(
        infos={
            'CSQ_Gene_TYPE': createInfo(
                'CSQ_Gene_TYPE', 1, 'Float', desc.format('Gene')),
            'CSQ_Position_TYPE': createInfo(
                'CSQ_Position_TYPE', 1, 'String', desc.format('Position')),
            'CSQ_Score_TYPE': createInfo(
                'CSQ_Score_TYPE', 1, 'Float', desc.format('Score')),
        })

    # The pipeline is executed when the `with` block exits, so an explicit
    # `p.run()` would only run it a second time.
    with TestPipeline() as p:
        inferred_headers = (
            p
            | Create([variant1, variant2])
            | 'InferAnnotationTypes' >> infer_headers.InferHeaderFields(
                defined_headers=header,
                infer_headers=False,
                annotation_fields_to_infer=anno_fields))
        assert_that(inferred_headers,
                    asserts.header_fields_equal_ignore_order([expected]))
def test_header_fields_inferred_from_two_variants(self):
    """Infer header fields from two variants when no header is defined.

    Feeds both sample variants through `InferUndefinedHeaderFields` with
    `defined_headers=None` and expects one header containing the INFO and
    FORMAT definitions listed below.
    """
    # The pipeline is executed when the `with` block exits, so an explicit
    # `p.run()` would only run it a second time.
    with TestPipeline() as p:
        variant_1 = self._get_sample_variant_1()
        variant_2 = self._get_sample_variant_2()
        inferred_headers = (
            p
            | Create([variant_1, variant_2])
            | 'InferUndefinedHeaderFields' >>
            infer_undefined_headers.InferUndefinedHeaderFields(
                defined_headers=None))

        expected_infos = {
            'IS': Info('IS', 1, 'String', '', '', ''),
            'ISI': Info('ISI', 1, 'Integer', '', '', ''),
            'ISF': Info('ISF', 1, 'Float', '', '', ''),
            'IF': Info('IF', 1, 'Float', '', '', ''),
            'IB': Info('IB', 0, 'Flag', '', '', ''),
            'IA': Info('IA', None, 'Float', '', '', ''),
            'IS_2': Info('IS_2', 1, 'String', '', '', ''),
        }
        expected_formats = {
            'FI': Format('FI', 1, 'Integer', ''),
            'FU': Format('FU', None, 'Float', ''),
            'FI_2': Format('FI_2', 1, 'Integer', ''),
        }
        expected = vcf_header_io.VcfHeader(
            infos=expected_infos, formats=expected_formats)

        assert_that(inferred_headers,
                    asserts.header_fields_equal_ignore_order([expected]))
def test_pipeline(self):
    """Infer headers for variants that disagree with the defined header.

    The defined header declares `IA` and `FI` as Integer fields. The two
    sample variants come from the cardinality-mismatch and float-value
    helpers, and the transform runs with `allow_incompatible_records=True`.
    The expected output redefines `IA` and `FI` as Float and adds `IF` and
    `FU`, neither of which appears in the defined header.
    """
    infos = {
        'IS': Info('IS', 1, 'String', '', '', ''),
        'ISI': Info('ISI', 1, 'Integer', '', '', ''),
        'ISF': Info('ISF', 1, 'Float', '', '', ''),
        'IB': Info('IB', 0, 'Flag', '', '', ''),
        'IA': Info('IA', -1, 'Integer', '', '', ''),
    }
    formats = OrderedDict([
        ('FS', Format('FS', 1, 'String', 'desc')),
        ('FI', Format('FI', 2, 'Integer', 'desc')),
        ('GT', Format('GT', 2, 'Integer', 'Special GT key')),
        ('PS', Format('PS', 1, 'Integer', 'Special PS key')),
    ])

    # The pipeline is executed when the `with` block exits, so an explicit
    # `p.run()` would only run it a second time.
    with TestPipeline() as p:
        variant_1 = self._get_sample_variant_info_ia_cardinality_mismatch()
        variant_2 = self._get_sample_variant_format_fi_float_value()
        inferred_headers = (
            p
            | Create([variant_1, variant_2])
            | 'InferHeaderFields' >> infer_headers.InferHeaderFields(
                defined_headers=vcf_header_io.VcfHeader(
                    infos=infos, formats=formats),
                allow_incompatible_records=True))

        expected_infos = {
            'IA': Info('IA', None, 'Float', '', '', ''),
            'IF': Info('IF', 1, 'Float', '', '', ''),
        }
        expected_formats = {
            'FI': Format('FI', 2, 'Float', 'desc'),
            'FU': Format('FU', None, 'Float', ''),
        }
        expected = vcf_header_io.VcfHeader(
            infos=expected_infos, formats=expected_formats)

        assert_that(inferred_headers,
                    asserts.header_fields_equal_ignore_order([expected]))
def test_defined_fields_filtered_two_variants(self):
    """Check that already-defined fields are left out of the inferred header.

    The sample header is passed to `InferUndefinedHeaderFields` as a
    singleton side input; the expected output contains only `IS_2` (INFO)
    and `FI_2` (FORMAT).
    """
    # Only the INFO and FORMAT fields of the first variant are already
    # defined in the header section of the VCF files.
    #
    # The pipeline is executed when the `with` block exits, so an explicit
    # `p.run()` would only run it a second time.
    with TestPipeline() as p:
        vcf_headers = self._get_sample_header_fields()
        vcf_headers_side_input = p | 'vcf_header' >> Create([vcf_headers])
        variant_1 = self._get_sample_variant_1()
        variant_2 = self._get_sample_variant_2()
        inferred_headers = (
            p
            | Create([variant_1, variant_2])
            | 'InferUndefinedHeaderFields' >>
            infer_undefined_headers.InferUndefinedHeaderFields(
                pvalue.AsSingleton(vcf_headers_side_input)))

        expected_infos = {'IS_2': Info('IS_2', 1, 'String', '', '', '')}
        expected_formats = {'FI_2': Format('FI_2', 1, 'Integer', '')}
        expected = vcf_header_io.VcfHeader(
            infos=expected_infos, formats=expected_formats)

        assert_that(inferred_headers,
                    asserts.header_fields_equal_ignore_order([expected]))