def test_header_fields_inferred_from_two_variants(self):
        """Checks header inference over two variants with no defined headers.

        Both sample variants are fed to `InferUndefinedHeaderFields` with
        `defined_headers=None`, and the output is expected to be a single
        `VcfHeader` holding the INFO and FORMAT definitions listed below,
        compared with `asserts.header_fields_equal_ignore_order`.
        """
        with TestPipeline() as p:
            variant_1 = self._get_sample_variant_1()
            variant_2 = self._get_sample_variant_2()
            inferred_headers = (
                p
                | Create([variant_1, variant_2])
                | 'InferUndefinedHeaderFields' >>
                infer_headers.InferUndefinedHeaderFields(defined_headers=None))

            expected_infos = {
                'IS': Info('IS', 1, 'String', '', '', ''),
                'ISI': Info('ISI', 1, 'Integer', '', '', ''),
                'ISF': Info('ISF', 1, 'Float', '', '', ''),
                'IF': Info('IF', 1, 'Float', '', '', ''),
                'IB': Info('IB', 0, 'Flag', '', '', ''),
                'IA': Info('IA', None, 'Float', '', '', ''),
                'IS_2': Info('IS_2', 1, 'String', '', '', '')
            }
            expected_formats = {
                'FI': Format('FI', 1, 'Integer', ''),
                'FU': Format('FU', None, 'Float', ''),
                'FI_2': Format('FI_2', 1, 'Integer', '')
            }

            expected = vcf_header_io.VcfHeader(infos=expected_infos,
                                               formats=expected_formats)
            assert_that(inferred_headers,
                        asserts.header_fields_equal_ignore_order([expected]))
            # No explicit p.run(): leaving the `with` block runs the pipeline,
            # so calling run() here would execute it a second time.
def _get_inferred_headers(
        variants,  # type: pvalue.PCollection
        merged_header  # type: pvalue.PCollection
):
    # type: (...) -> (pvalue.PCollection, pvalue.PCollection)
    """Infers header fields from `variants` and merges them into the header.

    Args:
      variants: PCollection that is passed through `FilterVariants` and then
        through `InferUndefinedHeaderFields`.
      merged_header: PCollection handed to the inference step as a singleton
        side input, and later flattened together with the inferred headers.

    Returns:
      A tuple `(inferred_headers, merged_header)`: the output of the
      inference step, and the output of `MergeHeaders` applied to the
      flattened inferred and incoming headers.
    """
    filtered_variants = (
        variants | 'FilterVariants' >> filter_variants.FilterVariants())
    # NOTE: the leading space in the step label below is kept exactly as is;
    # the label is part of the pipeline's step name.
    inferred_headers = (
        filtered_variants
        | ' InferUndefinedHeaderFields' >>
        infer_headers.InferUndefinedHeaderFields(
            pvalue.AsSingleton(merged_header)))

    all_headers = (inferred_headers, merged_header) | beam.Flatten()
    combined_header = (
        all_headers
        | 'MergeHeadersFromVcfAndVariants' >>
        merge_headers.MergeHeaders(allow_incompatible_records=True))
    return inferred_headers, combined_header
 def test_defined_fields_filtered_one_variant(self):
     """Checks that nothing is inferred when the header defines all fields.

     The sample header fields are supplied to `InferUndefinedHeaderFields`
     as a singleton side input, and the output for the first sample variant
     is expected to equal a `VcfHeader` constructed with no arguments.
     """
     # All FORMATs and INFOs are already defined in the header section of VCF
     # files.
     with TestPipeline() as p:
         vcf_headers = self._get_sample_header_fields()
         vcf_headers_side_input = p | 'vcf_headers' >> Create([vcf_headers])
         variant = self._get_sample_variant_1()
         inferred_headers = (
             p
             | Create([variant])
             | 'InferUndefinedHeaderFields' >>
             infer_headers.InferUndefinedHeaderFields(
                 pvalue.AsSingleton(vcf_headers_side_input)))
         expected = vcf_header_io.VcfHeader()
         assert_that(inferred_headers, equal_to([expected]))
         # No explicit p.run(): leaving the `with` block runs the pipeline,
         # so calling run() here would execute it a second time.
def _add_inferred_headers(
        pipeline,  # type: beam.Pipeline
        known_args,  # type: argparse.Namespace
        merged_header  # type: pvalue.PCollection
):
    # type: (...) -> pvalue.PCollection
    """Merges header fields inferred from the variants into `merged_header`.

    Args:
      pipeline: Pipeline passed to `_read_variants` to obtain the variants.
      known_args: Parsed arguments; this function reads `reference_names`,
        `split_alternate_allele_info_fields` and
        `allow_incompatible_records` from it.
      merged_header: PCollection used as a singleton side input for the
        inference step and flattened together with the inferred headers.

    Returns:
      The output of `MergeHeaders` applied to the flattened inferred and
      incoming headers.
    """
    variants = _read_variants(pipeline, known_args)
    filtered_variants = (
        variants
        | 'FilterVariants' >> filter_variants.FilterVariants(
            reference_names=known_args.reference_names))
    # NOTE: the leading space in the step label below is kept exactly as is;
    # the label is part of the pipeline's step name.
    headers_from_variants = (
        filtered_variants
        | ' InferUndefinedHeaderFields' >>
        infer_headers.InferUndefinedHeaderFields(
            pvalue.AsSingleton(merged_header)))

    all_headers = (headers_from_variants, merged_header) | beam.Flatten()
    combined_header = (
        all_headers
        | 'MergeHeadersFromVcfAndVariants' >> merge_headers.MergeHeaders(
            known_args.split_alternate_allele_info_fields,
            known_args.allow_incompatible_records))
    return combined_header
    def test_defined_fields_filtered_two_variants(self):
        """Checks that only fields missing from the header are inferred.

        The sample header fields are supplied to `InferUndefinedHeaderFields`
        as a singleton side input, and both sample variants are processed.
        The output is expected to be a single `VcfHeader` holding only the
        `IS_2` INFO and `FI_2` FORMAT definitions, compared with
        `asserts.header_fields_equal_ignore_order`.
        """
        # Only INFO and FORMAT in the first variants are already defined in the
        # header section of the VCF files.
        with TestPipeline() as p:
            vcf_headers = self._get_sample_header_fields()
            vcf_headers_side_input = p | 'vcf_header' >> Create([vcf_headers])
            variant_1 = self._get_sample_variant_1()
            variant_2 = self._get_sample_variant_2()
            inferred_headers = (
                p
                | Create([variant_1, variant_2])
                | 'InferUndefinedHeaderFields' >>
                infer_headers.InferUndefinedHeaderFields(
                    pvalue.AsSingleton(vcf_headers_side_input)))

            expected_infos = {'IS_2': Info('IS_2', 1, 'String', '', '', '')}
            expected_formats = {'FI_2': Format('FI_2', 1, 'Integer', '')}
            expected = vcf_header_io.VcfHeader(infos=expected_infos,
                                               formats=expected_formats)
            assert_that(inferred_headers,
                        asserts.header_fields_equal_ignore_order([expected]))
            # No explicit p.run(): leaving the `with` block runs the pipeline,
            # so calling run() here would execute it a second time.