def test_merge_header_definitions_save_five_copies(self):
    """Checks that a shared definition lists five of its six source files.

    Six headers carrying the same Float definition of NS and one header
    carrying an Integer definition are merged. The expected result lists
    'file1' through 'file5' for Float and 'file7' for Integer.
    """
    float_header_lines = [
        '##INFO=<ID=NS,Number=1,Type=Float,Description="Number samples">\n',
        '#CHROM  POS ID  REF ALT QUAL  FILTER  INFO  FORMAT  Sample1 Sample2\n'
    ]
    integer_header_lines = [
        '##INFO=<ID=NS,Number=1,Type=Integer,Description="Number samples">\n',
        '#CHROM  POS ID  REF ALT QUAL  FILTER  INFO  FORMAT  Sample3\n'
    ]
    float_reader = vcf.Reader(fsock=iter(float_header_lines))
    integer_reader = vcf.Reader(fsock=iter(integer_header_lines))

    # One reader is reused for every file name sharing the Float definition.
    headers = [
        self._get_vcf_header_from_reader(float_reader, name)
        for name in ['file1', 'file2', 'file3', 'file4', 'file5', 'file6']]
    headers.append(
        self._get_vcf_header_from_reader(integer_reader, 'file7'))

    test_pipeline = TestPipeline()
    header_pcoll = test_pipeline | Create(headers)
    merged = header_pcoll | (
        'MergeDefinitions' >> merge_header_definitions.MergeDefinitions())

    expected = VcfHeaderDefinitions()
    float_definition = Definition(1, 'Float')
    integer_definition = Definition(1, 'Integer')
    # 'file6' is absent on purpose: only five names are expected for Float.
    expected._infos = {
        'NS': {
            float_definition: ['file1', 'file2', 'file3', 'file4', 'file5'],
            integer_definition: ['file7'],
        },
    }
    assert_that(merged, equal_to([expected]))
    test_pipeline.run()
  def test_merge_header_definitions_no_conflicting_headers(self):
    """Checks merging two headers that define different FORMAT fields.

    'file1' defines NS and 'file2' defines DP, so the expected result holds
    one definition per field, each listing only its own file.
    """
    ns_header_lines = [
        '##FORMAT=<ID=NS,Number=1,Type=Float,Description="Number samples">\n',
        '#CHROM  POS ID  REF ALT QUAL  FILTER  INFO  FORMAT  Sample1 Sample2\n'
    ]
    dp_header_lines = [
        '##FORMAT=<ID=DP,Number=2,Type=Float,Description="Total Depth">\n',
        '#CHROM  POS ID  REF ALT QUAL  FILTER  INFO  FORMAT  Sample3\n'
    ]
    ns_reader = vcf.Reader(fsock=iter(ns_header_lines))
    dp_reader = vcf.Reader(fsock=iter(dp_header_lines))
    headers = [
        self._get_vcf_header_from_reader(ns_reader, 'file1'),
        self._get_vcf_header_from_reader(dp_reader, 'file2'),
    ]

    test_pipeline = TestPipeline()
    header_pcoll = test_pipeline | Create(headers)
    merged = header_pcoll | (
        'MergeDefinitions' >> merge_header_definitions.MergeDefinitions())

    expected = VcfHeaderDefinitions()
    expected._formats = {
        'NS': {Definition(1, 'Float'): ['file1']},
        'DP': {Definition(2, 'Float'): ['file2']},
    }
    assert_that(merged, equal_to([expected]))
    test_pipeline.run()
  def test_report_multiple_files(self):
    """Checks the conflict report when one definition comes from two files.

    The Float definition of NS is listed for 'file1' and 'file2' and the
    Integer definition for 'file3'. The expected report has one row per file,
    with the resolved definition only on the first row.
    """
    definitions = merge_header_definitions.VcfHeaderDefinitions()
    definitions._infos = {
        'NS': {
            Definition(1, 'Float'): ['file1', 'file2'],
            Definition(1, 'Integer'): ['file3'],
        },
    }

    resolved_infos = OrderedDict()
    resolved_infos['NS'] = Info('NS', 1, 'Float', 'Number samples', None, None)
    resolved = VcfHeader(infos=resolved_infos)

    delimiter = preprocess_reporter._DELIMITER
    # Each row is one report line; the last cell carries the line terminator.
    conflict_rows = [
        ['NS', 'INFO', 'num=1 type=Float', 'file1', 'num=1 type=Float\n'],
        [' ', ' ', ' ', 'file2', ' \n'],
        [' ', ' ', 'num=1 type=Integer', 'file3', ' \n'],
    ]
    expected_lines = [
        preprocess_reporter._InconsistencyType.HEADER_CONFLICTS + '\n',
        preprocess_reporter._HeaderLine.CONFLICTS_HEADER + '\n',
    ]
    expected_lines.extend(delimiter.join(row) for row in conflict_rows)
    expected_lines.append('\n')

    self._generate_report_and_assert_contents_equal(
        expected_lines, definitions, resolved)
  def test_report_conflicted_and_inferred_headers(self):
    """Checks a report containing both conflicting and inferred headers.

    NS has conflicting INFO definitions in 'file1' and 'file2', and DP is
    supplied through the inferred headers. The expected report has a
    conflicts section for NS followed by an undefined-headers section for DP.
    """
    definitions = merge_header_definitions.VcfHeaderDefinitions()
    definitions._infos = {
        'NS': {
            Definition(1, 'Float'): ['file1'],
            Definition(1, 'Integer'): ['file2'],
        },
    }

    resolved_infos = OrderedDict()
    resolved_infos['NS'] = Info('NS', 1, 'Float', 'Number samples', None, None)
    inferred_formats = OrderedDict()
    inferred_formats['DP'] = Format('DP', 2, 'Float', 'Total Depth')
    # The same formats mapping feeds both the resolved and inferred headers.
    resolved = VcfHeader(infos=resolved_infos, formats=inferred_formats)
    inferred = VcfHeader(formats=inferred_formats)

    delimiter = preprocess_reporter._DELIMITER
    conflict_section = [
        preprocess_reporter._InconsistencyType.HEADER_CONFLICTS + '\n',
        preprocess_reporter._HeaderLine.CONFLICTS_HEADER + '\n',
        delimiter.join(
            ['NS', 'INFO', 'num=1 type=Float', 'file1', 'num=1 type=Float\n']),
        delimiter.join([' ', ' ', 'num=1 type=Integer', 'file2', ' \n']),
        '\n',
    ]
    undefined_section = [
        preprocess_reporter._InconsistencyType.UNDEFINED_HEADERS + '\n',
        preprocess_reporter._HeaderLine.UNDEFINED_FIELD_HEADER + '\n',
        delimiter.join(['DP', 'FORMAT', 'num=2 type=Float\n']),
        '\n',
    ]
    expected_lines = conflict_section + undefined_section

    self._generate_report_and_assert_contents_equal(
        expected_lines, definitions, resolved, inferred)
  def test_report_no_conflicts(self):
    """Checks the report when every field has a single definition.

    NS appears once as an INFO field ('file1') and once as a FORMAT field
    ('file2'), so the expected report holds only the no-conflicts message.
    """
    definitions = merge_header_definitions.VcfHeaderDefinitions()
    definitions._infos = {'NS': {Definition(1, 'Float'): ['file1']}}
    definitions._formats = {'NS': {Definition(1, 'Float'): ['file2']}}

    # The resolved INFO type (Integer) differs from the single Float
    # definition above; the expected report still shows no conflicts.
    resolved_infos = OrderedDict()
    resolved_infos['NS'] = Info(
        'NS', 1, 'Integer', 'Number samples', None, None)
    resolved_formats = OrderedDict()
    resolved_formats['NS'] = Format('NS', 1, 'Float', 'Number samples')
    resolved = VcfHeader(infos=resolved_infos, formats=resolved_formats)

    expected_lines = ['No Header Conflicts Found.\n', '\n']
    self._generate_report_and_assert_contents_equal(
        expected_lines, definitions, resolved)
  def test_report_no_resolved_headers(self):
    """Checks the conflict report when no resolved headers are supplied.

    NS has conflicting INFO definitions in 'file1' and 'file2'. Only the
    expected lines and the definitions are handed to the report helper, and
    the first conflict row is expected to end with 'Not resolved.'.
    """
    definitions = merge_header_definitions.VcfHeaderDefinitions()
    definitions._infos = {
        'NS': {
            Definition(1, 'Float'): ['file1'],
            Definition(1, 'Integer'): ['file2'],
        },
    }

    delimiter = preprocess_reporter._DELIMITER
    conflict_rows = [
        ['NS', 'INFO', 'num=1 type=Float', 'file1', 'Not resolved.\n'],
        [' ', ' ', 'num=1 type=Integer', 'file2', ' \n'],
    ]
    expected_lines = [
        preprocess_reporter._InconsistencyType.HEADER_CONFLICTS + '\n',
        preprocess_reporter._HeaderLine.CONFLICTS_HEADER + '\n',
    ]
    expected_lines.extend(delimiter.join(row) for row in conflict_rows)
    expected_lines.append('\n')

    self._generate_report_and_assert_contents_equal(
        expected_lines, definitions)
  def test_merge_header_definitions_one_header(self):
    """Checks merging a single header with one INFO definition.

    The expected result records the Integer definition of NS against
    'file1' only.
    """
    header_lines = [
        '##INFO=<ID=NS,Number=1,Type=Integer,Description="Number samples">\n',
        '#CHROM  POS ID  REF ALT QUAL  FILTER  INFO  FORMAT  Sample1 Sample2\n'
    ]
    reader = vcf.Reader(fsock=iter(header_lines))
    single_header = self._get_vcf_header_from_reader(reader, 'file1')

    test_pipeline = TestPipeline()
    header_pcoll = test_pipeline | Create([single_header])
    merged = header_pcoll | (
        'MergeDefinitions' >> merge_header_definitions.MergeDefinitions())

    expected = VcfHeaderDefinitions()
    expected._infos = {
        'NS': {Definition(1, 'Integer'): ['file1']},
    }
    assert_that(merged, equal_to([expected]))
    test_pipeline.run()