예제 #1
0
 def test_config_failed_missing_partition_name(self):
   """Config entries without a usable partition_name must be rejected."""
   tempdir = temp_dir.TempDir()
   # A partition entry that lacks the partition_name field entirely.
   config_no_name = '\n'.join([
       '-  partition:',
       '     regions:',
       '       - "chr1:0-1,000,000"',
   ])
   with self.assertRaisesRegexp(
       ValueError,
       'Each partition must have partition_name field.'):
     _ = variant_partition.VariantPartition(
         tempdir.create_temp_file(suffix='.yaml', lines=config_no_name))
   # A partition entry whose partition_name is only whitespace.
   config_blank_name = '\n'.join([
       '-  partition:',
       '     partition_name: "          "',
       '     regions:',
       '       - "chr1:0-1,000,000"',
   ])
   with self.assertRaisesRegexp(
       ValueError,
       'Partition name can not be empty string.'):
     _ = variant_partition.VariantPartition(
         tempdir.create_temp_file(suffix='.yaml', lines=config_blank_name))
예제 #2
0
  def test_auto_partitioning(self):
    """In auto mode, chr1..chrN (and bare 1..N) map to reserved partitions.

    Also checks that any other reference name lands at or beyond the
    reserved range, and that auto mode has no partition names.
    """
    partitioner = variant_partition.VariantPartition()
    self.assertTrue(partitioner.should_flatten())
    self.assertEqual(partitioner.get_num_partitions(),
                     variant_partition._DEFAULT_NUM_PARTITIONS)

    # Checking standard reference_name formatted as: 'chr[0-9][0-9]'
    for i in xrange(variant_partition._RESERVED_AUTO_PARTITIONS):
      self.assertEqual(partitioner.get_partition('chr' + str(i + 1)), i)
    # Checking standard reference_name formatted as: '[0-9][0-9]'
    for i in xrange(variant_partition._RESERVED_AUTO_PARTITIONS):
      self.assertEqual(partitioner.get_partition(str(i + 1)), i)

    # Every other reference_name is assigned to a partition at or beyond
    # _RESERVED_AUTO_PARTITIONS.
    for unreserved_ref in ('chrY', 'chrX', 'chrM', 'chr23', 'chr30',
                           'Unknown'):
      self.assertGreaterEqual(partitioner.get_partition(unreserved_ref),
                              variant_partition._RESERVED_AUTO_PARTITIONS)
    # In auto mode partitions have no names, so get_partition_name returns
    # None.  (The original comment incorrectly said "empty string".)
    self.assertIsNone(partitioner.get_partition_name(0))
    self.assertIsNone(partitioner.get_partition_name(100))
예제 #3
0
  def test_config_non_existent_partition_name(self):
    """Looking up a name for an out-of-range partition index raises."""
    partitioner = variant_partition.VariantPartition(
        'gcp_variant_transforms/testing/data/partition_configs/'
        'residual_at_end.yaml')
    self.assertFalse(partitioner.should_flatten())
    self.assertEqual(partitioner.get_num_partitions(), 8)

    # Valid indices are 0..7; probe one below and one above that range.
    for bad_index in (-1, 8):
      with self.assertRaisesRegexp(
          ValueError,
          'Given partition index {} is outside of expected range*'.format(
              bad_index)):
        partitioner.get_partition_name(bad_index)
예제 #4
0
  def test_config_boundaries(self):
    """Checks partition assignment at the boundaries of each config region."""
    partitioner = variant_partition.VariantPartition(
        'gcp_variant_transforms/testing/data/partition_configs/'
        'residual_at_end.yaml')
    self.assertFalse(partitioner.should_flatten())
    self.assertEqual(partitioner.get_num_partitions(), 8)
    for index in range(partitioner.get_num_partitions()):
      self.assertTrue(partitioner.should_keep_partition(index))

    # (reference_name, position, expected partition index) triples, grouped
    # by the config region they exercise.
    expected_assignments = [
        # 'chr1:0-1,000,000'
        ('chr1', 0, 0),
        ('chr1', 999999, 0),
        # 'chr1:1,000,000-2,000,000'
        ('chr1', 1000000, 1),
        ('chr1', 1999999, 1),
        # 'chr1:2,000,000-999,999,999'
        ('chr1', 2000000, 2),
        ('chr1', 999999998, 2),
        ('chr1', 999999999, 7),
        # 'chr2' OR 'chr2_alternate_name1' OR 'chr2_alternate_name2' OR '2'.
        ('chr2', 0, 3),
        ('chr2', 999999999000, 3),
        ('chr2_alternate_name1', 0, 3),
        ('chr2_alternate_name1', 999999999000, 3),
        ('chr2_alternate_name2', 0, 3),
        ('CHR2_ALTERNATE_NAME2', 999999999000, 3),
        ('2', 0, 3),
        ('2', 999999999000, 3),
        # 'chr4' OR 'chr5' OR 'chr6:1,000,000-2,000,000'
        ('chr4', 0, 4),
        ('chr4', 999999999000, 4),
        ('chr5', 0, 4),
        ('chr5', 999999999000, 4),
        ('chr6', 1000000, 4),
        ('chr6', 1999999, 4),
        ('chr6', 0, 7),
        ('chr6', 999999, 7),
        ('chr6', 2000000, 7),
        # '3:0-500,000'
        ('3', 0, 5),
        ('3', 499999, 5),
        # '3:500,000-1,000,000'
        ('3', 500000, 6),
        ('3', 999999, 6),
        ('3', 1000000, 7),
    ]
    for reference_name, position, expected_index in expected_assignments:
      self.assertEqual(
          partitioner.get_partition(reference_name, position), expected_index)
예제 #5
0
  def test_auto_partitioning_invalid_partitions(self):
    """Negative positions and blank reference names cannot be partitioned."""
    partitioner = variant_partition.VariantPartition()
    self.assertTrue(partitioner.should_flatten())
    self.assertEqual(partitioner.get_num_partitions(),
                     variant_partition._DEFAULT_NUM_PARTITIONS)

    # A negative position, an empty name, and a whitespace-only name are all
    # rejected with the same error.
    for reference_name, position in (('chr1', -1), ('', 1), ('  ', 1)):
      with self.assertRaisesRegexp(ValueError,
                                   'Cannot partition given input*'):
        partitioner.get_partition(reference_name, position)
예제 #6
0
  def test_config_case_insensitive(self):
    """Reference-name matching ignores letter case."""
    partitioner = variant_partition.VariantPartition(
        'gcp_variant_transforms/testing/data/partition_configs/'
        'residual_at_end.yaml')
    self.assertFalse(partitioner.should_flatten())
    self.assertEqual(partitioner.get_num_partitions(), 8)
    for index in range(partitioner.get_num_partitions()):
      self.assertTrue(partitioner.should_keep_partition(index))

    # All case variants of 'chr1' fall into the 'chr1:0-1,000,000' partition.
    for name_variant in ('chr1', 'Chr1', 'CHr1', 'CHR1'):
      self.assertEqual(partitioner.get_partition(name_variant, 0), 0)
예제 #7
0
  def test_config_get_partition_name(self):
    """Partition indices map to the names declared in the config file."""
    partitioner = variant_partition.VariantPartition(
        'gcp_variant_transforms/testing/data/partition_configs/'
        'residual_at_end.yaml')
    self.assertFalse(partitioner.should_flatten())
    self.assertEqual(partitioner.get_num_partitions(), 8)
    for index in range(partitioner.get_num_partitions()):
      self.assertTrue(partitioner.should_keep_partition(index))

    # Names in config-declaration order; index 7 is the residual partition.
    expected_names = ['chr01_part1', 'chr01_part2', 'chr01_part3', 'chrom02',
                      'chrom04_05_part_06', 'chr3_01', 'chr3_02',
                      'all_remaining']
    for index, name in enumerate(expected_names):
      self.assertEqual(partitioner.get_partition_name(index), name)
예제 #8
0
  def test_partition_variants(self):
    """Runs PartitionVariants in a test pipeline and checks every output."""
    expected_partitions = self._get_standard_variant_partitions()
    expected_partitions.update(self._get_nonstandard_variant_partitions())
    # Flatten the per-partition variant lists into one input collection.
    variants = [variant
                for variant_list in expected_partitions.values()
                for variant in variant_list]

    partitioner = variant_partition.VariantPartition()
    pipeline = TestPipeline()
    partitions = (
        pipeline
        | Create(variants)
        | 'PartitionVariants' >> beam.Partition(
            partition_variants.PartitionVariants(partitioner),
            partitioner.get_num_partitions()))
    # Each output partition must contain exactly the variants expected for
    # its index (or nothing, if no variant maps there).
    for index in xrange(partitioner.get_num_partitions()):
      assert_that(partitions[index],
                  equal_to(expected_partitions.get(index, [])),
                  label=str(index))
    pipeline.run()
예제 #9
0
 def test_config_failed_missing_region(self):
   """A partition declared with an empty regions list must be rejected."""
   tempdir = temp_dir.TempDir()
   # The last partition declares a 'regions' key but lists no regions.
   config_lines = '\n'.join([
       '-  partition:',
       '     partition_name: "chr01_part1"',
       '     regions:',
       '       - "chr1:0-1,000,000"',
       '-  partition:',
       '     partition_name: "all_remaining"',
       '     regions:',
       '       - "residual"',
       '-  partition:',
       '     partition_name: "missing_region"',
       '     regions:',
   ])
   with self.assertRaisesRegexp(
       ValueError,
       'Each partition must have at least one region.'):
     _ = variant_partition.VariantPartition(
         tempdir.create_temp_file(suffix='.yaml', lines=config_lines))
예제 #10
0
 def test_config_failed_duplicate_table_name(self):
   """Two partitions sharing one partition_name must be rejected."""
   tempdir = temp_dir.TempDir()
   # 'duplicate_name' appears twice (first and third partitions).
   config_lines = '\n'.join([
       '-  partition:',
       '     partition_name: "duplicate_name"',
       '     regions:',
       '       - "chr1:0-1,000,000"',
       '-  partition:',
       '     partition_name: "all_remaining"',
       '     regions:',
       '       - "residual"',
       '-  partition:',
       '     partition_name: "duplicate_name"',
       '     regions:',
       '       - "chr1:1,000,000-2,000,000"',
   ])
   with self.assertRaisesRegexp(
       ValueError,
       'Partition names must be unique *'):
     _ = variant_partition.VariantPartition(
         tempdir.create_temp_file(suffix='.yaml', lines=config_lines))
예제 #11
0
 def test_config_failed_duplicate_residual_partition(self):
   """More than one 'residual' partition in the config must be rejected."""
   tempdir = temp_dir.TempDir()
   # Both the first and the third partitions claim the residual region.
   config_lines = '\n'.join([
       '-  partition:',
       '     partition_name: "all_remaining"',
       '     regions:',
       '       - "residual"',
       '-  partition:',
       '     partition_name: "chr01"',
       '     regions:',
       '       - "chr1"',
       '-  partition:',
       '     partition_name: "all_remaining_2"',
       '     regions:',
       '       - "residual"',
   ])
   with self.assertRaisesRegexp(
       ValueError,
       'There must be only one residual partition.'):
     _ = variant_partition.VariantPartition(
         tempdir.create_temp_file(suffix='.yaml', lines=config_lines))
예제 #12
0
  def test_config_residual_partition_absent(self):
    """Without an explicit residual partition, a dropped dummy one exists."""
    partitioner = variant_partition.VariantPartition(
        'gcp_variant_transforms/testing/data/partition_configs/'
        'residual_missing.yaml')
    self.assertFalse(partitioner.should_flatten())
    self.assertEqual(partitioner.get_num_partitions(), 5)
    # All partitions except the last one (dummy residual) should be kept.
    for index in range(partitioner.get_num_partitions() - 1):
      self.assertTrue(partitioner.should_keep_partition(index))
    self.assertFalse(partitioner.should_keep_partition(5 - 1))

    # (reference_name, position, expected partition index) triples for inputs
    # that match an explicit config region.
    matched_regions = [
        # 'chr1:0-1,000,000'
        ('chr1', 0, 0),
        ('chr1', 999999, 0),
        # 'chr1:1,000,000-2,000,000'
        ('chr1', 1000000, 1),
        ('chr1', 1999999, 1),
        # 'chr2' OR 'ch2' OR 'c2' OR '2'
        ('chr2', 0, 2),
        ('chr2', 999999999000, 2),
        # '3:500,000-1,000,000'
        ('3', 500000, 3),
        ('3', 999999, 3),
    ]
    for reference_name, position, expected_index in matched_regions:
      self.assertEqual(
          partitioner.get_partition(reference_name, position), expected_index)

    # Everything else is assigned to the dummy residual partition, index 4.
    residual_inputs = [
        ('chr1', 2000000), ('chr1', 999999999),
        ('3', 0), ('3', 499999), ('3', 1000000),
        ('ch2', 0), ('c2', 0), ('2', 0),
        ('c4', 0), ('cr5', 0), ('chr6', 0),
    ]
    for reference_name, position in residual_inputs:
      self.assertEqual(partitioner.get_partition(reference_name, position), 4)
예제 #13
0
def run(argv=None):
    # type: (List[str]) -> None
    """Runs VCF to BigQuery pipeline.

    Parses command-line options, optionally runs VEP annotation first,
    merges VCF headers, then builds and runs a Beam pipeline that reads,
    filters, optionally partitions/merges variants, and writes each kept
    partition to its own BigQuery table.
    """
    logging.info('Command: %s', ' '.join(argv or sys.argv))
    known_args, pipeline_args = vcf_to_bq_common.parse_args(
        argv, _COMMAND_LINE_OPTIONS)
    # Note VepRunner creates new input files, so it should be run before any
    # other access to known_args.input_pattern.
    if known_args.run_annotation_pipeline:
        runner = vep_runner.create_runner_and_update_args(
            known_args, pipeline_args)
        runner.run_on_all_files()
        runner.wait_until_done()
        logging.info('Using VEP processed files: %s', known_args.input_pattern)

    variant_merger = _get_variant_merge_strategy(known_args)
    pipeline_mode = vcf_to_bq_common.get_pipeline_mode(
        known_args.input_pattern, known_args.optimize_for_large_inputs)

    # Starts a pipeline to merge VCF headers in beam if the total files that
    # match the input pattern exceeds _SMALL_DATA_THRESHOLD
    _merge_headers(known_args, pipeline_args, pipeline_mode)

    # Retrieve merged headers prior to launching the pipeline. This is needed
    # since the BigQuery schema cannot yet be dynamically created based on input.
    # See https://issues.apache.org/jira/browse/BEAM-2801.
    header_fields = vcf_header_parser.get_vcf_headers(
        known_args.representative_header_file)
    counter_factory = metrics_util.CounterFactory()
    processed_variant_factory = processed_variant.ProcessedVariantFactory(
        header_fields, known_args.split_alternate_allele_info_fields,
        known_args.annotation_fields, known_args.use_allele_num,
        known_args.minimal_vep_alt_matching, counter_factory)

    # Partitioning is enabled either explicitly (config file) or implicitly
    # for large inputs.
    partitioner = None
    if known_args.optimize_for_large_inputs or known_args.partition_config_path:
        partitioner = variant_partition.VariantPartition(
            known_args.partition_config_path)

    beam_pipeline_options = pipeline_options.PipelineOptions(pipeline_args)
    pipeline = beam.Pipeline(options=beam_pipeline_options)
    variants = _read_variants(pipeline, known_args)
    variants |= 'FilterVariants' >> filter_variants.FilterVariants(
        reference_names=known_args.reference_names)
    if partitioner:
        num_partitions = partitioner.get_num_partitions()
        partitioned_variants = variants | 'PartitionVariants' >> beam.Partition(
            partition_variants.PartitionVariants(partitioner), num_partitions)
        variants = []
        # Keep only partitions the partitioner marks as kept; each dropped
        # one (e.g. a dummy residual partition) shrinks num_partitions so it
        # matches len(variants) afterwards.
        for i in range(num_partitions):
            if partitioner.should_keep_partition(i):
                variants.append(partitioned_variants[i])
            else:
                num_partitions -= 1
    else:
        # By default we don't partition the data, so we have only 1 partition.
        num_partitions = 1
        variants = [variants]

    for i in range(num_partitions):
        if variant_merger:
            variants[i] |= ('MergeVariants' + str(i) >>
                            merge_variants.MergeVariants(variant_merger))
        # NOTE(review): 'ProcessVaraints' is misspelled ('ProcessVariants'
        # elsewhere); fixing it changes the Beam step label, which may affect
        # pipeline update compatibility — confirm before renaming.
        variants[i] |= (
            'ProcessVaraints' + str(i) >>
            beam.Map(processed_variant_factory.create_processed_variant).\
                with_output_types(processed_variant.ProcessedVariant))
    # When partitioning was only used for parallelism (not separate output
    # tables), flatten everything back into a single collection.
    if partitioner and partitioner.should_flatten():
        variants = [variants | 'FlattenPartitions' >> beam.Flatten()]
        num_partitions = 1

    for i in range(num_partitions):
        table_suffix = ''
        # get_partition_name returns a falsy value (None) in auto mode; named
        # partitions get a '_<name>' suffix on the output table.
        if partitioner and partitioner.get_partition_name(i):
            table_suffix = '_' + partitioner.get_partition_name(i)
        table_name = known_args.output_table + table_suffix
        _ = (
            variants[i] | 'VariantToBigQuery' + table_suffix >>
            variant_to_bigquery.VariantToBigQuery(
                table_name,
                header_fields,
                variant_merger,
                processed_variant_factory,
                append=known_args.append,
                allow_incompatible_records=known_args.
                allow_incompatible_records,
                omit_empty_sample_calls=known_args.omit_empty_sample_calls,
                num_bigquery_write_shards=known_args.num_bigquery_write_shards)
        )

    result = pipeline.run()
    result.wait_until_finish()

    metrics_util.log_all_counters(result)
예제 #14
0
def run(argv=None):
  # type: (List[str]) -> None
  """Runs VCF to BigQuery pipeline.

  Parses command-line options, optionally runs the annotation pipeline,
  merges VCF headers, then builds and runs a Beam pipeline that reads,
  filters, optionally partitions/merges variants, and writes the results to
  BigQuery tables and/or Avro files.
  """
  logging.info('Command: %s', ' '.join(argv or sys.argv))
  known_args, pipeline_args = pipeline_common.parse_args(argv,
                                                         _COMMAND_LINE_OPTIONS)

  if known_args.auto_flags_experiment:
    _get_input_dimensions(known_args, pipeline_args)

  annotated_vcf_pattern = _run_annotation_pipeline(known_args, pipeline_args)

  # If annotation ran, its output pattern supersedes the user-provided ones.
  all_patterns = (
      [annotated_vcf_pattern] if annotated_vcf_pattern
      else known_args.all_patterns)

  variant_merger = _get_variant_merge_strategy(known_args)

  pipeline_mode = pipeline_common.get_pipeline_mode(
      all_patterns,
      known_args.optimize_for_large_inputs)
  # Starts a pipeline to merge VCF headers in beam if the total files that
  # match the input pattern exceeds _SMALL_DATA_THRESHOLD
  _merge_headers(known_args, pipeline_args,
                 pipeline_mode, annotated_vcf_pattern)


  # Retrieve merged headers prior to launching the pipeline. This is needed
  # since the BigQuery schema cannot yet be dynamically created based on input.
  # See https://issues.apache.org/jira/browse/BEAM-2801.
  header_fields = vcf_header_parser.get_vcf_headers(
      known_args.representative_header_file)
  counter_factory = metrics_util.CounterFactory()
  processed_variant_factory = processed_variant.ProcessedVariantFactory(
      header_fields,
      known_args.split_alternate_allele_info_fields,
      known_args.allow_malformed_records,
      known_args.annotation_fields,
      known_args.use_allele_num,
      known_args.minimal_vep_alt_matching,
      known_args.infer_annotation_types,
      counter_factory)

  # Partitioning is used when a config file is given, or to parallelize
  # merging on large inputs.
  partitioner = None
  if ((known_args.optimize_for_large_inputs and variant_merger) or
      known_args.partition_config_path):
    partitioner = variant_partition.VariantPartition(
        known_args.partition_config_path)

  beam_pipeline_options = pipeline_options.PipelineOptions(pipeline_args)
  pipeline = beam.Pipeline(options=beam_pipeline_options)
  variants = _read_variants(all_patterns, pipeline, known_args, pipeline_mode)
  variants |= 'FilterVariants' >> filter_variants.FilterVariants(
      reference_names=known_args.reference_names)
  if partitioner:
    num_partitions = partitioner.get_num_partitions()
    partitioned_variants = variants | 'PartitionVariants' >> beam.Partition(
        partition_variants.PartitionVariants(partitioner), num_partitions)
    variants = []
    # Keep only partitions the partitioner marks as kept; each dropped one
    # (e.g. a dummy residual partition) shrinks num_partitions so it matches
    # len(variants) afterwards.
    for i in range(num_partitions):
      if partitioner.should_keep_partition(i):
        variants.append(partitioned_variants[i])
      else:
        num_partitions -= 1
  else:
    # By default we don't partition the data, so we have only 1 partition.
    num_partitions = 1
    variants = [variants]

  for i in range(num_partitions):
    if variant_merger:
      variants[i] |= ('MergeVariants' + str(i) >>
                      merge_variants.MergeVariants(variant_merger))
    variants[i] |= (
        'ProcessVariants' + str(i) >>
        beam.Map(processed_variant_factory.create_processed_variant).\
            with_output_types(processed_variant.ProcessedVariant))
  # When partitioning was only used for parallelism (not separate output
  # tables), flatten everything back into a single collection.
  if partitioner and partitioner.should_flatten():
    variants = [variants | 'FlattenPartitions' >> beam.Flatten()]
    num_partitions = 1

  if known_args.output_table:
    for i in range(num_partitions):
      table_suffix = ''
      # get_partition_name returns a falsy value (None) in auto mode; named
      # partitions get a '_<name>' suffix on the output table.
      if partitioner and partitioner.get_partition_name(i):
        table_suffix = '_' + partitioner.get_partition_name(i)
      table_name = known_args.output_table + table_suffix
      _ = (variants[i] | 'VariantToBigQuery' + table_suffix >>
           variant_to_bigquery.VariantToBigQuery(
               table_name,
               header_fields,
               variant_merger,
               processed_variant_factory,
               append=known_args.append,
               update_schema_on_append=known_args.update_schema_on_append,
               allow_incompatible_records=known_args.allow_incompatible_records,
               omit_empty_sample_calls=known_args.omit_empty_sample_calls,
               num_bigquery_write_shards=known_args.num_bigquery_write_shards,
               null_numeric_value_replacement=(
                   known_args.null_numeric_value_replacement)))

  if known_args.output_avro_path:
    # TODO(bashir2): Add an integration test that outputs to Avro files and
    # also imports to BigQuery. Then import those Avro outputs using the bq
    # tool and verify that the two tables are identical.
    _ = (
        variants | 'FlattenToOnePCollection' >> beam.Flatten()
        | 'VariantToAvro' >>
        variant_to_avro.VariantToAvroFiles(
            known_args.output_avro_path,
            header_fields,
            processed_variant_factory,
            variant_merger=variant_merger,
            allow_incompatible_records=known_args.allow_incompatible_records,
            omit_empty_sample_calls=known_args.omit_empty_sample_calls,
            null_numeric_value_replacement=(
                known_args.null_numeric_value_replacement))
    )

  result = pipeline.run()
  result.wait_until_finish()

  metrics_util.log_all_counters(result)
예제 #15
0
  def test_config_failed_overlapping_regions(self):
    """Any pair of overlapping regions in a config must be rejected."""
    tempdir = temp_dir.TempDir()
    overlapping_configs = [
        # Two partial regions that share position 999,999.
        [
            '-  partition:',
            '     partition_name: "chr01_part1"',
            '     regions:',
            '       - "chr1:0-1,000,000"',
            '-  partition:',
            '     partition_name: "chr01_part2_overlapping"',
            '     regions:',
            '       - "chr1:999,999-2,000,000"',
        ],
        # A whole-chromosome region followed by a partial one inside it.
        [
            '-  partition:',
            '     partition_name: "chr01_full"',
            '     regions:',
            '       - "chr1"',
            '-  partition:',
            '     partition_name: "chr01_part_overlapping"',
            '     regions:',
            '       - "chr1:1,000,000-2,000,000"',
        ],
        # A partial region followed by a whole-chromosome region covering it.
        [
            '-  partition:',
            '     partition_name: "chr01_part"',
            '     regions:',
            '       - "chr1:1,000,000-2,000,000"',
            '-  partition:',
            '     partition_name: "chr01_full_overlapping"',
            '     regions:',
            '       - "chr1"',
        ],
        # The same whole-chromosome region declared twice.
        [
            '-  partition:',
            '     partition_name: "chr01_full"',
            '     regions:',
            '       - "chr1"',
            '-  partition:',
            '     partition_name: "chr02_part"',
            '     regions:',
            '       - "chr2:1,000,000-2,000,000"',
            '-  partition:',
            '     partition_name: "chr01_full_redundant"',
            '     regions:',
            '       - "chr1"',
        ],
    ]
    for config_lines in overlapping_configs:
      with self.assertRaisesRegexp(
          ValueError, 'Cannot add overlapping region *'):
        _ = variant_partition.VariantPartition(
            tempdir.create_temp_file(suffix='.yaml',
                                     lines='\n'.join(config_lines)))