예제 #1
0
def make_examples_runner(options):
    """Runs examples creation stage of deepvariant.

    Processes each region selected by `options`, writing the candidates,
    examples and (when any are produced) gvcf records of every region through
    an OutputsWriter. When `options.run_info_filename` is set, a
    MakeExamplesRunInfo proto holding the options, the resource monitor's
    metrics and, in training mode, the labeler's metrics (if it collected
    any) is written afterwards. Finally the total numbers of candidates and
    examples are logged.

    Args:
      options: The options object for this run. It is handed to
        processing_regions_from_options, RegionProcessor and OutputsWriter,
        and its examples_filename, candidates_filename, gvcf_filename,
        run_info_filename and labeler_algorithm fields are read here.
    """
    # Start monitoring before any work so the metrics cover the whole run.
    resource_monitor = resources.ResourceMonitor().start()
    logging.info('Preparing inputs')
    regions = processing_regions_from_options(options)

    # Create a processor to create candidates and examples for each region.
    region_processor = RegionProcessor(options)

    logging.info('Writing examples to %s', options.examples_filename)
    if options.candidates_filename:
        logging.info('Writing candidates to %s', options.candidates_filename)
    if options.gvcf_filename:
        logging.info('Writing gvcf records to %s', options.gvcf_filename)

    # Running totals reported in the summary log lines at the end.
    n_candidates = 0
    n_examples = 0
    with OutputsWriter(options) as writer:
        for region in regions:
            candidates, examples, gvcfs = region_processor.process(region)
            n_candidates += len(candidates)
            n_examples += len(examples)

            writer.write_candidates(*candidates)

            # If we have any gvcf records, write them out. This if also serves
            # to protect us from trying to write to the gvcfs output of writer
            # when gvcf generation is turned off. In that case, gvcfs will
            # always be empty and we'll never execute the write.
            if gvcfs:
                writer.write_gvcfs(*gvcfs)
            writer.write_examples(*examples)

    # Construct and then write out our MakeExamplesRunInfo proto.
    if options.run_info_filename:
        run_info = deepvariant_pb2.MakeExamplesRunInfo(
            options=options, resource_metrics=resource_monitor.metrics())
        if in_training_mode(options):
            labeling_metrics = region_processor.labeler.metrics
            if labeling_metrics is not None:
                run_info.labeling_metrics.CopyFrom(labeling_metrics)
            else:
                # Not every labeler produces metrics; record nothing rather
                # than fail the run.
                logging.warning(
                    'Labeling metrics requested but the selected labeling '
                    'algorithm %s does not collect metrics; skipping.',
                    options.labeler_algorithm)
        logging.info('Writing MakeExamplesRunInfo to %s',
                     options.run_info_filename)
        write_make_examples_run_info(run_info, path=options.run_info_filename)

    logging.info('Found %s candidate variants', n_candidates)
    logging.info('Created %s examples', n_examples)
예제 #2
0
def read_make_examples_run_info(path):
    """Loads a text_format MakeExamplesRunInfo proto from path.

    Args:
      path: Location of the file to read; it is opened with tf.gfile.GFile.

    Returns:
      The result of text_format.Parse applied to the file's contents and a
      newly constructed deepvariant_pb2.MakeExamplesRunInfo message.
    """
    # Pull the whole file into memory first; parsing does not need the handle.
    with tf.gfile.GFile(path) as handle:
        serialized = handle.read()
    return text_format.Parse(serialized, deepvariant_pb2.MakeExamplesRunInfo())