def make_examples_runner(options):
    """Runs the examples creation stage of deepvariant.

    Processes every region derived from `options`, writes the resulting
    candidates, gvcf records and examples through an `OutputsWriter`, and
    optionally writes a MakeExamplesRunInfo proto describing the run.

    Args:
      options: The make_examples configuration. This function reads its
        `examples_filename`, `candidates_filename`, `gvcf_filename`,
        `run_info_filename` and `labeler_algorithm` fields, and embeds the
        whole object in the MakeExamplesRunInfo proto.
    """
    # Start monitoring before any work so the metrics cover the whole run.
    monitor = resources.ResourceMonitor().start()

    logging.info('Preparing inputs')
    regions = processing_regions_from_options(options)

    # A single processor turns each region into candidates and examples.
    processor = RegionProcessor(options)

    logging.info('Writing examples to %s', options.examples_filename)
    if options.candidates_filename:
        logging.info('Writing candidates to %s', options.candidates_filename)
    if options.gvcf_filename:
        logging.info('Writing gvcf records to %s', options.gvcf_filename)

    # Running totals; n_regions is tallied but not reported by this function.
    n_regions = 0
    n_candidates = 0
    n_examples = 0

    with OutputsWriter(options) as out:
        for region in regions:
            candidates, examples, gvcfs = processor.process(region)
            n_candidates += len(candidates)
            n_examples += len(examples)
            n_regions += 1

            out.write_candidates(*candidates)

            # Only touch the gvcf output when there is something to write.
            # With gvcf generation turned off, gvcfs is always empty, so this
            # check also keeps us from writing to a gvcf output that is not
            # in use.
            if gvcfs:
                out.write_gvcfs(*gvcfs)

            out.write_examples(*examples)

    # Build and write the MakeExamplesRunInfo proto when a path was given.
    if options.run_info_filename:
        run_info = deepvariant_pb2.MakeExamplesRunInfo(
            options=options,
            resource_metrics=monitor.metrics(),
        )

        if in_training_mode(options):
            labeler = processor.labeler
            if labeler.metrics is None:
                logging.warning(
                    'Labeling metrics requested but the selected labeling '
                    'algorithm %s does not collect metrics; skipping.',
                    options.labeler_algorithm)
            else:
                run_info.labeling_metrics.CopyFrom(labeler.metrics)

        logging.info('Writing MakeExamplesRunInfo to %s',
                     options.run_info_filename)
        write_make_examples_run_info(run_info, path=options.run_info_filename)

    logging.info('Found %s candidate variants', n_candidates)
    logging.info('Created %s examples', n_examples)
def read_make_examples_run_info(path):
    """Reads a MakeExamplesRunInfo proto in text_format from path.

    Args:
      path: Location of the text-format proto; opened with tf.gfile.GFile.

    Returns:
      The result of text_format.Parse applied to the file contents and a
      fresh deepvariant_pb2.MakeExamplesRunInfo message.
    """
    # Read the whole file first so the handle is closed before parsing.
    with tf.gfile.GFile(path) as proto_file:
        serialized = proto_file.read()

    run_info = deepvariant_pb2.MakeExamplesRunInfo()
    return text_format.Parse(serialized, run_info)