def testFailedExampleImageShape(self):
  """Checks example_image_shape raises ValueError without image/shape."""
  # Create an empty example that doesn't have the required image/shape field.
  example = example_pb2.Example()
  # Use assertRaisesRegex: the assertRaisesRegexp spelling is a deprecated
  # alias that was removed from unittest in Python 3.12.
  with self.assertRaisesRegex(
      ValueError, 'Invalid image/shape: we expect to find an '
      'image/shape field with length 3.'):
    tf_utils.example_image_shape(example)
def test_create_pileup_examples(self):
  """Checks one example is built per (alt alleles, tensor) pileup pair.

  The pileup image creator and `_encode_tensor` are both mocked, so the test
  only verifies how create_pileup_examples wires their outputs into the
  returned examples.
  """
  self.processor.pic = mock.Mock()
  self.add_mock('_encode_tensor', side_effect=[
      ('tensor1', self.default_shape, self.default_format),
      ('tensor2', self.default_shape, self.default_format)
  ])
  dv_call = mock.Mock()
  dv_call.variant = test_utils.make_variant(start=10, alleles=['A', 'C', 'G'])
  alt1, alt2 = ['C'], ['G']
  # Single source of truth for both the mocked return value and the checks.
  expected = [(alt1, 'tensor1'), (alt2, 'tensor2')]
  self.processor.pic.create_pileup_images.return_value = list(expected)

  actual = self.processor.create_pileup_examples(dv_call)

  self.processor.pic.create_pileup_images.assert_called_once_with(dv_call)

  # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
  self.assertEqual(len(actual), 2)
  for ex, (alt, img) in zip(actual, expected):
    self.assertEqual(tf_utils.example_alt_alleles(ex), alt)
    self.assertEqual(tf_utils.example_variant(ex), dv_call.variant)
    self.assertEqual(tf_utils.example_encoded_image(ex), img)
    self.assertEqual(tf_utils.example_image_shape(ex), self.default_shape)
    self.assertEqual(tf_utils.example_image_format(ex), self.default_format)
def test_create_pileup_examples(self):
  """Checks create_pileup_examples against a mocked pileup image creator.

  `create_pileup_images` is stubbed to return two (alt alleles, tensor) pairs
  and `_encode_tensor` is stubbed to return matching encoded tuples; each
  resulting example must carry the corresponding values.
  """
  self.processor.pic = mock.Mock()
  self.add_mock(
      '_encode_tensor',
      side_effect=[('tensor1', self.default_shape, self.default_format),
                   ('tensor2', self.default_shape, self.default_format)])
  dv_call = mock.Mock()
  dv_call.variant = test_utils.make_variant(start=10, alleles=['A', 'C', 'G'])
  alt1, alt2 = ['C'], ['G']
  # Reused below so the stubbed pairs and the asserted pairs cannot drift.
  alt_tensor_pairs = [(alt1, 'tensor1'), (alt2, 'tensor2')]
  self.processor.pic.create_pileup_images.return_value = list(
      alt_tensor_pairs)

  actual = self.processor.create_pileup_examples(dv_call)

  self.processor.pic.create_pileup_images.assert_called_once_with(dv_call)

  # assertEquals is a deprecated alias removed in Python 3.12.
  self.assertEqual(len(actual), 2)
  for ex, (alt, img) in zip(actual, alt_tensor_pairs):
    self.assertEqual(tf_utils.example_alt_alleles(ex), alt)
    self.assertEqual(tf_utils.example_variant(ex), dv_call.variant)
    self.assertEqual(tf_utils.example_encoded_image(ex), img)
    self.assertEqual(tf_utils.example_image_shape(ex), self.default_shape)
    self.assertEqual(tf_utils.example_image_format(ex), self.default_format)
def test_create_pileup_examples(self):
  """Checks create_pileup_examples with byte-string tensors.

  The pileup image creator is mocked: `get_reads` returns an empty list and
  `create_pileup_images` returns two (alt alleles, encoded tensor) pairs. The
  test verifies the keyword arguments forwarded to `create_pileup_images` and
  the fields stored on each returned example.
  """
  self.processor.pic = mock.Mock()
  self.processor.pic.get_reads.return_value = []
  tensor1, tensor2 = six.b('tensor1'), six.b('tensor2')
  self.add_mock(
      '_encode_tensor',
      side_effect=[(tensor1, self.default_shape, self.default_format),
                   (tensor2, self.default_shape, self.default_format)])
  dv_call = mock.Mock()
  dv_call.variant = test_utils.make_variant(start=10, alleles=['A', 'C', 'G'])
  alt1, alt2 = ['C'], ['G']
  # Reused below so the stubbed pairs and the asserted pairs cannot drift.
  expected = [(alt1, tensor1), (alt2, tensor2)]
  self.processor.pic.create_pileup_images.return_value = list(expected)

  actual = self.processor.create_pileup_examples(dv_call)

  self.processor.pic.create_pileup_images.assert_called_once_with(
      dv_call=dv_call,
      reads_for_samples=[[]],
      haplotype_alignments_for_samples=None,
      haplotype_sequences=None,
      sample_order=None)

  self.assertLen(actual, 2)
  for ex, (alt, img) in zip(actual, expected):
    self.assertEqual(tf_utils.example_alt_alleles(ex), alt)
    self.assertEqual(tf_utils.example_variant(ex), dv_call.variant)
    self.assertEqual(tf_utils.example_encoded_image(ex), img)
    self.assertEqual(tf_utils.example_image_shape(ex), self.default_shape)
    # The stored image format is compared against the bytes form.
    self.assertEqual(
        tf_utils.example_image_format(ex), six.b(self.default_format))
def testExampleImageShape(self):
  """Checks example_image_shape returns the shape given to make_example."""
  built_example = tf_utils.make_example(
      self.variant,
      self.alts,
      self.encoded_image,
      self.default_shape,
      self.default_format,
  )
  observed_shape = tf_utils.example_image_shape(built_example)
  self.assertEqual(self.default_shape, observed_shape)
def call_variants(examples_filename,
                  checkpoint_path,
                  model,
                  output_file,
                  execution_hardware='auto',
                  batch_size=16,
                  max_batches=None,
                  use_tpu=False,
                  master=''):
  """Main driver of call_variants.

  Validates the first example found at `examples_filename`, checks the
  requested execution hardware, builds a prediction stream and writes one
  variant call per prediction to `output_file`.

  Args:
    examples_filename: Path to the input examples; read once up front for
      validation and then passed to `prepare_inputs` as `source_path`.
    checkpoint_path: Model checkpoint to load, or None. When not None, the
      channel count of the checkpoint's first layer is compared with the
      examples. None skips that check and uses no prediction hooks (the
      branch used by unit tests).
    model: Object providing `make_estimator` and `session_predict_hooks`.
    output_file: Destination handed to `tfrecord.Writer`.
    execution_hardware: Must be one of `_ALLOW_EXECUTION_HARDWARE`. 'cpu'
      sets the GPU and TPU device counts to 0; 'accelerator' requires that a
      non-CPU device is visible to the session.
    batch_size: Forwarded to `model.make_estimator`; also used to derive the
      batch counter from the number of examples processed.
    max_batches: None for no limit; otherwise the loop stops once the batch
      counter exceeds this value.
    use_tpu: Forwarded to `prepare_inputs`, `model.make_estimator` and
      `write_variant_call`.
    master: Forwarded to `model.make_estimator`.

  Raises:
    ValueError: If the example image format is not b'raw', if the checkpoint
      and example channel counts differ, or if `execution_hardware` is not an
      allowed value.
    ExecutionHardwareError: If `execution_hardware` is 'accelerator' but the
      session lists only CPU devices.
  """
  if FLAGS.kmp_blocktime:
    os.environ['KMP_BLOCKTIME'] = FLAGS.kmp_blocktime
    logging.vlog(3,
                 'Set KMP_BLOCKTIME to {}'.format(os.environ['KMP_BLOCKTIME']))

  # Read a single TFExample to make sure we're not loading an older version.
  first_example = tf_utils.get_one_example_from_examples_path(examples_filename)
  if first_example is None:
    logging.warning(
        'Unable to read any records from %s. Output will contain '
        'zero records.', examples_filename)
    tfrecord.write_tfrecords([], output_file)
    return

  example_format = tf_utils.example_image_format(first_example)
  example_shape = tf_utils.example_image_shape(first_example)
  if example_format != six.b('raw'):
    raise ValueError('The TF examples in {} has image/format \'{}\' '
                     '(expected \'raw\') which means you might need to rerun '
                     'make_examples to generate the examples again.'.format(
                         examples_filename, example_format))
  logging.info('Shape of input examples: %s', str(example_shape))

  if checkpoint_path is not None:
    reader = tf.compat.v1.train.NewCheckpointReader(checkpoint_path)
    shape_map_for_layers = reader.get_variable_to_shape_map()
    first_layer = 'InceptionV3/Conv2d_1a_3x3/weights'
    # For a shape map of [3, 3, 6, 32] for the Conv2d_1a_3x3 layer, the 6
    # is the number of channels.
    num_channels_in_checkpoint_model = shape_map_for_layers[first_layer][2]
    if num_channels_in_checkpoint_model != example_shape[2]:
      raise ValueError('The number of channels in examples and checkpoint '
                       'should match, but the checkpoint has {} channels while '
                       'the examples have {}.'.format(
                           num_channels_in_checkpoint_model, example_shape[2]))
    # The model checkpoint includes information on the number of channels but
    # unfortunately not the width or height.
    if example_shape[0] not in [100, 300]:
      logging.warning('The height of the input image is not 100 (standard in '
                      'DeepVariant) or 300 (standard in DeepTrio). '
                      'Please double-check that the model is trained with the '
                      'same parameters and version of DeepVariant as you '
                      'generated the examples with. An error will not appear '
                      'when these are mismatched because of how InceptionV3 '
                      'works. Note that if you set --pileup_image_height in '
                      'DeepVariant, then you must use a model trained with '
                      'that same parameter.')
    if example_shape[1] != 221:
      logging.warning('The width of the input image is not 221 (standard in '
                      'DeepVariant). '
                      'Please double-check that the model is trained with the '
                      'same parameters and version of DeepVariant as you '
                      'generated the examples with. An error will not appear '
                      'when these are mismatched because of how InceptionV3 '
                      'works. Note that if you set --pileup_image_width in '
                      'DeepVariant, then you must use a model trained with '
                      'that same parameter.')

  # Check accelerator status.
  if execution_hardware not in _ALLOW_EXECUTION_HARDWARE:
    raise ValueError(
        'Unexpected execution_hardware={} value. Allowed values are {}'.format(
            execution_hardware, ','.join(_ALLOW_EXECUTION_HARDWARE)))
  init_op = tf.group(tf.compat.v1.global_variables_initializer(),
                     tf.compat.v1.local_variables_initializer())

  config = tf.compat.v1.ConfigProto()
  if FLAGS.config_string is not None:
    text_format.Parse(FLAGS.config_string, config)
  if execution_hardware == 'cpu':
    # Don't overwrite entire dictionary.
    config.device_count['GPU'] = 0
    config.device_count['TPU'] = 0

  # Perform sanity check.
  with tf.compat.v1.Session(config=config) as sess:
    sess.run(init_op)
    if execution_hardware == 'accelerator':
      if not any(dev.device_type != 'CPU' for dev in sess.list_devices()):
        raise ExecutionHardwareError(
            'execution_hardware is set to accelerator, but no accelerator '
            'was found')
    # redacted
    # sess.list_devices here doesn't return the correct answer. That can only
    # work later, after the device (on the other VM) has been initialized,
    # which is generally not yet.

  # Prepare input stream and estimator.
  tf_dataset = prepare_inputs(source_path=examples_filename, use_tpu=use_tpu)
  if FLAGS.use_openvino:
    ie_estimator = OpenVINOEstimator(
        checkpoint_path, input_fn=tf_dataset, model=model)
    predictions = iter(ie_estimator)
  else:
    estimator = model.make_estimator(
        batch_size=batch_size,
        master=master,
        use_tpu=use_tpu,
        session_config=config,
    )

    # Instantiate the prediction "stream", and select the EMA values from
    # the model.
    if checkpoint_path is None:
      # Unit tests use this branch.
      predict_hooks = []
    else:
      predict_hooks = [
          h(checkpoint_path) for h in model.session_predict_hooks()
      ]
    predictions = iter(
        estimator.predict(
            input_fn=tf_dataset,
            checkpoint_path=checkpoint_path,
            hooks=predict_hooks))

  # Consume predictions one at a time and write them to output_file.
  logging.info('Writing calls to %s', output_file)
  writer = tfrecord.Writer(output_file)
  with writer:
    start_time = time.time()
    n_examples, n_batches = 0, 0
    # Initialised up front so the summary log below stays well-defined when
    # the prediction stream yields nothing (previously an UnboundLocalError).
    duration = 0.0
    while max_batches is None or n_batches <= max_batches:
      try:
        prediction = next(predictions)
      except (StopIteration, tf.errors.OutOfRangeError):
        break
      write_variant_call(writer, prediction, use_tpu)
      n_examples += 1
      n_batches = n_examples // batch_size + 1
      duration = time.time() - start_time

      if not FLAGS.use_openvino:
        # n_examples >= 1 here, so the division is safe.
        logging.log_every_n(
            logging.INFO,
            ('Processed %s examples in %s batches [%.3f sec per 100]'),
            _LOG_EVERY_N, n_examples, n_batches, (100 * duration) / n_examples)

    # One last log to capture the extra examples.
    if not FLAGS.use_openvino:
      # Guard against dividing by zero when no prediction was consumed.
      sec_per_100 = (100 * duration) / n_examples if n_examples else 0.0
      logging.info('Processed %s examples in %s batches [%.3f sec per 100]',
                   n_examples, n_batches, sec_per_100)

    logging.info('Done calling variants from a total of %d examples.',
                 n_examples)