def Inference(self):
  if py_utils.use_tpu():
    raise NotImplementedError('TPU is not supported.')
  with tf.name_scope('inference'):
    feed1 = tf.placeholder(name='feed1_node', dtype=tf.float32, shape=[1])
    fetch1 = tf.identity(feed1, name='fetch1_node')
    return {
        'default': (
            py_utils.NestedMap({
                'fetch1': fetch1,
                'fetch_op': fetch1.op,  # Tests that ops are supported.
            }),
            py_utils.NestedMap({
                'feed1': feed1,
            })),
        'unused': (py_utils.NestedMap({}), py_utils.NestedMap({})),
    }

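# Illustrative sketch (not part of the original code): driving the 'default'
# inference subgraph above, assuming a tf.Session built over the same graph.
# `model` is a hypothetical instance of the class defining Inference().
def _example_run_default_subgraph(model, sess):
  fetches, feeds = model.Inference()['default']
  # Feed a single float through feed1_node and read back fetch1_node.
  return sess.run(fetches.fetch1, feed_dict={feeds.feed1: [42.0]})
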
def testDenseLayerSigns(self):
  """EG-DD update."""
  with self.cached_session() as sess:
    var = tf.Variable([0.5, 1.0])
    grad = tf.placeholder(tf.float32, shape=[2])
    opt = egdd.EGDD(
        learning_rate=0.1,
        momentum=0.9,
        beta=0.1,
        gain_learning_rate=1e-2,
        scale_learning_rate=1e-3,
        use_signs=True)

    step = opt.apply_gradients([(grad, var)])
    tf.global_variables_initializer().run()

    pre_var = sess.run(var)
    pre_momentum = sess.run(opt.get_slot(var, 'momentum'))
    pre_gain = sess.run(opt.get_slot(var, 'gain'))
    pre_lr_scale = sess.run(opt.get_slot(var, 'lr_scale'))
    self.assertAllClose([0.5, 1.0], pre_var)
    self.assertAllClose([0.0, 0.0], pre_momentum)
    self.assertAllClose([1.0, 1.0], pre_gain)
    self.assertAllClose([1.0], pre_lr_scale)

    sess.run(step, feed_dict={grad: [0.1, -0.5]})
    pre_var = sess.run(var)
    pre_momentum = sess.run(opt.get_slot(var, 'momentum'))
    pre_gain = sess.run(opt.get_slot(var, 'gain'))
    pre_lr_scale = sess.run(opt.get_slot(var, 'lr_scale'))
    self.assertAllClose([0.49, 1.05], pre_var)
    self.assertAllClose([0.01, -0.05], pre_momentum)
    self.assertAllClose([1, 1], pre_gain)
    self.assertAllClose([1.0], pre_lr_scale)

    sess.run(step, feed_dict={grad: [-1.0, -1.5]})
    pre_var = sess.run(var)
    pre_momentum = sess.run(opt.get_slot(var, 'momentum'))
    pre_gain = sess.run(opt.get_slot(var, 'gain'))
    pre_lr_scale = sess.run(opt.get_slot(var, 'lr_scale'))
    self.assertAllClose([0.5801, 1.2466], pre_var, atol=1e-4)
    self.assertAllClose([-0.0900, -0.1965], pre_momentum, atol=1e-4)
    self.assertAllClose([0.9900, 1.0101], pre_gain, atol=1e-4)
    self.assertAllClose([1.0007], pre_lr_scale, atol=1e-4)

def _CreateAsrFeatures():
  # First pass: extract transcription files.
  if os.path.exists(FLAGS.transcripts_filepath):
    trans = _LoadTranscriptionsFromFile()
  else:
    tf.logging.info('Running first pass on the fly')
    trans = _ReadTranscriptions()
  tf.logging.info('Total transcripts: %d', len(trans))
  tf_bytes = tf.placeholder(dtype=tf.string)
  # Extract log-mel features directly with the audio frontend.
  log_mel = audio_lib.ExtractLogMelFeatures(tf_bytes)

  # Second pass: transcode the flac.
  file_obj = tf.io.gfile.GFile(FLAGS.input_tarball, mode='rb')
  tar = tarfile.open(fileobj=file_obj, mode='r:gz')
  n = 0
  recordio_writers = _OpenSubShards()
  tfconf = tf.config_pb2.ConfigProto()
  tfconf.gpu_options.allow_growth = True
  with tf.Session(config=tfconf) as sess:
    for tarinfo in tar:
      if not tarinfo.name.endswith('.flac'):
        continue
      n += 1
      if n % FLAGS.num_shards != FLAGS.shard_id:
        continue
      uttid = re.sub(r'.*/(.+)\.flac', r'\1', tarinfo.name)
      f = tar.extractfile(tarinfo)
      wav_bytes = audio_lib.DecodeFlacToWav(f.read())
      f.close()
      frames = sess.run(log_mel, feed_dict={tf_bytes: wav_bytes})
      assert uttid in trans, uttid
      num_words = len(trans[uttid])
      tf.logging.info('utt[%d]: %s [%d frames, %d words]', n, uttid,
                      frames.shape[1], num_words)
      ex = _MakeTfExample(uttid, frames, trans[uttid])
      outf = _SelectRandomShard(recordio_writers)
      outf.write(ex.SerializeToString())
  tar.close()
  file_obj.close()
  _CloseSubShards(recordio_writers)

def _config_infeed(self,
                   num_partitions,
                   device_assignment,
                   batch_size,
                   key_size=2,
                   return_tgt_mask=False,
                   use_partitioned_infeed_queue=False):
  """Configures the infeed ops and args."""
  zero_batch = get_zero_batch(
      batch_size=batch_size,
      max_len=self._prefix_max_len,
      key_size=key_size,
      return_tgt_mask=return_tgt_mask)

  host_device = device_assignment.host_device(replica=0, job=self._tpu)
  host_id = int(host_device.split('/task:')[1].split('/device:')[0])
  input_partition_dims = [
      [num_partitions] + [1] * (len(x.shape) - 1) for x in zero_batch
  ]
  if use_partitioned_infeed_queue:
    infeed = tpu_feed._PartitionedInfeedQueue(  # pylint: disable=protected-access
        number_of_tuple_elements=len(zero_batch),
        host_id=host_id,
        input_partition_dims=input_partition_dims,
        device_assignment=device_assignment)
  else:
    infeed = tpu_feed.InfeedQueue(number_of_tuple_elements=len(zero_batch))

  self.infeed_args = []
  for x in zero_batch:
    p = tf.placeholder(tf.as_dtype(x.dtype), shape=x.shape)
    self.infeed_args += [p]
  if use_partitioned_infeed_queue:
    self.infeed_op = infeed.generate_enqueue_ops([self.infeed_args])
  else:
    self.infeed_op = infeed.split_inputs_and_generate_enqueue_ops(
        self.infeed_args, device_assignment=device_assignment)
  return infeed

def _Placeholders(self):
  """Return a NestedMap of placeholders to fill in for inference.

  Runs the configured input pipeline to generate the expected shapes and types
  of the inputs.

  Returns:
    A NestedMap of placeholders matching the input structure of the inference
    model.
  """
  p = self.params
  with tf.Graph().as_default():
    inputs = self.params.input.Instantiate()

  # Turn those inputs into placeholders.
  placeholders = []
  for input_shape, dtype in zip(inputs.Shape().Flatten(),
                                inputs.DType().Flatten()):
    batched_input_shape = [p.inference_batch_size] + input_shape.as_list()
    placeholders.append(tf.placeholder(dtype, batched_input_shape))

  result = inputs.DType().Pack(placeholders)
  return result

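# Illustrative sketch (assumption, not from the original source): pairing the
# placeholder NestedMap from _Placeholders() with concrete values. `model` is
# a hypothetical instance of the defining class and `batch_values` is a list
# of numpy arrays in the same order as the flattened input structure.
def _example_feed_dict_from_placeholders(model, batch_values):
  feeds = model._Placeholders()
  # NestedMap.Flatten() yields leaves in a deterministic order, so zipping
  # against values flattened the same way lines up placeholder with value.
  return dict(zip(feeds.Flatten(), batch_values))
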
def _create_graph(self):
  if self._sess is not None:
    return
  with self._cluster:
    cfg = model_registry.GetParams(self._model_name, self._split)
    cfg.input.batch_size = 1
    # Turn off label filtering so the database contains all objects.
    cfg.input.extractors.labels.filter_labels = None
    # Disable preprocessors if they are not required.
    if not self._run_preprocessors:
      cfg.input.preprocessors_order = []
    graph = tf.Graph()
    with graph.as_default():
      inp = cfg.input.Instantiate()
      self._elem = tf.placeholder(tf.string)
      bucket, batch = inp.ExtractUsingExtractors(self._elem)
      self._filtered_data = _GetFilteredBoundingBoxData(batch)
      self._bucket = bucket
    self._sess = tf.Session(graph=graph)

def _InferenceSubgraph_Default(self):
  """Default inference subgraph.

  Returns:
    (fetches, feeds):

    - fetches: A dictionary of fetches, containing:

      - log_pplx_per_token: A matrix of shape [batch, time]. [i, j] is i-th
        input text's j-th token's log prob.
      - paddings: A matrix of shape [batch, time]. The padding mask.
      - lengths: A vector of shape [batch]. The number of tokens in each
        input text.
      - log_pplx_per_sample: A vector of shape [batch]. [i] is i-th input
        text's log prob.
      - num_oovs_per_sample: A vector of shape [batch] counting the total
        number of out-of-vocabulary tokens in each input.
      - tokens_from_labels: A vector of shape [batch] returning the predicted
        tokens as a sequence after mapping them back to strings from ids
        using the vocabulary.
      - ids: A matrix of shape [batch, time]. [i, j] is i-th input text's
        j-th token's id.

    - feeds: A dictionary of feeds, containing:

      - text: A placeholder for a vector of strings.
  """
  text = tf.placeholder(tf.string, shape=[None])
  # [batch, time]
  ids, labels, paddings = self.input_generator.StringsToIds(text)
  lengths = tf.reduce_sum(tf.cast(1 - paddings, tf.int32), axis=1)
  tokens_from_labels = self.input_generator.IdsToStrings(labels, lengths)
  oovs = tf.equal(labels, self.input_generator.tokenizer.unk_id)
  num_oovs_per_sample = tf.cast(
      tf.round(
          tf.reduce_sum(tf.cast(oovs, tf.float32) * (1 - paddings), axis=1)),
      tf.int32)
  # [time, batch]
  ids, paddings, labels, weights = self._TrimIfPossibleThenTranspose(
      ids, paddings, labels, 1.0 - paddings)
  batch_size = tf.shape(ids)[1]
  xent_output, _ = self.lm.FPropDefaultTheta(
      inputs=ids,
      paddings=paddings,
      state0=self.lm.zero_state(self.theta.lm, batch_size),
      labels=py_utils.NestedMap(class_ids=labels, class_weights=weights))

  per_example_xent = py_utils.HasShape(xent_output.per_example_xent,
                                       tf.shape(ids))
  log_pplx_per_sample = tf.reduce_sum(
      per_example_xent * (1 - paddings), axis=0)
  fetches = {
      'log_pplx_per_token':  # [batch, time]
          tf.transpose(per_example_xent),
      'paddings':  # [batch, time]
          tf.transpose(paddings),
      'lengths':  # [batch]
          lengths,
      'log_pplx_per_sample':  # [batch]
          log_pplx_per_sample,
      'num_oovs_per_sample':  # [batch], int32
          num_oovs_per_sample,
      'tokens_from_labels':  # [batch], string
          tokens_from_labels,
      'ids':  # [batch, time], int32
          ids,
  }
  feeds = {'text': text}
  return fetches, feeds

def testAccumulator(self):
  # testAccumulator compares
  # - explicit averaging of independently computed var_grads1 and
  #   var_grads2,
  # - Accumulator(SGD) optimizer effectively doing this over 2 steps.
  np.random.seed(12345)
  np_input1 = np.random.normal(0.1, 0.5, [2, 4, 3])
  np.random.seed(12346)
  np_input2 = np.random.normal(0.1, 0.5, [2, 4, 3])

  with self.session(use_gpu=True, graph=tf.Graph()) as sess:
    tf.random.set_seed(123456)
    params = layers.ProjectionLayer.Params()
    params.name = 'proj'
    params.dtype = tf.float64
    params.input_dim = 3
    params.output_dim = 2
    params.params_init = py_utils.WeightInit.Gaussian(0.01, 123456)
    params.batch_norm = False
    proj_layer = layers.ProjectionLayer(params)
    inputs1 = tf.placeholder(shape=[2, 4, 3], dtype=tf.float64)
    in_padding1 = tf.zeros([2, 4, 1], dtype=tf.float64)
    inputs2 = tf.placeholder(shape=[2, 4, 3], dtype=tf.float64)
    in_padding2 = tf.zeros([2, 4, 1], dtype=tf.float64)
    output1 = proj_layer.FPropDefaultTheta(inputs1, in_padding1)
    output2 = proj_layer.FPropDefaultTheta(inputs2, in_padding2)
    loss1 = tf.reduce_sum(output1)
    loss2 = tf.reduce_sum(output2)
    var_grads1 = py_utils.ComputeGradients(loss1, proj_layer.vars)
    var_grads2 = py_utils.ComputeGradients(loss2, proj_layer.vars)
    op = optimizer.SGD.Params()
    opt = op.Instantiate()
    lr = 1e-1
    with tf.control_dependencies([loss1, loss2]):
      var_update_op1 = opt.Apply(
          lr, py_utils.ApplyGradMultiplier(var_grads1, 1. / 2.))
      with tf.control_dependencies([var_update_op1]):
        var_update_op2 = opt.Apply(
            lr, py_utils.ApplyGradMultiplier(var_grads2, 1. / 2.))

    self.evaluate(tf.global_variables_initializer())
    vars1 = self.evaluate(proj_layer.vars.Flatten())
    loss1_1, grads1_1, loss1_2, grads1_2 = sess.run(
        [
            loss1,
            var_grads1.Transform(tuple),
            loss2,
            var_grads2.Transform(tuple)
        ],
        feed_dict={
            inputs1: np_input1,
            inputs2: np_input2,
        },
    )
    sess.run(
        [var_update_op2],
        feed_dict={
            inputs1: np_input1,
            inputs2: np_input2,
        })
    vars1_1 = self.evaluate(proj_layer.vars.Flatten())

  with self.session(use_gpu=True, graph=tf.Graph()) as sess:
    tf.random.set_seed(123456)
    params = layers.ProjectionLayer.Params()
    params.name = 'proj'
    params.dtype = tf.float64
    params.input_dim = 3
    params.output_dim = 2
    params.params_init = py_utils.WeightInit.Gaussian(0.01, 123456)
    params.batch_norm = False
    proj_layer = layers.ProjectionLayer(params)
    in_padding1 = tf.zeros([2, 4, 1], dtype=tf.float64)
    inputs1 = tf.placeholder(shape=[2, 4, 3], dtype=tf.float64)
    output1 = proj_layer.FPropDefaultTheta(inputs1, in_padding1)
    loss = tf.reduce_sum(output1)
    var_grads = py_utils.ComputeGradients(loss, proj_layer.vars)
    op = optimizer.Accumulator.Params().Set(
        accum_steps=2,
        dtype=tf.float64,
        optimizer_tpl=optimizer.SGD.Params())
    opt = op.Instantiate()
    lr = 1e-1
    with cluster_factory.ForTestingWorker(add_summary=True):
      var_update_op = opt.Apply(lr, var_grads)
    increment_global_step_op = tf.assign_add(
        py_utils.GetOrCreateGlobalStepVar(), 1)

    self.evaluate(tf.global_variables_initializer())
    vars2 = self.evaluate(proj_layer.vars.Flatten())
    loss2_1, grads2_1 = sess.run(
        [loss, var_grads.Transform(tuple)], feed_dict={
            inputs1: np_input1,
        })
    loss2_2, grads2_2 = sess.run(
        [loss, var_grads.Transform(tuple)], feed_dict={
            inputs1: np_input2,
        })
    acc_0 = self.evaluate(
        [v for v in tf.global_variables() if 'grad_accumulator' in v.name])[0]
    sess.run([var_update_op], feed_dict={
        inputs1: np_input1,
    })
    acc_1 = self.evaluate(
        [v for v in tf.global_variables() if 'grad_accumulator' in v.name])[0]
    vars2_intermediate = self.evaluate(proj_layer.vars.Flatten())
    self.evaluate(increment_global_step_op)
    sess.run([var_update_op], feed_dict={
        inputs1: np_input2,
    })
    acc_2 = self.evaluate(
        [v for v in tf.global_variables() if 'grad_accumulator' in v.name])[0]
    vars2_1 = self.evaluate(proj_layer.vars.Flatten())
    summary = tf.Summary.FromString(self.evaluate(tf.summary.merge_all()))
    tf.logging.info(f'summary: {summary}')
    self.assertEqual(summary.value[0].tag, 'sgd_lr')

    self.assertAllClose(vars1, vars2)
    self.assertAllClose(acc_0, np.zeros_like(acc_0))
    self.assertAllClose(acc_1, grads2_1['w'][1])
    self.assertAllClose(acc_2, np.zeros_like(acc_0))
    self.assertAllClose(loss1_1, loss2_1)
    self.assertAllClose(loss1_2, loss2_2)
    self.assertAllClose(grads1_1, grads2_1)
    self.assertAllClose(grads1_2, grads2_2)
    self.assertAllClose(vars1, vars2_intermediate)
    self.assertAllClose(vars2[0], grads2_1['w'][0])
    self.assertAllClose(vars2[0], grads2_2['w'][0])
    self.assertAllClose(
        vars1[0] - 0.5 * lr * (grads1_1['w'][1] + grads1_2['w'][1]),
        vars1_1[0])
    self.assertAllClose(
        vars2[0] - 0.5 * lr * (grads2_1['w'][1] + grads2_2['w'][1]),
        vars2_1[0])
    self.assertAllClose(vars2, vars2_intermediate)
    self.assertAllClose(vars1_1, vars2_1)

def remove_shape(tensor):
  """Discards the static shape of `tensor` by reshaping to a fed shape."""
  shape = tf.placeholder(tf.int32, name='removed_shape')
  return tf.reshape(tensor, shape)

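# Illustrative sketch (assumption): because remove_shape() introduces a shape
# placeholder, the result can only be evaluated if 'removed_shape' is fed.
# Feeding the original shape back makes the values round-trip unchanged.
def _example_remove_shape():
  x = tf.ones([2, 3])
  y = remove_shape(x)  # y now has a fully unknown static shape.
  with tf.Session() as sess:
    return sess.run(y, feed_dict={'removed_shape:0': [2, 3]})
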
def _BuildMetric(self, feed_data, classid):
  """Construct tensors and the feed_dict for Waymo metric op.

  Args:
    feed_data: a NestedMap returned by _GetData().
    classid: integer.

  Returns:
    A NestedMap of three dicts:

    - scalar_metrics: a dict mapping all the metric names to fetch tensors.
    - curve_metrics: a dict mapping all the curve names to fetch tensors.
    - feed_dict: a dict mapping the tensors in feed_tensors to feed values.
  """
  breakdown_names = config_util.get_breakdown_names_from_config(
      self._waymo_metric_config)
  if feed_data is None:
    dummy_scalar = tf.constant(np.nan)
    dummy_curve = tf.zeros(
        [self.metadata.NumberOfPrecisionRecallPoints(), 2], tf.float32)
    scalar_metrics = {'ap': dummy_scalar, 'ap_ha_weighted': dummy_scalar}
    curve_metrics = {'pr': dummy_curve, 'pr_ha_weighted': dummy_curve}

    for metric in breakdown_names:
      scalar_metrics['ap_%s' % metric] = dummy_scalar
      scalar_metrics['ap_ha_weighted_%s' % metric] = dummy_scalar
      curve_metrics['pr_%s' % metric] = dummy_curve
      curve_metrics['pr_ha_weighted_%s' % metric] = dummy_curve

    return py_utils.NestedMap(
        feed_dict={},
        scalar_metrics=scalar_metrics,
        curve_metrics=curve_metrics)

  feed_dict = {}

  f_gt_bbox = tf.placeholder(tf.float32)
  feed_dict[f_gt_bbox] = feed_data.gt.bbox

  f_gt_imgid = tf.placeholder(tf.int32)
  feed_dict[f_gt_imgid] = feed_data.gt.imgid

  f_gt_speed = tf.placeholder(tf.float32)
  feed_dict[f_gt_speed] = feed_data.gt.speed

  f_pd_bbox = tf.placeholder(tf.float32)
  feed_dict[f_pd_bbox] = feed_data.pd.bbox

  f_pd_imgid = tf.placeholder(tf.int32)
  feed_dict[f_pd_imgid] = feed_data.pd.imgid

  f_pd_score = tf.placeholder(tf.float32)
  feed_dict[f_pd_score] = feed_data.pd.score

  num_gt_bboxes = feed_data.gt.imgid.shape[0]
  num_pd_bboxes = feed_data.pd.imgid.shape[0]
  gt_class_ids = tf.constant(classid, dtype=tf.uint8, shape=[num_gt_bboxes])
  pd_class_ids = tf.constant(classid, dtype=tf.uint8, shape=[num_pd_bboxes])
  ap, ap_ha, pr, pr_ha, _ = py_metrics_ops.detection_metrics(
      prediction_bbox=f_pd_bbox,
      prediction_type=pd_class_ids,
      prediction_score=f_pd_score,
      prediction_frame_id=tf.cast(f_pd_imgid, tf.int64),
      prediction_overlap_nlz=tf.zeros_like(f_pd_imgid, dtype=tf.bool),
      ground_truth_bbox=f_gt_bbox,
      ground_truth_type=gt_class_ids,
      ground_truth_frame_id=tf.cast(f_gt_imgid, tf.int64),
      ground_truth_difficulty=tf.zeros_like(f_gt_imgid, dtype=tf.uint8),
      ground_truth_speed=f_gt_speed,
      config=self._waymo_metric_config.SerializeToString())

  # All tensors returned by Waymo's metric op have a leading dimension
  # B=number of breakdowns. At this moment we always use B=1 to stay
  # compatible with the Python code.
  scalar_metrics = {'ap': ap[0], 'ap_ha_weighted': ap_ha[0]}
  curve_metrics = {'pr': pr[0], 'pr_ha_weighted': pr_ha[0]}

  for i, metric in enumerate(breakdown_names):
    # There is a scalar / curve for every breakdown.
    scalar_metrics['ap_%s' % metric] = ap[i]
    scalar_metrics['ap_ha_weighted_%s' % metric] = ap_ha[i]
    curve_metrics['pr_%s' % metric] = pr[i]
    curve_metrics['pr_ha_weighted_%s' % metric] = pr_ha[i]

  return py_utils.NestedMap(
      feed_dict=feed_dict,
      scalar_metrics=scalar_metrics,
      curve_metrics=curve_metrics)

def ComputeNumericGradient(sess, y, x, delta=1e-4, step=1,
                           extra_feed_dict=None):
  """Computes the numeric gradient of y wrt x.

  Args:
    sess: The TF session constructed with a graph containing x and y.
    y: A scalar TF Tensor in the graph constructed in sess.
    x: A TF Tensor in the graph constructed in sess.
    delta: Gradient checker's small perturbation of x[i].
    step: Only compute numerical gradients for a subset of x values. I.e.
      dy/dx[i] is computed if i % step == 0.
    extra_feed_dict: Additional feed_dict of tensors to keep fixed during the
      gradient checking.

  Returns:
    A numpy array of the same shape and dtype as x. If x[i] is not chosen to
    compute the numerical gradient dy/dx[i], the corresponding value is set
    to 0.
  """
  x_data = sess.run(x)
  x_size = x_data.size
  x_shape = x_data.shape
  numeric_grad = np.zeros(x_size, dtype=x_data.dtype)

  # For variables we need to issue an assignment operation in order to update
  # the value of the variable. This is because with resource variables x will
  # be pointing to the handle rather than its value.
  feed_dict = extra_feed_dict or {}
  ph = tf.placeholder(x_data.dtype, x_shape)
  x_assign = x.assign(ph) if isinstance(x, tf.Variable) else None

  for i in range(0, x_size, step):
    x_pos = x_data.copy()
    if x_size == 1:
      x_pos += delta
    else:
      x_pos.flat[i] += delta
    if x_assign is None:
      feed_dict.update({x: x_pos})
    else:
      sess.run(x_assign, feed_dict={ph: x_pos})
    y_pos = sess.run(y, feed_dict=feed_dict)

    x_neg = x_data.copy()
    if x_size == 1:
      x_neg -= delta
    else:
      x_neg.flat[i] -= delta
    if x_assign is None:
      feed_dict.update({x: x_neg})
    else:
      sess.run(x_assign, feed_dict={ph: x_neg})
    y_neg = sess.run(y, feed_dict=feed_dict)
    # Central difference approximation of dy/dx[i].
    numeric_grad[i] = (y_pos - y_neg) / (2 * delta)

  # Restore the variable back to its original value to avoid breaking any
  # further test code that operates on the graph.
  if x_assign is not None:
    sess.run(x_assign, feed_dict={ph: x_data})
  return numeric_grad.reshape(x_shape)

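# Illustrative sketch (assumption): checking a symbolic gradient against
# ComputeNumericGradient(), assuming TF1 graph mode and numpy imported as np.
def _example_gradient_check():
  with tf.Graph().as_default(), tf.Session() as sess:
    x = tf.Variable([1.0, 2.0, 3.0])
    y = tf.reduce_sum(x * x)  # dy/dx = 2x.
    symbolic_grad = tf.gradients(y, x)[0]
    sess.run(tf.global_variables_initializer())
    numeric_grad = ComputeNumericGradient(sess, y, x)
    np.testing.assert_allclose(
        sess.run(symbolic_grad), numeric_grad, rtol=1e-3)
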
def _BuildMetric(self, feed_data, classid):
  """Construct tensors and the feed_dict for KITTI metric op.

  Args:
    feed_data: a NestedMap returned by _GetData().
    classid: integer. Unused in this implementation.

  Returns:
    A tuple of 3 dicts:

    - scalar_metrics: a dict mapping all the metric names to fetch tensors.
    - curve_metrics: a dict mapping all the curve names to fetch tensors.
    - feed_dict: a dict mapping the tensors in feed_tensors to feed values.
  """
  if feed_data is None:
    dummy_scalar = tf.constant(np.nan)
    dummy_curve = tf.zeros(
        [self.metadata.NumberOfPrecisionRecallPoints(), 2], tf.float32)
    scalar_metrics = {'ap': dummy_scalar}
    curve_metrics = {'pr': dummy_curve}
    return scalar_metrics, curve_metrics, {}

  feed_dict = {}

  f_iou = tf.placeholder(tf.float32)
  feed_dict[f_iou] = feed_data.iou_threshold

  f_gt_bbox = tf.placeholder(tf.float32)
  feed_dict[f_gt_bbox] = feed_data.gt.bbox

  f_gt_imgid = tf.placeholder(tf.int32)
  feed_dict[f_gt_imgid] = feed_data.gt.imgid

  f_gt_ignore = tf.placeholder(tf.int32)
  feed_dict[f_gt_ignore] = feed_data.gt.ignore

  f_pd_bbox = tf.placeholder(tf.float32)
  feed_dict[f_pd_bbox] = feed_data.pd.bbox

  f_pd_imgid = tf.placeholder(tf.int32)
  feed_dict[f_pd_imgid] = feed_data.pd.imgid

  f_pd_ignore = tf.placeholder(tf.int32)
  feed_dict[f_pd_ignore] = feed_data.pd.ignore

  f_pd_score = tf.placeholder(tf.float32)
  feed_dict[f_pd_score] = feed_data.pd.score

  ap, pr = ops.average_precision3d(
      iou_threshold=f_iou,
      groundtruth_bbox=f_gt_bbox,
      groundtruth_imageid=f_gt_imgid,
      groundtruth_ignore=f_gt_ignore,
      prediction_bbox=f_pd_bbox,
      prediction_imageid=f_pd_imgid,
      prediction_ignore=f_pd_ignore,
      prediction_score=f_pd_score,
      num_recall_points=self.metadata.NumberOfPrecisionRecallPoints())
  scalar_metrics = {'ap': ap}
  curve_metrics = {'pr': pr}
  return scalar_metrics, curve_metrics, feed_dict

def _BuildMetric(self, feed_data, classid):
  """Construct tensors and the feed_dict for Waymo metric op.

  Args:
    feed_data: a NestedMap returned by _GetData().
    classid: integer.

  Returns:
    A tuple of 3 dicts:

    - scalar_metrics: a dict mapping all the metric names to fetch tensors.
    - curve_metrics: a dict mapping all the curve names to fetch tensors.
    - feed_dict: a dict mapping the tensors in feed_tensors to feed values.
  """
  if feed_data is None:
    dummy_scalar = tf.constant(np.nan)
    dummy_curve = tf.zeros(
        [self.metadata.NumberOfPrecisionRecallPoints(), 2], tf.float32)
    scalar_metrics = {'ap': dummy_scalar, 'ap_ha_weighted': dummy_scalar}
    curve_metrics = {'pr': dummy_curve, 'pr_ha_weighted': dummy_curve}
    return scalar_metrics, curve_metrics, {}

  feed_dict = {}

  f_gt_bbox = tf.placeholder(tf.float32)
  feed_dict[f_gt_bbox] = feed_data.gt.bbox

  f_gt_imgid = tf.placeholder(tf.int32)
  feed_dict[f_gt_imgid] = feed_data.gt.imgid

  f_pd_bbox = tf.placeholder(tf.float32)
  feed_dict[f_pd_bbox] = feed_data.pd.bbox

  f_pd_imgid = tf.placeholder(tf.int32)
  feed_dict[f_pd_imgid] = feed_data.pd.imgid

  f_pd_score = tf.placeholder(tf.float32)
  feed_dict[f_pd_score] = feed_data.pd.score

  num_gt_bboxes = feed_data.gt.imgid.shape[0]
  num_pd_bboxes = feed_data.pd.imgid.shape[0]
  gt_class_ids = tf.constant(classid, dtype=tf.uint8, shape=[num_gt_bboxes])
  pd_class_ids = tf.constant(classid, dtype=tf.uint8, shape=[num_pd_bboxes])
  ap, ap_ha, pr, pr_ha, _ = py_metrics_ops.detection_metrics(
      prediction_bbox=f_pd_bbox,
      prediction_type=pd_class_ids,
      prediction_score=f_pd_score,
      prediction_frame_id=tf.to_int64(f_pd_imgid),
      prediction_overlap_nlz=tf.zeros_like(f_pd_imgid, dtype=tf.bool),
      ground_truth_bbox=f_gt_bbox,
      ground_truth_type=gt_class_ids,
      ground_truth_frame_id=tf.to_int64(f_gt_imgid),
      ground_truth_difficulty=tf.zeros_like(f_gt_imgid, dtype=tf.uint8),
      config=self._waymo_metric_config)

  # All tensors returned by Waymo's metric op have a leading dimension
  # B=number of breakdowns. At this moment we always use B=1 to stay
  # compatible with the Python code.
  scalar_metrics = {'ap': ap[0], 'ap_ha_weighted': ap_ha[0]}
  curve_metrics = {'pr': pr[0], 'pr_ha_weighted': pr_ha[0]}
  return scalar_metrics, curve_metrics, feed_dict

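# Illustrative sketch (assumption): consuming the (scalar_metrics,
# curve_metrics, feed_dict) triple returned by _BuildMetric above. `metric`
# and `feed_data` are hypothetical stand-ins for an instance of the
# containing class and the NestedMap from _GetData().
def _example_eval_metrics(metric, feed_data, sess):
  scalar_metrics, curve_metrics, feed_dict = metric._BuildMetric(
      feed_data, classid=1)
  # The placeholders carry numpy boxes/scores in via feed_dict; the op
  # computes AP and the PR curve in one run call.
  ap = sess.run(scalar_metrics['ap'], feed_dict=feed_dict)
  pr = sess.run(curve_metrics['pr'], feed_dict=feed_dict)
  return ap, pr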