def log_prob(self, value):
    """Log probability density/mass function.

    Args:
        value (Tensor): The input tensor.

    Returns:
        Tensor: log probability. The data type is the same as ``value``.
    """
    value = self._check_values_dtype_in_probs(self.low, value)
    if _non_static_mode():
        # ensure value in [low, high]
        lb_bool = self.low < value
        ub_bool = value < self.high
        lb = _C_ops.cast(lb_bool, 'in_dtype', lb_bool.dtype, 'out_dtype',
                         value.dtype)
        ub = _C_ops.cast(ub_bool, 'in_dtype', ub_bool.dtype, 'out_dtype',
                         value.dtype)
        return nn.log(lb * ub) - nn.log(self.high - self.low)

    name = self.name + '_log_prob'
    lb_bool = self.low < value
    ub_bool = value < self.high
    lb = tensor.cast(lb_bool, dtype=value.dtype)
    ub = tensor.cast(ub_bool, dtype=value.dtype)
    return elementwise_sub(nn.log(lb * ub),
                           nn.log(self.high - self.low),
                           name=name)
def probs(self, value):
    """Probability density/mass function.

    Args:
        value (Tensor): The input tensor.

    Returns:
        Tensor: probability. The data type is the same as ``value``.
    """
    value = self._check_values_dtype_in_probs(self.low, value)
    if _non_static_mode():
        lb_bool = self.low < value
        ub_bool = value < self.high
        lb = _C_ops.cast(lb_bool, 'in_dtype', lb_bool.dtype, 'out_dtype',
                         value.dtype)
        ub = _C_ops.cast(ub_bool, 'in_dtype', ub_bool.dtype, 'out_dtype',
                         value.dtype)
        return (lb * ub) / (self.high - self.low)

    name = self.name + '_probs'
    lb_bool = self.low < value
    ub_bool = value < self.high
    lb = tensor.cast(lb_bool, dtype=value.dtype)
    ub = tensor.cast(ub_bool, dtype=value.dtype)
    return elementwise_div((lb * ub), (self.high - self.low), name=name)
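# Usage sketch for the two methods above (illustrative only; assumes the public
# paddle.distribution.Uniform API). Inside (low, high) the density is
# 1 / (high - low) and log_prob is -log(high - low); outside the support the
# indicator lb * ub is zero, so probs gives 0 and log_prob gives -inf.
import paddle
from paddle.distribution import Uniform

uniform = Uniform(low=0.0, high=4.0)
inside = paddle.to_tensor([2.0])
print(uniform.probs(inside))     # [0.25]
print(uniform.log_prob(inside))  # [-1.3862944] == log(1/4)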
def _check_values_dtype_in_probs(self, param, value):
    """
    ``log_prob`` and ``probs`` take an input ``value``; if the dtype of ``value``
    differs from that of ``param``, cast ``value`` so that both dtypes are
    consistent.

    Args:
        param (Tensor): ``low`` and ``high`` in the Uniform class, ``loc`` and
            ``scale`` in the Normal class.
        value (Tensor): The input tensor.

    Returns:
        value (Tensor): ``value`` cast to ``param``'s dtype if the dtypes differ.
    """
    if _non_static_mode():
        if value.dtype != param.dtype and convert_dtype(
                value.dtype) in ['float32', 'float64']:
            warnings.warn(
                "dtype of input 'value' needs to be the same as parameters of distribution class. dtype of 'value' will be converted."
            )
            return _C_ops.cast(value, 'in_dtype', value.dtype, 'out_dtype',
                               param.dtype)
        return value

    check_variable_and_dtype(value, 'value', ['float32', 'float64'],
                             'log_prob')
    if value.dtype != param.dtype:
        warnings.warn(
            "dtype of input 'value' needs to be the same as parameters of distribution class. dtype of 'value' will be converted."
        )
        return tensor.cast(value, dtype=param.dtype)
    return value
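# Sketch of the dtype reconciliation above (illustrative; dygraph mode): passing a
# float64 value to a float32-parameterized distribution triggers the warning and an
# internal cast, so the returned log-probability keeps the parameters' dtype.
import paddle
from paddle.distribution import Uniform

u = Uniform(low=0.0, high=1.0)                # float32 parameters
v = paddle.to_tensor([0.5], dtype='float64')  # mismatched value dtype
print(u.log_prob(v).dtype)                    # paddle.float32 after the cast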
def __init__(self, logits, name=None):
    """
    Args:
        logits (list|tuple|numpy.ndarray|Tensor): The logits input of the
            categorical distribution. The data type is float32 or float64.
        name (str, optional): Name for the operation (optional, default is
            None). For more information, please refer to :ref:`api_guide_Name`.
    """
    if not _non_static_mode():
        check_type(logits, 'logits',
                   (np.ndarray, tensor.Variable, list, tuple), 'Categorical')

    self.name = name if name is not None else 'Categorical'
    self.dtype = 'float32'

    if self._validate_args(logits):
        self.logits = logits
        self.dtype = convert_dtype(logits.dtype)
    else:
        if isinstance(logits, np.ndarray) and str(
                logits.dtype) in ['float32', 'float64']:
            self.dtype = logits.dtype
        self.logits = self._to_tensor(logits)[0]
        if self.dtype != convert_dtype(self.logits.dtype):
            self.logits = tensor.cast(self.logits, dtype=self.dtype)
    dist_sum = paddle.sum(self.logits, axis=-1, keepdim=True)
    self._prob = self.logits / dist_sum
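# Construction sketch (illustrative only): `logits` are treated as unnormalized,
# non-negative weights, and `self._prob` normalizes them by their sum over the
# last axis.
import paddle
from paddle.distribution import Categorical

paddle.seed(100)
weights = paddle.rand([6])          # one distribution over 6 categories
cat = Categorical(weights)
value = paddle.to_tensor([0, 5])    # category indices to query
print(cat.probs(value))             # weights[[0, 5]] / weights.sum()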
def net(self, input, is_infer=False):
    """Network definition."""
    text = input[0]
    pos_tag = input[1]
    neg_tag = input[2]

    text_emb = fluid.embedding(input=text,
                               size=[self.vocab_text_size, self.emb_dim],
                               param_attr="text_emb")
    text_emb = fluid.layers.squeeze(input=text_emb, axes=[1])
    pos_tag_emb = fluid.embedding(input=pos_tag,
                                  size=[self.vocab_tag_size, self.emb_dim],
                                  param_attr="tag_emb")
    pos_tag_emb = fluid.layers.squeeze(input=pos_tag_emb, axes=[1])
    neg_tag_emb = fluid.embedding(input=neg_tag,
                                  size=[self.vocab_tag_size, self.emb_dim],
                                  param_attr="tag_emb")
    neg_tag_emb = fluid.layers.squeeze(input=neg_tag_emb, axes=[1])

    conv_1d = fluid.nets.sequence_conv_pool(input=text_emb,
                                            num_filters=self.hid_dim,
                                            filter_size=self.win_size,
                                            act="tanh",
                                            pool_type="max",
                                            param_attr="cnn")
    text_hid = fluid.layers.fc(input=conv_1d,
                               size=self.emb_dim,
                               param_attr="text_hid")
    cos_pos = nn.cos_sim(pos_tag_emb, text_hid)
    mul_text_hid = fluid.layers.sequence_expand_as(x=text_hid, y=neg_tag_emb)
    mul_cos_neg = nn.cos_sim(neg_tag_emb, mul_text_hid)
    cos_neg_all = fluid.layers.sequence_reshape(input=mul_cos_neg,
                                                new_dim=self.neg_size)
    # choose the max negative cosine
    cos_neg = nn.reduce_max(cos_neg_all, dim=1, keep_dim=True)
    # calculate hinge loss
    loss_part1 = nn.elementwise_sub(
        tensor.fill_constant_batch_size_like(input=cos_pos,
                                             shape=[-1, 1],
                                             value=self.margin,
                                             dtype='float32'), cos_pos)
    loss_part2 = nn.elementwise_add(loss_part1, cos_neg)
    loss_part3 = nn.elementwise_max(
        tensor.fill_constant_batch_size_like(input=loss_part2,
                                             shape=[-1, 1],
                                             value=0.0,
                                             dtype='float32'), loss_part2)
    avg_cost = nn.mean(loss_part3)
    less = tensor.cast(cf.less_than(cos_neg, cos_pos), dtype='float32')
    correct = nn.reduce_sum(less)
    self._cost = avg_cost

    if is_infer:
        self._infer_results["correct"] = correct
        self._infer_results["cos_pos"] = cos_pos
    else:
        self._metrics["correct"] = correct
        self._metrics["cos_pos"] = cos_pos
def get_acc(self, x, y):
    less = tensor.cast(cf.less_than(x, y), dtype='float32')
    label_ones = fluid.layers.fill_constant_batch_size_like(input=x,
                                                            dtype='float32',
                                                            shape=[-1, 1],
                                                            value=1.0)
    correct = fluid.layers.reduce_sum(less)
    total = fluid.layers.reduce_sum(label_ones)
    acc = fluid.layers.elementwise_div(correct, total)
    return acc
def network(vocab_text_size,
            vocab_tag_size,
            emb_dim=10,
            hid_dim=1000,
            win_size=5,
            margin=0.1,
            neg_size=5):
    """Network definition."""
    text = io.data(name="text", shape=[1], lod_level=1, dtype='int64')
    pos_tag = io.data(name="pos_tag", shape=[1], lod_level=1, dtype='int64')
    neg_tag = io.data(name="neg_tag", shape=[1], lod_level=1, dtype='int64')

    text_emb = nn.embedding(input=text,
                            size=[vocab_text_size, emb_dim],
                            param_attr="text_emb")
    pos_tag_emb = nn.embedding(input=pos_tag,
                               size=[vocab_tag_size, emb_dim],
                               param_attr="tag_emb")
    neg_tag_emb = nn.embedding(input=neg_tag,
                               size=[vocab_tag_size, emb_dim],
                               param_attr="tag_emb")

    conv_1d = fluid.nets.sequence_conv_pool(input=text_emb,
                                            num_filters=hid_dim,
                                            filter_size=win_size,
                                            act="tanh",
                                            pool_type="max",
                                            param_attr="cnn")
    text_hid = fluid.layers.fc(input=conv_1d,
                               size=emb_dim,
                               param_attr="text_hid")
    cos_pos = nn.cos_sim(pos_tag_emb, text_hid)
    mul_text_hid = fluid.layers.sequence_expand_as(x=text_hid, y=neg_tag_emb)
    mul_cos_neg = nn.cos_sim(neg_tag_emb, mul_text_hid)
    cos_neg_all = fluid.layers.sequence_reshape(input=mul_cos_neg,
                                                new_dim=neg_size)
    # choose the max negative cosine
    cos_neg = nn.reduce_max(cos_neg_all, dim=1, keep_dim=True)
    # calculate hinge loss
    loss_part1 = nn.elementwise_sub(
        tensor.fill_constant_batch_size_like(input=cos_pos,
                                             shape=[-1, 1],
                                             value=margin,
                                             dtype='float32'), cos_pos)
    loss_part2 = nn.elementwise_add(loss_part1, cos_neg)
    loss_part3 = nn.elementwise_max(
        tensor.fill_constant_batch_size_like(input=loss_part2,
                                             shape=[-1, 1],
                                             value=0.0,
                                             dtype='float32'), loss_part2)
    avg_cost = nn.mean(loss_part3)
    less = tensor.cast(cf.less_than(cos_neg, cos_pos), dtype='float32')
    correct = nn.reduce_sum(less)
    return avg_cost, correct, cos_pos
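# Hinge-loss sketch for the TagSpace networks above (numpy only, hypothetical helper;
# the real graphs build the same computation with fluid ops). Per example the loss is
# max(0, margin - cos(text, pos_tag) + max_neg cos(text, neg_tag)), i.e. the positive
# tag must beat the hardest sampled negative by at least `margin`.
import numpy as np

def tagspace_hinge_loss(cos_pos, cos_neg_all, margin=0.1):
    # cos_pos: [batch, 1] positive-tag cosine; cos_neg_all: [batch, neg_size]
    cos_neg = cos_neg_all.max(axis=1, keepdims=True)   # hardest negative
    return np.maximum(0.0, margin - cos_pos + cos_neg).mean()

cos_pos = np.array([[0.8], [0.2]])
cos_neg_all = np.array([[0.1, 0.3], [0.4, 0.5]])
print(tagspace_hinge_loss(cos_pos, cos_neg_all))       # mean of [0, 0.4] -> 0.2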
def __init__(self, loc, scale, name=None):
    if not _non_static_mode():
        check_type(loc, 'loc',
                   (int, float, np.ndarray, tensor.Variable, list, tuple),
                   'Normal')
        check_type(scale, 'scale',
                   (int, float, np.ndarray, tensor.Variable, list, tuple),
                   'Normal')

    self.batch_size_unknown = False
    self.all_arg_is_float = False
    self.name = name if name is not None else 'Normal'
    self.dtype = 'float32'

    if isinstance(loc, int):
        loc = float(loc)
    if isinstance(scale, int):
        scale = float(scale)

    if self._validate_args(loc, scale):
        self.batch_size_unknown = True
        self.loc = loc
        self.scale = scale
        self.dtype = convert_dtype(loc.dtype)
    else:
        if isinstance(loc, float) and isinstance(scale, float):
            self.all_arg_is_float = True
        if isinstance(loc, np.ndarray) and str(
                loc.dtype) in ['float32', 'float64']:
            self.dtype = loc.dtype
        elif isinstance(scale, np.ndarray) and str(
                scale.dtype) in ['float32', 'float64']:
            self.dtype = scale.dtype
        # pylint: disable=unbalanced-tuple-unpacking
        self.loc, self.scale = self._to_tensor(loc, scale)
        if self.dtype != convert_dtype(self.loc.dtype):
            self.loc = tensor.cast(self.loc, dtype=self.dtype)
            self.scale = tensor.cast(self.scale, dtype=self.dtype)
    super(Normal, self).__init__(self.loc.shape)
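# Construction sketch (illustrative only): loc/scale may be Python scalars, numpy
# arrays, or tensors; the dtype follows the first float array/tensor argument and
# otherwise defaults to float32, with batch dimensions taken from the parameters.
import paddle
from paddle.distribution import Normal

normal = Normal(loc=0.0, scale=2.0)                   # scalar parameters, float32
samples = normal.sample([3])                          # draw samples with the requested shape
batch = Normal(loc=paddle.to_tensor([0.0, 1.0]),
               scale=paddle.to_tensor([1.0, 2.0]))    # two distributions at once
print(batch.log_prob(paddle.to_tensor([0.5, 0.5])))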
def __init__(self, low, high, name=None):
    if not _non_static_mode():
        check_type(low, 'low',
                   (int, float, np.ndarray, tensor.Variable, list, tuple),
                   'Uniform')
        check_type(high, 'high',
                   (int, float, np.ndarray, tensor.Variable, list, tuple),
                   'Uniform')

    self.all_arg_is_float = False
    self.batch_size_unknown = False
    self.name = name if name is not None else 'Uniform'
    self.dtype = 'float32'

    if isinstance(low, int):
        low = float(low)
    if isinstance(high, int):
        high = float(high)

    if self._validate_args(low, high):
        self.batch_size_unknown = True
        self.low = low
        self.high = high
        self.dtype = convert_dtype(low.dtype)
    else:
        if isinstance(low, float) and isinstance(high, float):
            self.all_arg_is_float = True
        if isinstance(low, np.ndarray) and str(
                low.dtype) in ['float32', 'float64']:
            self.dtype = low.dtype
        elif isinstance(high, np.ndarray) and str(
                high.dtype) in ['float32', 'float64']:
            self.dtype = high.dtype
        # pylint: disable=unbalanced-tuple-unpacking
        self.low, self.high = self._to_tensor(low, high)
        if self.dtype != convert_dtype(self.low.dtype):
            self.low = tensor.cast(self.low, dtype=self.dtype)
            self.high = tensor.cast(self.high, dtype=self.dtype)
def _get_correct(self, x, y):
    less = tensor.cast(cf.less_than(x, y), dtype='float32')
    correct = fluid.layers.reduce_sum(less)
    return correct
def _decay_step_counter(begin=0):
    # the first global step is zero in learning rate decay
    global_step = nn.autoincreased_step_counter(
        counter_name='@LR_DECAY_COUNTER@', begin=begin, step=1)
    global_step = tensor.cast(global_step, 'float32')
    return global_step
def _lf_embedder(self, tokens, token_lens=None):
    """lf embedder.

    Args:
        tokens (Variable): [batch_size, seq_len]
        token_lens (Variable): Default is None.

    Returns: TODO

    Raises: NULL
    """
    self._batch_size = layers.shape(self.question_encoding)[0]

    ## Grammar Rule Embedding
    self._grammar_vocab = tensor.cast(tensor.assign(
        self.grammar.gmr_vocab.astype(np.int32)),
                                      dtype='int64')
    self._grammar_emb = fluid.embedding(
        input=self._grammar_vocab,
        size=[self.grammar.grammar_size, self.lf_emb_size],
        dtype='float32',
        is_sparse=False,
        param_attr=fluid.ParamAttr(name="lf_embedding",
                                   initializer=nn_utils.uniform(
                                       self.init_scale)))

    batch_emb_lookup_grammar = layers.expand(
        layers.unsqueeze(self._grammar_emb, [0]), [self._batch_size, 1, 1])

    def _table_to_lf_input(ori_encoding):
        """transform ori_encoding to the size of lf_embedding"""
        output = layers.fc(input=ori_encoding,
                           size=self.lf_emb_size,
                           num_flatten_dims=2,
                           **nn_utils.param_attr('fc_table2lf_input',
                                                 self.init_scale,
                                                 need_bias=False))
        return output

    batch_emb_lookup_all = tensor.concat([
        batch_emb_lookup_grammar,
        _table_to_lf_input(self.tname_encoding),
        _table_to_lf_input(self.cname_encoding),
        _table_to_lf_input(self.value_encoding)
    ], axis=1)
    lf_embedding = nn_utils.batch_gather_2d(batch_emb_lookup_all, tokens)

    ## Grammar Rule Type Embedding
    self._grammar2name = layers.cast(layers.assign(
        self.grammar.gmr2name_arr.astype(np.int32)),
                                     dtype='int64')
    lf_name = layers.reshape(layers.gather(
        self._grammar2name, layers.reshape(tokens, shape=[-1])),
                             shape=tokens.shape)
    lf_name.stop_gradient = True
    lf_name_emb = fluid.embedding(
        input=lf_name,
        size=[self.grammar.name_size, self.lf_name_emb_size],
        dtype='float32',
        is_sparse=False,
        param_attr=fluid.ParamAttr(name="lf_name_embedding",
                                   initializer=nn_utils.uniform(
                                       self.init_scale)))

    output = layers.concat([lf_embedding, lf_name_emb], axis=-1)
    if token_lens is not None:
        mask = layers.sequence_mask(token_lens,
                                    maxlen=layers.shape(tokens)[1],
                                    dtype='float32')
        output = layers.elementwise_mul(output, mask, axis=0)
    return output
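# Sketch of the batch-wise gather used above (numpy; the assumed semantics of
# nn_utils.batch_gather_2d, which is not shown here): for each batch b and position t,
# pick lookup[b, tokens[b, t], :] from that batch's embedding lookup table.
import numpy as np

def batch_gather_2d(lookup, tokens):
    # lookup: [batch, vocab, emb], tokens: [batch, seq_len] -> [batch, seq_len, emb]
    batch_idx = np.arange(lookup.shape[0])[:, None]
    return lookup[batch_idx, tokens]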
def __call__(self,
             location,
             confidence,
             gt_box,
             gt_label,
             landmark_predict,
             lmk_label,
             lmk_ignore_flag,
             prior_box,
             prior_box_var=None):

    def _reshape_to_2d(var):
        return layers.flatten(x=var, axis=2)

    helper = LayerHelper('ssd_loss')  #, **locals())
    # Only support mining_type == 'max_negative' now.
    mining_type = 'max_negative'
    # The max `sample_size` of negative box, used only
    # when mining_type is `hard_example`.
    sample_size = None
    num, num_prior, num_class = confidence.shape
    conf_shape = layers.shape(confidence)

    # 1. Find matched bounding boxes by prior box.
    # 1.1 Compute IOU similarity between ground-truth boxes and prior boxes.
    iou = iou_similarity(x=gt_box, y=prior_box)
    # 1.2 Compute matched bounding boxes by the bipartite matching algorithm.
    matched_indices, matched_dist = bipartite_match(iou, self.match_type,
                                                    self.overlap_threshold)

    # 2. Compute confidence for mining hard examples
    # 2.1. Get the target label based on matched indices
    gt_label = layers.reshape(x=gt_label,
                              shape=(len(gt_label.shape) - 1) * (0, ) +
                              (-1, 1))
    gt_label.stop_gradient = True
    target_label, _ = target_assign(gt_label,
                                    matched_indices,
                                    mismatch_value=self.background_label)
    # 2.2. Compute confidence loss.
    # Reshape confidence to 2D tensor.
    confidence = _reshape_to_2d(confidence)
    target_label = tensor.cast(x=target_label, dtype='int64')
    target_label = _reshape_to_2d(target_label)
    target_label.stop_gradient = True
    conf_loss = layers.softmax_with_cross_entropy(confidence, target_label)

    # 3. Mining hard examples
    actual_shape = layers.slice(conf_shape, axes=[0], starts=[0], ends=[2])
    actual_shape.stop_gradient = True
    conf_loss = layers.reshape(x=conf_loss,
                               shape=(-1, 0),
                               actual_shape=actual_shape)
    conf_loss.stop_gradient = True
    neg_indices = helper.create_variable_for_type_inference(dtype='int32')
    updated_matched_indices = helper.create_variable_for_type_inference(
        dtype=matched_indices.dtype)
    helper.append_op(type='mine_hard_examples',
                     inputs={
                         'ClsLoss': conf_loss,
                         'LocLoss': None,
                         'MatchIndices': matched_indices,
                         'MatchDist': matched_dist,
                     },
                     outputs={
                         'NegIndices': neg_indices,
                         'UpdatedMatchIndices': updated_matched_indices
                     },
                     attrs={
                         'neg_pos_ratio': self.neg_pos_ratio,
                         'neg_dist_threshold': self.neg_overlap,
                         'mining_type': mining_type,
                         'sample_size': sample_size,
                     })

    # 4. Assign classification and regression targets
    # 4.1. Encode bboxes according to the prior boxes.
    encoded_bbox = box_coder(prior_box=prior_box,
                             prior_box_var=prior_box_var,
                             target_box=gt_box,
                             code_type='encode_center_size')
    # 4.2. Assign regression targets
    target_bbox, target_loc_weight = target_assign(
        encoded_bbox,
        updated_matched_indices,
        mismatch_value=self.background_label)
    # 4.3. Assign classification targets
    target_label, target_conf_weight = target_assign(
        gt_label,
        updated_matched_indices,
        negative_indices=neg_indices,
        mismatch_value=self.background_label)
    target_loc_weight = target_loc_weight * target_label

    encoded_lmk_label = self.decode_lmk(lmk_label, prior_box, prior_box_var)
    target_lmk, target_lmk_weight = target_assign(
        encoded_lmk_label,
        updated_matched_indices,
        mismatch_value=self.background_label)
    lmk_ignore_flag = layers.reshape(
        x=lmk_ignore_flag,
        shape=(len(lmk_ignore_flag.shape) - 1) * (0, ) + (-1, 1))
    target_ignore, nouse = target_assign(
        lmk_ignore_flag,
        updated_matched_indices,
        mismatch_value=self.background_label)
    target_lmk_weight = target_lmk_weight * target_ignore

    landmark_predict = _reshape_to_2d(landmark_predict)
    target_lmk = _reshape_to_2d(target_lmk)
    target_lmk_weight = _reshape_to_2d(target_lmk_weight)
    lmk_loss = layers.smooth_l1(landmark_predict, target_lmk)
    lmk_loss = lmk_loss * target_lmk_weight
    target_lmk.stop_gradient = True
    target_lmk_weight.stop_gradient = True
    target_ignore.stop_gradient = True
    nouse.stop_gradient = True

    # 5. Compute loss.
    # 5.1 Compute confidence loss.
    target_label = _reshape_to_2d(target_label)
    target_label = tensor.cast(x=target_label, dtype='int64')
    conf_loss = layers.softmax_with_cross_entropy(confidence, target_label)
    target_conf_weight = _reshape_to_2d(target_conf_weight)
    conf_loss = conf_loss * target_conf_weight
    # the target_label and target_conf_weight do not have gradient.
    target_label.stop_gradient = True
    target_conf_weight.stop_gradient = True

    # 5.2 Compute regression loss.
    location = _reshape_to_2d(location)
    target_bbox = _reshape_to_2d(target_bbox)
    loc_loss = layers.smooth_l1(location, target_bbox)
    target_loc_weight = _reshape_to_2d(target_loc_weight)
    loc_loss = loc_loss * target_loc_weight
    # the target_bbox and target_loc_weight do not have gradient.
    target_bbox.stop_gradient = True
    target_loc_weight.stop_gradient = True

    # 5.3 Compute overall weighted loss.
    loss = self.conf_loss_weight * conf_loss + self.loc_loss_weight * loc_loss + 0.4 * lmk_loss
    # reshape to [N, Np], N is the batch size and Np is the prior box number.
    loss = layers.reshape(x=loss, shape=(-1, 0), actual_shape=actual_shape)
    loss = layers.reduce_sum(loss, dim=1, keep_dim=True)
    if self.normalize:
        normalizer = layers.reduce_sum(target_loc_weight) + 1
        loss = loss / normalizer
    return loss
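# Weighted-loss sketch for the __call__ above (numpy only, hypothetical helper): the
# per-prior confidence, localization and landmark losses are combined with the weights
# conf_loss_weight, loc_loss_weight and a fixed 0.4, summed over priors, and optionally
# normalized by the sum of the localization weights plus one.
import numpy as np

def combined_ssd_landmark_loss(conf_loss, loc_loss, lmk_loss, target_loc_weight,
                               conf_w=1.0, loc_w=1.0, lmk_w=0.4, normalize=True):
    # each loss tensor: [N, Np] with N = batch size, Np = number of prior boxes
    loss = conf_w * conf_loss + loc_w * loc_loss + lmk_w * lmk_loss
    loss = loss.sum(axis=1, keepdims=True)            # [N, 1]
    if normalize:
        loss = loss / (target_loc_weight.sum() + 1)
    return loss

rng = np.random.default_rng(0)
print(combined_ssd_landmark_loss(rng.random((2, 4)), rng.random((2, 4)),
                                 rng.random((2, 4)), np.ones((2, 4))))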
def decode_with_grammar(decoder, inits, decode_vocab, max_step_num, **kwargs):
    """A modification of paddle.fluid.layers.dynamic_decode(...).

    Dynamic decoding performs :code:`decoder.step()` repeatedly until the returned
    Tensor indicating the finished status contains all True values or the number of
    decoding steps reaches :attr:`max_step_num`. :code:`decoder.initialize()` is
    called once before the decoding loop. If the `decoder` implements a `finalize`
    method, :code:`decoder.finalize()` is called once after the decoding loop.

    Args:
        decoder (Decoder): An instance of `Decoder`.
        inits (tuple): Argument passed to `decoder.initialize`.
        decode_vocab (DecoderDynamicVocab): namedtuple(table table_len column
            column_len value value_len)
        max_step_num (int): The maximum number of steps.
        **kwargs: Additional keyword arguments. Arguments passed to `decoder.step`.

    Returns:
        tuple: A tuple( :code:`(final_outputs, final_states)` ) including the final \
            outputs and states, both are Tensor or nested structure of Tensor. \
            `final_outputs` has the same structure and data types as \
            :code:`decoder.output_dtype` , and each Tensor in `final_outputs` \
            is the stacked outputs of all decoding steps, which might be revised \
            by :code:`decoder.finalize` . `final_states` is the counterpart \
            at the last time step of the initial states returned by \
            :code:`decoder.initialize` , thus has the same structure with it and \
            has tensors with same shapes and data types.
    """
    step_cnt = tensor.fill_constant(shape=[1], dtype="int64", value=1)
    max_step_num_tensor = tensor.fill_constant(shape=[1],
                                               dtype="int64",
                                               value=max_step_num - 2)

    # shape = [batch_size, beam_size, ...]
    initial_inputs, initial_states, initial_finished = decoder.initialize(
        inits, decode_vocab)
    global_inputs, global_states, global_finished = (initial_inputs,
                                                     initial_states,
                                                     initial_finished)
    inputs = initial_inputs
    states = initial_states

    # buffers for the decoding outputs
    outputs_arr_data = tensor.fill_constant_batch_size_like(
        inputs.input,
        shape=[-1, decoder.beam_size, max_step_num],
        dtype=decoder.output_dtype.predicted_ids,
        value=0)
    outputs_arr_pos = tensor.fill_constant_batch_size_like(
        inputs.input, shape=[-1, decoder.beam_size, 1], dtype='int64', value=0)
    outputs_array = data_structure.ArrayData(
        decoder.merge_batch_beams(outputs_arr_data),
        decoder.merge_batch_beams(outputs_arr_pos))

    sequence_lengths = tensor.cast(tensor.zeros_like(initial_finished),
                                   "int64")

    # auxiliary data structures that constrain decoding to follow the grammar
    grammar_stack_dat = tensor.fill_constant_batch_size_like(
        inputs.input,
        shape=[-1, decoder.beam_size, max_step_num * STACK_EXPAND_TIMES],
        dtype='int64',
        value=0)
    grammar_stack_pos = tensor.fill_constant_batch_size_like(
        inputs.input, shape=[-1, decoder.beam_size, 1], dtype='int64', value=0)
    grammar_stack = data_structure.StackData(
        decoder.merge_batch_beams(grammar_stack_dat),
        decoder.merge_batch_beams(grammar_stack_pos))

    ############ decode in a loop until every beam is finished ############
    # a beam is finished when global_finished/next_finished is set or
    # max_step_num is reached
    cond = layers.logical_not((layers.reduce_all(initial_finished)))
    while_op = layers.While(cond)
    with while_op.block():
        # step_outputs --> OutputWrapper
        # next_states  --> StateWrapper
        # next_inputs  --> DecoderInputsWrapper
        step_outputs, next_states, next_inputs = decoder.step(
            inputs, states, **kwargs)
        predicted_ids = step_outputs.predicted_ids
        _save_predict_output(outputs_array, predicted_ids,
                             next_states.finished)

        pred_gmr_type = decoder.grammar_type(predicted_ids)
        cond_type_leaf = layers.equal(pred_gmr_type, decoder.GMR_TYPE.LEAF)
        cond_type_midd = layers.equal(pred_gmr_type, decoder.GMR_TYPE.MID)
        _process_type_leaf(cond_type_leaf, decoder, grammar_stack,
                           next_inputs, next_states.finished)
        _process_type_midd(cond_type_midd, decoder, grammar_stack,
                           next_inputs, predicted_ids)

        ##next_sequence_lengths = layers.elementwise_add(sequence_lengths,
        ##        tensor.cast(layers.logical_not(global_finished), sequence_lengths.dtype))

        _check_finished(decoder, next_inputs, next_states.finished,
                        outputs_array)

        layers.utils.map_structure(tensor.assign, next_inputs, global_inputs)
        layers.utils.map_structure(tensor.assign, next_states, global_states)
        tensor.assign(next_states.finished, global_finished)
        ##tensor.assign(next_sequence_lengths, sequence_lengths)

        # update the loop condition
        layers.increment(x=step_cnt, value=1.0, in_place=True)
        layers.logical_and(
            layers.logical_not(layers.reduce_all(next_states.finished)),
            layers.less_equal(step_cnt, max_step_num_tensor), cond)

    final_outputs = outputs_array.data
    final_states = global_states

    final_outputs, final_states = decoder.finalize(final_outputs,
                                                   global_states,
                                                   sequence_lengths)

    return final_outputs, final_states
def probs(self, value):
    """Probabilities of the given category (``value``).

    If ``logits`` is 2-D or higher dimensional, the last dimension will be
    regarded as the category, and the others represent the different
    distributions. At the same time, if ``value`` is a 1-D Tensor, ``value``
    will be broadcast to the same number of distributions as ``logits``. If
    ``value`` is not a 1-D Tensor, ``value`` should have the same number of
    distributions as ``logits``. That is, ``value[:-1] = logits[:-1]``.

    Args:
        value (Tensor): The input tensor represents the selected category index.

    Returns:
        Tensor: probability according to the category index.

    Examples:
        .. code-block:: python

            import paddle
            from paddle.distribution import Categorical

            paddle.seed(100) # on CPU device
            x = paddle.rand([6])
            print(x)
            # [0.5535528  0.20714243 0.01162981
            #  0.51577556 0.36369765 0.2609165 ]

            cat = Categorical(x)

            value = paddle.to_tensor([2,1,3])
            cat.probs(value)
            # [0.00608027 0.108298 0.269656]

    """
    name = self.name + '_probs'

    dist_sum = nn.reduce_sum(self.logits, dim=-1, keep_dim=True)
    prob = self.logits / dist_sum

    shape = list(prob.shape)
    value_shape = list(value.shape)
    if len(shape) == 1:
        num_value_in_one_dist = np.prod(value_shape)
        index_value = nn.reshape(value, [num_value_in_one_dist, 1])
        index = index_value
    else:
        num_dist = np.prod(shape[:-1])
        num_value_in_one_dist = value_shape[-1]
        prob = nn.reshape(prob, [num_dist, shape[-1]])
        if len(value_shape) == 1:
            value = nn.expand(value, [num_dist])
            value_shape = shape[:-1] + value_shape
        index_value = nn.reshape(value, [num_dist, -1, 1])
        if shape[:-1] != value_shape[:-1]:
            raise ValueError(
                "shape of value {} must match shape of logits {}".format(
                    str(value_shape[:-1]), str(shape[:-1])))

        index_prefix = nn.unsqueeze(arange(num_dist,
                                           dtype=index_value.dtype),
                                    axes=-1)
        index_prefix = nn.expand(index_prefix, [1, num_value_in_one_dist])
        index_prefix = nn.unsqueeze(index_prefix, axes=-1)

        if index_value.dtype != index_prefix.dtype:
            # cast the prefix so both index parts share one dtype before concat
            index_prefix = tensor.cast(index_prefix, dtype=index_value.dtype)
        index = concat([index_prefix, index_value], axis=-1)

    # value is the category index to search for the corresponding probability.
    select_prob = gather_nd(prob, index)
    return nn.reshape(select_prob, value_shape, name=name)