Code example #1
File: box_ops.py Project: August66/DETR-Paddle
def generalized_box_iou(boxes1, boxes2):
    """
    Generalized IoU from https://giou.stanford.edu/

    The boxes should be in [x0, y0, x1, y1] format

    Returns a [N, M] pairwise matrix, where N = len(boxes1)
    and M = len(boxes2)
    """
    # degenerate boxes gives inf / nan results
    # so do an early check
    assert L.reduce_all(boxes1[:, 2:] >= boxes1[:, :2])
    assert L.reduce_all(boxes2[:, 2:] >= boxes2[:, :2])
    iou, union = box_iou(boxes1, boxes2)

    N, M = boxes1.shape[0], boxes2.shape[0]
    boxes1 = L.unsqueeze(boxes1, axes=[1])  # [N, 1, 4]
    boxes1 = L.expand(boxes1, [1, M, 1])  # [N, M, 4]
    boxes2 = L.unsqueeze(boxes2, axes=[0])  # [1, M, 4]
    boxes2 = L.expand(boxes2, [N, 1, 1])  # [N, M, 4]
    lt = L.elementwise_min(boxes1[:, :, :2], boxes2[:, :, :2])  # [N, M, 2]
    rb = L.elementwise_max(boxes1[:, :, 2:], boxes2[:, :, 2:])  # [N, M, 2]

    wh = L.clip(rb - lt, min=0, max=1e8)  # [N, M, 2]
    area = wh[:, :, 0] * wh[:, :, 1] + 1e-4  # enclosing-box area; epsilon prevents division by zero

    return iou - (area - union) / area
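
A minimal usage sketch, assuming `L` aliases `paddle.fluid.layers`, the companion `box_iou` helper is available in the same module, and dygraph mode is active (the boxes are made up):

import numpy as np
import paddle.fluid.dygraph as dg

with dg.guard():
    boxes1 = dg.to_variable(np.array([[0., 0., 2., 2.]], dtype='float32'))
    boxes2 = dg.to_variable(np.array([[1., 1., 3., 3.]], dtype='float32'))
    giou = generalized_box_iou(boxes1, boxes2)  # [1, 1] pairwise matrix
    # IoU = 1/7, union = 7, enclosing box area = 9, so GIoU ~ 1/7 - 2/9 ~ -0.079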
Code example #2
def get_face_mask(densepose_map):
    """
    Obtain a mask of face regions.

    Args:
        densepose_map (3D or 4D tensor)
    """
    need_reshape = len(densepose_map.shape) == 4
    if need_reshape:
        bo, t, h, w = densepose_map.shape
        densepose_map = L.reshape(densepose_map, (-1, h, w))

    b, h, w = densepose_map.shape
    part_map = (densepose_map / 2 + 0.5) * 24
    assert L.reduce_all((part_map >= 0)) and L.reduce_all((part_map < 25))

    mask = dg.to_variable(np.zeros((b, h, w)).astype('bool'))

    for j in [23, 24]:
        mask = L.logical_or(
            mask, L.logical_and((part_map > j - 0.1), (part_map < j + 0.1)))

    if need_reshape:
        mask = L.reshape(mask, (bo, t, h, w))

    return P.cast(mask, "float32")
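
The loop above is a tolerance test around the face part indices; a NumPy sketch of the same logic on a hypothetical one-pixel-wide input (the map is rescaled from [-1, 1] to the 0-24 DensePose part range, and parts 23 and 24 are the face):

import numpy as np

dp = np.array([[[11. / 12., 0.0]]])    # one pixel on part 23, one on part 0
part_map = (dp / 2 + 0.5) * 24         # rescale [-1, 1] -> [0, 24]
mask = np.zeros(dp.shape, dtype=bool)
for j in [23, 24]:                     # face part indices
    mask |= (part_map > j - 0.1) & (part_map < j + 0.1)
print(mask)                            # [[[ True False]]]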
Code example #3
    def is_finished(self, step_idx, source_length, alive_log_probs, finished_scores, finished_in_finished):
        """
            is_finished
        """
        base_1 = layers.cast(source_length, 'float32') + 55.0
        base_1 /= 6.0
        max_length_penalty = layers.pow(base_1, self.alpha)

        flat_alive_log_probs = layers.reshape(alive_log_probs, [-1])
        lower_bound_alive_scores_1 = layers.gather(flat_alive_log_probs, [self.get_alive_index])
        
        lower_bound_alive_scores = lower_bound_alive_scores_1 / max_length_penalty
        
        lowest_score_of_finished_in_finish = layers.reduce_min(finished_scores * finished_in_finished, dim=1)

        finished_in_finished = layers.cast(finished_in_finished, 'bool')
        lowest_score_of_finished_in_finish += \
                        ((1.0 - layers.cast(layers.reduce_any(finished_in_finished, 1), 'float32')) * -INF)
        
        bound_is_met = layers.reduce_all(layers.greater_than(lowest_score_of_finished_in_finish, 
                                                             lower_bound_alive_scores))

        decode_length = source_length + 50
        length_cond = layers.less_than(x=step_idx, y=decode_length)

        return layers.logical_and(x=layers.logical_not(bound_is_met), y=length_cond)
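
The `(source_length + 55) / 6` base is the GNMT-style length penalty `((5 + length) / 6) ** alpha` evaluated at the maximum decode length `source_length + 50` (the same formula appears in code example #7 below). A quick arithmetic check with made-up values:

source_length, alpha = 13.0, 0.6
# max decode length = 13 + 50 = 63, so (5 + 63) / 6 == (13 + 55) / 6
assert (5.0 + source_length + 50.0) / 6.0 == (source_length + 55.0) / 6.0
max_length_penalty = ((source_length + 55.0) / 6.0) ** alpha  # ~4.29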
Code example #4
def epoch_evaluate(args, model, loader, puncts):
    """Evaluate in one epoch"""
    model.eval()

    total_loss, metric = 0, Metric()

    for words, feats, arcs, rels in loader():
        # ignore the first token of each sentence
        tmp_words = layers.pad(words[:, 1:],
                               paddings=[0, 0, 1, 0],
                               pad_value=args.pad_index)
        mask = tmp_words != args.pad_index

        s_arc, s_rel = model(words, feats)
        loss = loss_function(s_arc, s_rel, arcs, rels, mask)
        arc_preds, rel_preds = decode(args, s_arc, s_rel, mask)
        # ignore all punctuation if not specified
        if not args.punct:
            punct_mask = layers.reduce_all(
                layers.expand(layers.unsqueeze(words, -1),
                              (1, 1, puncts.shape[0])) != layers.expand(
                                  layers.reshape(puncts, (1, 1, -1)),
                                  (*words.shape, 1)),
                dim=-1)
            mask = layers.logical_and(mask, punct_mask)

        metric(arc_preds, rel_preds, arcs, rels, mask)
        total_loss += loss.numpy().item()

    total_loss /= len(loader)

    return total_loss, metric
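
The expand/reduce_all construction of `punct_mask` is a broadcast "token is not any punctuation id" test; a NumPy sketch with toy ids:

import numpy as np

words = np.array([[5, 7, 2]])   # [batch, seq_len], toy token ids
puncts = np.array([2, 9])       # toy punctuation ids
# True where the token differs from every punctuation id
punct_mask = (words[:, :, None] != puncts[None, None, :]).all(axis=-1)
print(punct_mask)               # [[ True  True False]]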
Code example #5
def reduce_compare(x, op_str, y):
    element_wise_result = eval("x " + op_str + " y")
    if op_str == "!=":
        return reduce_any(element_wise_result)
    elif op_str == "is" or op_str == "is not" or op_str == "in" or op_str == "not in":
        return element_wise_result
    else:
        return reduce_all(element_wise_result)
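
The asymmetry is deliberate: a tensor-level `x != y` should be the negation of tensor-level `x == y`, so `!=` reduces with *any* while the other comparisons reduce with *all*. A NumPy mirror of the semantics (the two reduce helpers stand in for the Paddle ops):

import numpy as np

def reduce_any(t):
    return bool(np.any(t))

def reduce_all(t):
    return bool(np.all(t))

x, y = np.array([1, 2, 3]), np.array([1, 2, 4])
print(reduce_compare(x, "==", y))  # False: not all elements are equal
print(reduce_compare(x, "!=", y))  # True: at least one element differs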
Code example #6
File: model.py Project: preston1000/bib_manage
def epoch_evaluate(args, model, loader, punctuation):
    """Evaluate in one epoch"""
    model.eval()
    total_loss, metric = 0, Metric()
    pad_index = args.pad_index
    bos_index = args.bos_index
    eos_index = args.eos_index

    for batch_index, inputs in enumerate(loader(), start=1):
        if args.encoding_model.startswith("ernie"):
            words, connections, deprel = inputs
            connection_prob, deprel_prob, words = model(words)
        else:
            words, feats, connections, deprel = inputs
            connection_prob, deprel_prob, words = model(words, feats)
        mask = layers.logical_and(
            layers.logical_and(words != pad_index, words != bos_index),
            words != eos_index,
        )
        loss = loss_function(connection_prob, deprel_prob, connections, deprel,
                             mask)
        connection_predict, deprel_predict = decode(args, connection_prob,
                                                    deprel_prob, mask)
        # ignore all punctuation if not specified
        if not args.punct:
            punct_mask = layers.reduce_all(
                layers.expand(layers.unsqueeze(words, -1),
                              (1, 1, punctuation.shape[0])) !=
                layers.expand(layers.reshape(punctuation,
                                             (1, 1, -1)), words.shape + [1]),
                dim=-1)

            mask = layers.logical_and(mask, punct_mask)

        metric(connection_predict, deprel_predict, connections, deprel, mask)
        total_loss += loss.numpy().item()

    total_loss /= len(loader)

    return total_loss, metric
Code example #7
File: model.py Project: zhousanfu/paddle-demo
        def early_finish(alive_log_probs, finished_scores,
                         finished_in_finished):
            max_length_penalty = np.power(((5. + max_len) / 6.), alpha)
            # The best possible score of the most likely alive sequence
            lower_bound_alive_scores = alive_log_probs[:,
                                                       0] / max_length_penalty

            # Now compute the lowest score of a finished sequence in finished.
            # If a sequence isn't finished, its score is multiplied by 0; since
            # scores are all negative, taking the min gives the score of the
            # lowest finished item.
            lowest_score_of_finished_in_finished = layers.reduce_min(
                finished_scores * finished_in_finished, 1)
            # If none of the sequences have finished, the min will be 0, so in
            # that case replace it with -INF. The score of any alive sequence
            # will be much higher than -INF and the termination condition will
            # not be met.
            lowest_score_of_finished_in_finished += (
                1. - layers.reduce_max(finished_in_finished, 1)) * -inf
            bound_is_met = layers.reduce_all(
                layers.greater_than(lowest_score_of_finished_in_finished,
                                    lower_bound_alive_scores))

            return bound_is_met
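
A toy NumPy check of the bound with made-up scores (alpha = 0.6, max_len = 20; finished_scores are assumed to be length-normalized already):

import numpy as np

alpha, max_len = 0.6, 20
max_length_penalty = np.power((5. + max_len) / 6., alpha)   # ~2.35
alive_log_probs = np.array([[-4.0, -5.0]])                  # best beam first
finished_scores = np.array([[-3.5, -6.0]])
finished_in_finished = np.array([[1.0, 1.0]])               # both finished
lower_bound = alive_log_probs[:, 0] / max_length_penalty    # ~-1.70
lowest_finished = (finished_scores * finished_in_finished).min(axis=1)
lowest_finished += (1. - finished_in_finished.max(axis=1)) * -1e9
print(bool((lowest_finished > lower_bound).all()))          # False: keep decoding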
Code example #8
File: model.py Project: zhousanfu/paddle-demo
    def beam_search(self,
                    src_word,
                    src_pos,
                    src_slf_attn_bias,
                    trg_word,
                    trg_src_attn_bias,
                    bos_id=0,
                    eos_id=1,
                    beam_size=4,
                    max_len=256):
        def expand_to_beam_size(tensor, beam_size):
            tensor = layers.reshape(tensor,
                                    [tensor.shape[0], 1] + tensor.shape[1:])
            tile_dims = [1] * len(tensor.shape)
            tile_dims[1] = beam_size
            return layers.expand(tensor, tile_dims)

        def merge_batch_beams(tensor):
            return layers.reshape(tensor, [tensor.shape[0] * tensor.shape[1]] +
                                  tensor.shape[2:])

        def split_batch_beams(tensor):
            return fluid.layers.reshape(tensor,
                                        shape=[-1, beam_size] +
                                        list(tensor.shape[1:]))

        def mask_probs(probs, finished, noend_mask_tensor):
            # TODO: use where_op
            finished = layers.cast(finished, dtype=probs.dtype)
            probs = layers.elementwise_mul(layers.expand(
                layers.unsqueeze(finished, [2]), [1, 1, self.trg_vocab_size]),
                                           noend_mask_tensor,
                                           axis=-1) - layers.elementwise_mul(
                                               probs, (finished - 1), axis=0)
            return probs

        def gather(x, indices, batch_pos):
            topk_coordinates = fluid.layers.stack([batch_pos, indices], axis=2)
            return layers.gather_nd(x, topk_coordinates)

        # run encoder
        enc_output = self.encoder(src_word, src_pos, src_slf_attn_bias)

        # constant number
        inf = float(1. * 1e7)
        batch_size = enc_output.shape[0]
        max_len = (enc_output.shape[1] + 20) if max_len is None else max_len
        vocab_size_tensor = layers.fill_constant(shape=[1],
                                                 dtype="int64",
                                                 value=self.trg_vocab_size)
        end_token_tensor = to_variable(
            np.full([batch_size, beam_size], eos_id, dtype="int64"))
        noend_array = [-inf] * self.trg_vocab_size
        noend_array[eos_id] = 0
        noend_mask_tensor = to_variable(np.array(noend_array, dtype="float32"))
        batch_pos = layers.expand(
            layers.unsqueeze(
                to_variable(np.arange(0, batch_size, 1, dtype="int64")), [1]),
            [1, beam_size])

        predict_ids = []
        parent_ids = []
        ### initialize states of beam search ###
        log_probs = to_variable(
            np.array([[0.] + [-inf] * (beam_size - 1)] * batch_size,
                     dtype="float32"))
        finished = to_variable(
            np.full([batch_size, beam_size], 0, dtype="bool"))
        ### initialize inputs and states of transformer decoder ###
        ## init inputs for decoder, shaped `[batch_size*beam_size, ...]`
        trg_word = layers.fill_constant(shape=[batch_size * beam_size, 1],
                                        dtype="int64",
                                        value=bos_id)
        trg_pos = layers.zeros_like(trg_word)
        trg_src_attn_bias = merge_batch_beams(
            expand_to_beam_size(trg_src_attn_bias, beam_size))
        enc_output = merge_batch_beams(
            expand_to_beam_size(enc_output, beam_size))
        ## init states (caches) for transformer, need to be updated according to selected beam
        caches = [{
            "k":
            layers.fill_constant(
                shape=[batch_size * beam_size, self.n_head, 0, self.d_key],
                dtype=enc_output.dtype,
                value=0),
            "v":
            layers.fill_constant(
                shape=[batch_size * beam_size, self.n_head, 0, self.d_value],
                dtype=enc_output.dtype,
                value=0),
        } for i in range(self.n_layer)]

        for i in range(max_len):
            trg_pos = layers.fill_constant(shape=trg_word.shape,
                                           dtype="int64",
                                           value=i)
            caches = map_structure(  # can not be reshaped since the 0 size
                lambda x: x if i == 0 else merge_batch_beams(x), caches)
            logits = self.decoder(trg_word, trg_pos, None, trg_src_attn_bias,
                                  enc_output, caches)
            caches = map_structure(split_batch_beams, caches)
            step_log_probs = split_batch_beams(
                fluid.layers.log(fluid.layers.softmax(logits)))
            step_log_probs = mask_probs(step_log_probs, finished,
                                        noend_mask_tensor)
            log_probs = layers.elementwise_add(x=step_log_probs,
                                               y=log_probs,
                                               axis=0)
            log_probs = layers.reshape(log_probs,
                                       [-1, beam_size * self.trg_vocab_size])
            scores = log_probs
            topk_scores, topk_indices = fluid.layers.topk(input=scores,
                                                          k=beam_size)
            beam_indices = fluid.layers.elementwise_floordiv(
                topk_indices, vocab_size_tensor)
            token_indices = fluid.layers.elementwise_mod(
                topk_indices, vocab_size_tensor)

            # update states
            caches = map_structure(
                lambda x: gather(x, beam_indices, batch_pos), caches)
            log_probs = gather(log_probs, topk_indices, batch_pos)
            finished = gather(finished, beam_indices, batch_pos)
            finished = layers.logical_or(
                finished, layers.equal(token_indices, end_token_tensor))
            trg_word = layers.reshape(token_indices, [-1, 1])

            predict_ids.append(token_indices)
            parent_ids.append(beam_indices)

            if layers.reduce_all(finished).numpy():
                break

        predict_ids = layers.stack(predict_ids, axis=0)
        parent_ids = layers.stack(parent_ids, axis=0)
        finished_seq = layers.transpose(
            layers.gather_tree(predict_ids, parent_ids), [1, 2, 0])
        finished_scores = topk_scores

        return finished_seq, finished_scores
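
The floordiv/mod pair decodes each flat top-k index back into a (beam, token) pair, because `log_probs` was flattened to `[batch, beam_size * vocab_size]` before `topk`. A NumPy sketch with a toy vocabulary:

import numpy as np

vocab_size = 5                              # toy stand-in for trg_vocab_size
topk_indices = np.array([[7, 3]])           # two candidates for one batch item
beam_indices = topk_indices // vocab_size   # [[1, 0]]: came from beams 1 and 0
token_indices = topk_indices % vocab_size   # [[2, 3]]: tokens 2 and 3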
Code example #9
def decode_with_grammar(decoder, inits, decode_vocab, max_step_num, **kwargs):
    """A modification of paddle.fluid.layers.dynamic_decode(...).
    Dynamic decoding performs :code:`decoder.step()` repeatedly until the returned
    Tensor indicating finished status contains all True values or the number of
    decoding steps reaches :attr:`max_step_num`.
    :code:`decoder.initialize()` would be called once before the decoding loop.
    If the `decoder` has implemented `finalize` method, :code:`decoder.finalize()`
    would be called once after the decoding loop.

    Args:
        decoder(Decoder): An instance of `Decoder`.
        inits(tuple): Argument passed to `decoder.initialize`. 
        decode_vocab(DecoderDynamicVocab): namedtuple(table table_len column column_len value value_len)
        max_step_num(int): The maximum number of steps.
        **kwargs: Additional keyword arguments. Arguments passed to `decoder.step`. 

    Returns:
        tuple: A tuple :code:`(final_outputs, final_states)` including the final \
            outputs and states, both Tensors or nested structures of Tensors. \
            `final_outputs` has the same structure and data types as \
            :code:`decoder.output_dtype` , and each Tensor in `final_outputs` \
            is the stacked outputs of all decoding steps, which may be revised \
            by :code:`decoder.finalize` . `final_states` is the counterpart of \
            the initial states returned by :code:`decoder.initialize` at the \
            last time step, so it has the same structure, with tensors of the \
            same shapes and data types.
    """
    step_cnt = tensor.fill_constant(shape=[1], dtype="int64", value=1)
    max_step_num_tensor = tensor.fill_constant(shape=[1],
                                               dtype="int64",
                                               value=max_step_num - 2)

    # shape = [batch_size, beam_size, ...]
    initial_inputs, initial_states, initial_finished = decoder.initialize(
        inits, decode_vocab)
    global_inputs, global_states, global_finished = (initial_inputs,
                                                     initial_states,
                                                     initial_finished)
    inputs = initial_inputs
    states = initial_states

    # buffers that hold the decoded outputs
    outputs_arr_data = tensor.fill_constant_batch_size_like(
        inputs.input,
        shape=[-1, decoder.beam_size, max_step_num],
        dtype=decoder.output_dtype.predicted_ids,
        value=0)
    outputs_arr_pos = tensor.fill_constant_batch_size_like(
        inputs.input, shape=[-1, decoder.beam_size, 1], dtype='int64', value=0)
    outputs_array = data_structure.ArrayData(
        decoder.merge_batch_beams(outputs_arr_data),
        decoder.merge_batch_beams(outputs_arr_pos))

    sequence_lengths = tensor.cast(tensor.zeros_like(initial_finished),
                                   "int64")

    # data structures that enforce the grammar constraints during decoding
    grammar_stack_dat = tensor.fill_constant_batch_size_like(
        inputs.input,
        shape=[-1, decoder.beam_size, max_step_num * STACK_EXPAND_TIMES],
        dtype='int64',
        value=0)
    grammar_stack_pos = tensor.fill_constant_batch_size_like(
        inputs.input, shape=[-1, decoder.beam_size, 1], dtype='int64', value=0)
    grammar_stack = data_structure.StackData(
        decoder.merge_batch_beams(grammar_stack_dat),
        decoder.merge_batch_beams(grammar_stack_pos))

    ############  decode in a loop until every sequence is finished  ############
    #   termination is decided by global_finished/next_finished && max_step_num
    cond = layers.logical_not((layers.reduce_all(initial_finished)))
    while_op = layers.While(cond)
    with while_op.block():
        # step_outputs --> OutputWrapper
        # next_states  --> StateWrapper
        # next_inputs  --> DecoderInputsWrapper
        step_outputs, next_states, next_inputs = decoder.step(
            inputs, states, **kwargs)
        predicted_ids = step_outputs.predicted_ids
        _save_predict_output(outputs_array, predicted_ids,
                             next_states.finished)

        pred_gmr_type = decoder.grammar_type(predicted_ids)
        cond_type_leaf = layers.equal(pred_gmr_type, decoder.GMR_TYPE.LEAF)
        cond_type_midd = layers.equal(pred_gmr_type, decoder.GMR_TYPE.MID)

        _process_type_leaf(cond_type_leaf, decoder, grammar_stack, next_inputs,
                           next_states.finished)
        _process_type_midd(cond_type_midd, decoder, grammar_stack, next_inputs,
                           predicted_ids)

        ##next_sequence_lengths = layers.elementwise_add(sequence_lengths,
        ##                        tensor.cast(layers.logical_not(global_finished), sequence_lengths.dtype))

        _check_finished(decoder, next_inputs, next_states.finished,
                        outputs_array)

        layers.utils.map_structure(tensor.assign, next_inputs, global_inputs)
        layers.utils.map_structure(tensor.assign, next_states, global_states)
        tensor.assign(next_states.finished, global_finished)
        ##tensor.assign(next_sequence_lengths, sequence_lengths)

        # update the loop condition in place for the While op
        layers.increment(x=step_cnt, value=1.0, in_place=True)
        layers.logical_and(
            layers.logical_not(layers.reduce_all(next_states.finished)),
            layers.less_equal(step_cnt, max_step_num_tensor), cond)

    final_outputs = outputs_array.data
    final_states = global_states

    final_outputs, final_states = decoder.finalize(final_outputs,
                                                   global_states,
                                                   sequence_lengths)

    return final_outputs, final_states
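
A hypothetical call, where every name is a placeholder and the `DecoderDynamicVocab` fields follow the docstring:

# `my_decoder` implements initialize/step/finalize as described above;
# `init_states` and `vocab` stand in for real inputs.
final_outputs, final_states = decode_with_grammar(
    my_decoder,
    inits=init_states,
    decode_vocab=vocab,  # namedtuple(table, table_len, column, column_len, value, value_len)
    max_step_num=60)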
Code example #10
    def _greedy_search(self,
                       src_word,
                       src_pos,
                       src_slf_attn_bias,
                       trg_word,
                       trg_src_attn_bias,
                       bos_id=0,
                       eos_id=1,
                       max_len=256):
        # run encoder
        enc_output = self.encoder(src_word, src_pos, src_slf_attn_bias)

        # constant number
        batch_size = enc_output.shape[0]
        max_len = (enc_output.shape[1] + 20) if max_len is None else max_len
        end_token_tensor = layers.fill_constant(shape=[batch_size, 1],
                                                dtype="int64",
                                                value=eos_id)

        predict_ids = []
        log_probs = layers.fill_constant(shape=[batch_size, 1],
                                         dtype="float32",
                                         value=0)
        trg_word = layers.fill_constant(shape=[batch_size, 1],
                                        dtype="int64",
                                        value=bos_id)
        finished = layers.fill_constant(shape=[batch_size, 1],
                                        dtype="bool",
                                        value=0)

        ## init states (caches) for transformer
        caches = [{
            "k":
            layers.fill_constant(shape=[batch_size, self.n_head, 0, self.d_key],
                                 dtype=enc_output.dtype,
                                 value=0),
            "v":
            layers.fill_constant(
                shape=[batch_size, self.n_head, 0, self.d_value],
                dtype=enc_output.dtype,
                value=0),
        } for i in range(self.n_layer)]

        for i in range(max_len):
            trg_pos = layers.fill_constant(shape=trg_word.shape,
                                           dtype="int64",
                                           value=i)
            logits = self.decoder(trg_word, trg_pos, None, trg_src_attn_bias,
                                  enc_output, caches)
            step_log_probs = layers.log(layers.softmax(logits))
            log_probs = layers.elementwise_add(x=step_log_probs,
                                               y=log_probs,
                                               axis=0)
            scores = log_probs
            topk_scores, topk_indices = layers.topk(input=scores, k=1)

            finished = layers.logical_or(
                finished, layers.equal(topk_indices, end_token_tensor))
            trg_word = topk_indices
            log_probs = topk_scores

            predict_ids.append(topk_indices)

            if layers.reduce_all(finished).numpy():
                break

        predict_ids = layers.stack(predict_ids, axis=0)
        finished_seq = layers.transpose(predict_ids, [1, 2, 0])
        finished_scores = topk_scores

        return finished_seq, finished_scores