Beispiel #1
0
def train():
    """Fine-tune an ``ErnieForGeneration`` model on the Poetry dataset.

    All settings are read from the module-level ``args`` namespace. The
    function builds the tokenizer, the train/dev data loaders, a linear
    warmup + linear decay learning-rate schedule and an AdamW optimizer,
    then runs the training loop.

    Every ``args.logging_steps`` steps the loss, speed and learning rate are
    logged; every ``args.save_steps`` steps the model is evaluated on the dev
    set and saved together with the tokenizer under
    ``args.output_dir/model_<global_step>``. When more than one GPU is
    configured, logging, evaluation and saving only happen on rank 0.
    """
    paddle.set_device("gpu" if args.n_gpu else "cpu")
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    model_name = args.model_name_or_path
    model = ErnieForGeneration.from_pretrained(model_name)

    # Order matters: "ernie-tiny" must be tested before the broader "ernie".
    if "ernie-tiny" in model_name:
        tokenizer = ErnieTinyTokenizer.from_pretrained(model_name)
    elif "ernie" in model_name:
        tokenizer = ErnieTokenizer.from_pretrained(model_name)
    elif "roberta" in model_name or "rbt" in model_name:
        tokenizer = RobertaTokenizer.from_pretrained(model_name)
    elif "electra" in model_name:
        tokenizer = ElectraTokenizer.from_pretrained(model_name)
    else:
        tokenizer = BertTokenizer.from_pretrained(model_name)

    if args.init_checkpoint:
        model_state = paddle.load(args.init_checkpoint)
        model.set_state_dict(model_state)

    train_dataset, dev_dataset = Poetry.get_datasets(['train', 'dev'])
    # Fall back to the [MASK] token id when the vocabulary has no [ATTN] token.
    attn_id = tokenizer.vocab[
        '[ATTN]'] if '[ATTN]' in tokenizer.vocab else tokenizer.vocab['[MASK]']
    # The last row of the sentence (segment) embedding is used for the target.
    tgt_type_id = model.sent_emb.weight.shape[0] - 1

    trans_func = convert_example(tokenizer=tokenizer,
                                 attn_id=attn_id,
                                 tgt_type_id=tgt_type_id,
                                 max_encode_len=args.max_encode_len,
                                 max_decode_len=args.max_decode_len,
                                 noise_prob=args.noise_prob,
                                 use_random_noice=args.use_random_noice)

    train_dataset = train_dataset.apply(trans_func, lazy=True)
    train_batch_sampler = paddle.io.DistributedBatchSampler(
        train_dataset, batch_size=args.batch_size, shuffle=True)
    # Pad every field of a sample to the batch maximum, then let
    # ``after_padding`` post-process the padded batch.
    batchify_fn = lambda samples, fn=Tuple(
        Pad(axis=0, pad_val=tokenizer.pad_token_id),  # src_ids
        Pad(axis=0, pad_val=tokenizer.pad_token_id),  # src_pids
        Pad(axis=0, pad_val=tokenizer.pad_token_id),  # src_sids
        Pad(axis=0, pad_val=tokenizer.pad_token_id),  # tgt_ids
        Pad(axis=0, pad_val=tokenizer.pad_token_id),  # tgt_pids
        Pad(axis=0, pad_val=tokenizer.pad_token_id),  # tgt_sids
        Pad(axis=0, pad_val=tokenizer.pad_token_id),  # attn_ids
        Pad(axis=0, pad_val=tokenizer.pad_token_id),  # tgt_labels
    ): after_padding(fn(samples))
    train_data_loader = DataLoader(dataset=train_dataset,
                                   batch_sampler=train_batch_sampler,
                                   collate_fn=batchify_fn,
                                   num_workers=0,
                                   return_list=True)

    dev_dataset = dev_dataset.apply(trans_func, lazy=True)
    dev_batch_sampler = paddle.io.BatchSampler(dev_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=False)
    dev_data_loader = DataLoader(dataset=dev_dataset,
                                 batch_sampler=dev_batch_sampler,
                                 collate_fn=batchify_fn,
                                 num_workers=0,
                                 return_list=True)

    # Vocabulary size, needed for one-hot label smoothing. Read before the
    # model is (optionally) wrapped in DataParallel.
    label_num = model.word_emb.weight.shape[0]
    if paddle.distributed.get_world_size() > 1:
        model = paddle.DataParallel(model)

    max_steps = (len(train_data_loader) * args.num_epochs)
    num_warmup_steps = max_steps * args.warmup_proportion

    def lr_lambda(current_step):
        # Linear warmup from 0 to 1, then linear decay back down to 0.
        if current_step < num_warmup_steps:
            return float(current_step) / float(max(1, num_warmup_steps))
        return max(
            0.0,
            float(max_steps - current_step) /
            float(max(1, max_steps - num_warmup_steps)))

    lr_scheduler = paddle.optimizer.lr.LambdaDecay(args.learning_rate,
                                                   lr_lambda)

    # Parameters whose names contain "bias" or "norm" are exempt from weight
    # decay. The set is built once so each lookup made by the optimizer is
    # O(1) instead of rebuilding the full name list on every call.
    decay_param_names = {
        p.name
        for n, p in model.named_parameters()
        if not any(nd in n for nd in ("bias", "norm"))
    }
    optimizer = paddle.optimizer.AdamW(
        learning_rate=lr_scheduler,
        epsilon=args.adam_epsilon,
        parameters=model.parameters(),
        weight_decay=args.weight_decay,
        grad_clip=nn.ClipGradByGlobalNorm(1.0),
        apply_decay_param_fun=lambda x: x in decay_param_names)

    rouge1 = Rouge1()
    rouge2 = Rouge2()

    # Only one process logs, evaluates and saves.
    is_main_process = (not args.n_gpu > 1) or (
        paddle.distributed.get_rank() == 0)

    global_step = 1
    tic_train = time.time()
    for epoch in range(args.num_epochs):
        for step, batch in enumerate(train_data_loader, start=1):
            (src_ids, src_sids, src_pids, tgt_ids, tgt_sids, tgt_pids,
             attn_ids, mask_src_2_src, mask_tgt_2_srctgt,
             mask_attn_2_srctgtattn, tgt_labels, _) = batch

            # Pass 1: encode the source and keep its key/value caches.
            _, __, info = model(src_ids,
                                sent_ids=src_sids,
                                pos_ids=src_pids,
                                attn_bias=mask_src_2_src,
                                encode_only=True)
            cached_k, cached_v = info['caches']
            # Pass 2: encode the target on top of the source caches.
            _, __, info = model(tgt_ids,
                                sent_ids=tgt_sids,
                                pos_ids=tgt_pids,
                                attn_bias=mask_tgt_2_srctgt,
                                past_cache=(cached_k, cached_v),
                                encode_only=True)
            cached_k2, cached_v2 = info['caches']
            # Join the source and target caches along axis 1, layer by layer.
            past_cache_k = [
                paddle.concat([k, k2], 1)
                for k, k2 in zip(cached_k, cached_k2)
            ]
            past_cache_v = [
                paddle.concat([v, v2], 1)
                for v, v2 in zip(cached_v, cached_v2)
            ]
            if args.label_smooth > 0.:
                tgt_labels = nn.functional.label_smooth(
                    nn.functional.one_hot(tgt_labels, label_num),
                    epsilon=args.label_smooth)
            # Pass 3: compute the loss at the positions holding ``attn_id``.
            loss, _, __ = model(attn_ids,
                                sent_ids=tgt_sids,
                                pos_ids=tgt_pids,
                                attn_bias=mask_attn_2_srctgtattn,
                                past_cache=(past_cache_k, past_cache_v),
                                tgt_labels=tgt_labels,
                                tgt_pos=paddle.nonzero(attn_ids == attn_id))
            if global_step % args.logging_steps == 0:
                if is_main_process:
                    logger.info(
                        "global step %d, epoch: %d, batch: %d, loss: %f, speed: %.2f step/s, lr: %.3e"
                        % (global_step, epoch, step, loss, args.logging_steps /
                           (time.time() - tic_train), lr_scheduler.get_lr()))
                tic_train = time.time()

            loss.backward()
            optimizer.step()
            lr_scheduler.step()
            optimizer.clear_gradients()
            if global_step % args.save_steps == 0 and is_main_process:
                evaluate(model, dev_data_loader, tokenizer, rouge1, rouge2,
                         attn_id, tgt_type_id, args)
                output_dir = os.path.join(args.output_dir,
                                          "model_%d" % global_step)
                os.makedirs(output_dir, exist_ok=True)
                # Unwrap DataParallel so the underlying model is saved.
                model_to_save = model._layers if isinstance(
                    model, paddle.DataParallel) else model
                model_to_save.save_pretrained(output_dir)
                tokenizer.save_pretrained(output_dir)
            global_step += 1
Beispiel #2
0
    def forward(self,
                input_ids=None,
                token_type_ids=None,
                position_ids=None,
                attention_mask=None,
                query_input_ids=None,
                query_token_type_ids=None,
                query_position_ids=None,
                query_attention_mask=None,
                title_input_ids=None,
                title_token_type_ids=None,
                title_position_ids=None,
                title_attention_mask=None,
                seq_lengths=None,
                labels=None):
        """Run the backbone and post-process its output according to ``self.task``.

        * ``'seq-cls'``: softmax over axis 1 of the backbone output.
        * ``'token-cls'``: softmax over the last axis; with ``labels`` the
          chunk metric is updated using ``seq_lengths``.
        * ``'text-matching'``: the query and title inputs are encoded
          separately, mean-pooled over non-padding tokens, combined as
          ``[query, title, |query - title|]`` and classified.
        * any other task: the backbone output is unpacked and returned as
          ``(sequence_output, pooled_output)``.

        For the three task heads the return value is ``probs`` when
        ``labels`` is ``None``, otherwise ``(probs, loss, metrics_dict)``.
        """
        task = self.task

        def masked_mean(token_embedding, token_ids):
            # Apply dropout, zero the padding positions, then average over
            # axis 1 using the number of non-padding tokens.
            token_embedding = self.dropout(token_embedding)
            pad_mask = paddle.unsqueeze(
                (token_ids != self.model.pad_token_id).astype(
                    token_embedding.dtype),
                axis=2)
            masked_sum = paddle.sum(token_embedding * pad_mask, axis=1)
            token_count = paddle.sum(pad_mask, axis=1)
            return masked_sum / token_count

        if task == 'text-matching':
            query_result = self.model(query_input_ids, query_token_type_ids,
                                      query_position_ids, query_attention_mask)
            title_result = self.model(title_input_ids, title_token_type_ids,
                                      title_position_ids, title_attention_mask)

            query_mean = masked_mean(query_result, query_input_ids)
            title_mean = masked_mean(title_result, title_input_ids)

            abs_diff = paddle.abs(paddle.subtract(query_mean, title_mean))
            features = paddle.concat([query_mean, title_mean, abs_diff],
                                     axis=-1)
            logits = self.classifier(features)
            probs = F.softmax(logits)
            if labels is None:
                return probs
            loss = self.criterion(logits, labels)
            correct = self.metric.compute(probs, labels)
            acc = self.metric.update(correct)
            return probs, loss, {'acc': acc}

        result = self.model(input_ids, token_type_ids, position_ids,
                            attention_mask)

        if task == 'seq-cls':
            probs = F.softmax(result, axis=1)
            if labels is None:
                return probs
            loss = self.criterion(result, labels)
            correct = self.metric.compute(probs, labels)
            acc = self.metric.update(correct)
            return probs, loss, {'acc': acc}

        if task == 'token-cls':
            token_level_probs = F.softmax(result, axis=-1)
            preds = token_level_probs.argmax(axis=-1)
            if labels is None:
                return token_level_probs
            loss = self.criterion(result, labels.unsqueeze(-1))
            chunk_counts = self.metric.compute(None, seq_lengths, preds,
                                               labels)
            num_infer_chunks, num_label_chunks, num_correct_chunks = \
                chunk_counts
            self.metric.update(num_infer_chunks.numpy(),
                               num_label_chunks.numpy(),
                               num_correct_chunks.numpy())
            # accumulate() yields three values; only the last (F1) is used.
            _, _, f1_score = map(float, self.metric.accumulate())
            return token_level_probs, loss, {'f1_score': f1_score}

        # Any other task: hand back the raw backbone outputs.
        sequence_output, pooled_output = result
        return sequence_output, pooled_output
 def test_quant_concat(self):
     """The quant ``concat`` layer must agree with ``paddle.concat`` on axis 0."""
     expected = paddle.concat([self.x, self.y], axis=0)
     quant_concat = paddle.nn.quant.concat()
     actual = quant_concat([self.x, self.y], 0)
     self.check(expected, actual)
     self.assertTrue(expected.shape == actual.shape)
    def sample(self,
               input_ids,
               logits_processors,
               max_length,
               pad_token_id,
               eos_token_id,
               top_k=None,
               top_p=None,
               temperature=None,
               min_tokens_to_keep=1,
               **model_kwargs):
        """Generate tokens by sampling, with optional top-k / top-p filtering.

        At each step the last-position logits are adjusted, passed through
        ``logits_processors``, optionally scaled by ``temperature`` and
        filtered by ``top_k`` / ``top_p``; one token per sequence is then
        drawn with ``paddle.multinomial``. Sequences that have already
        produced ``eos_token_id`` are fed ``pad_token_id`` from then on.
        Generation stops at ``max_length`` or when every sequence is finished.

        Returns:
            tuple: ``(generated_ids, scores)`` where ``generated_ids`` holds
            only the tokens appended after the original prompt.
        """

        def keep_top_k(probs, top_k, min_tokens_to_keep):
            # Clamp k to [min_tokens_to_keep, vocab_size].
            k = min(max(top_k, min_tokens_to_keep), probs.shape[-1])
            # Zero every probability below the k-th largest one in its row.
            largest, _ = paddle.topk(probs, k=k)
            threshold = largest[:, -1:]
            return paddle.where(probs >= threshold, probs,
                                paddle.full_like(probs, 0.0))

        def keep_top_p(probs, top_p, min_tokens_to_keep):
            sorted_probs = paddle.sort(probs, descending=True)
            order = paddle.argsort(probs, descending=True)
            cumulative = paddle.cumsum(sorted_probs, axis=-1)

            # Mark (in sorted order) tokens whose cumulative probability
            # exceeds top_p, but keep at least min_tokens_to_keep tokens.
            drop_sorted = cumulative > top_p
            if min_tokens_to_keep > 1:
                # 'min_tokens_to_keep - 1' because the shift below always
                # keeps the first token as well.
                drop_sorted[:, :min_tokens_to_keep - 1] = 0
            drop_sorted = paddle.cast(drop_sorted, dtype='int64')
            # Shift right by one so the token that crosses the threshold
            # is itself kept.
            drop_sorted[:, 1:] = (drop_sorted[:, :-1].clone())
            drop_sorted[:, 0] = 0

            # Map the sorted-order flags back to vocabulary order using
            # flat indices offset per row.
            row_offset = paddle.arange(probs.shape[0]).unsqueeze(
                -1) * probs.shape[-1]
            flat_order = order + row_offset
            drop = paddle.scatter(drop_sorted.flatten(),
                                  flat_order.flatten(),
                                  drop_sorted.flatten())
            drop = paddle.cast(drop, 'bool').reshape(probs.shape)
            return paddle.where(drop, paddle.full_like(probs, 0.0), probs)

        batch_size, cur_len = input_ids.shape
        origin_len = cur_len
        unfinished_flag = paddle.full([batch_size, 1], True, dtype='bool')
        scores = paddle.full(
            [batch_size, 1], 0.0, dtype=paddle.get_default_dtype())

        # These switches depend only on the arguments, not on the step.
        has_eos = eos_token_id is not None
        scale_logits = temperature is not None and temperature != 1.0
        use_top_k = top_k is not None and top_k != 0
        use_top_p = top_p is not None and top_p < 1.0

        while cur_len < max_length:
            # Build the model inputs and run one forward step.
            model_inputs = self.prepare_inputs_for_generation(input_ids,
                                                              **model_kwargs)
            outputs = self(**model_inputs)
            if isinstance(outputs, tuple):
                logits = outputs[0]
            else:
                logits = outputs
            # Keep only the last position: [batch_size, vocab_size]
            logits = logits[:, -1, :]

            # Pre-process the distribution.
            logits = self.adjust_logits_during_generation(logits)
            logits = logits_processors(input_ids, logits)

            # Log-probabilities before temperature/filtering are what the
            # sampled tokens are scored with.
            origin_probs = paddle.log(F.softmax(logits))
            if scale_logits:
                logits = logits / temperature
            probs = F.softmax(logits)
            if use_top_k:
                probs = keep_top_k(probs, top_k, min_tokens_to_keep)
            if use_top_p:
                probs = keep_top_p(probs, top_p, min_tokens_to_keep)
            next_tokens = paddle.multinomial(probs)
            next_scores = paddle.index_sample(origin_probs, next_tokens)

            if has_eos:
                # Finished sequences only receive padding.
                padding = paddle.full_like(next_tokens, pad_token_id)
                next_tokens = paddle.where(unfinished_flag, next_tokens,
                                           padding)

            scores = self.update_scores_for_generation(
                scores, next_scores, cur_len - origin_len, unfinished_flag)

            cur_len += 1
            input_ids = paddle.concat([input_ids, next_tokens], axis=1)

            if has_eos:
                unfinished_flag = paddle.logical_and(
                    unfinished_flag, next_tokens != eos_token_id)

            # Stop as soon as every sequence has produced an end token.
            if not paddle.any(unfinished_flag):
                break
            model_kwargs = self.update_model_kwargs_for_generation(outputs,
                                                                   model_kwargs)
        return input_ids[:, origin_len:], scores
Beispiel #5
0
    def __call__(self,
                 seg_preds,
                 seg_masks,
                 cate_labels,
                 cate_scores,
                 sum_masks=None):
        """Apply Matrix NMS: decay the scores of overlapping same-class masks.

        Candidates are sorted by score and truncated to ``self.pre_nms_top_n``.
        A pairwise IoU matrix is built from the flattened masks, each score is
        multiplied by a decay coefficient (``self.kernel`` is ``'gaussian'``
        or ``'linear'``), candidates below ``self.update_threshold`` are
        dropped, and the survivors are sorted again and truncated to
        ``self.post_nms_top_n``.

        Args:
            seg_preds: Per-candidate mask predictions, indexed along axis 0.
            seg_masks: Per-candidate masks used for the IoU computation;
                all axes after the first are flattened.
                NOTE(review): presumably binarized float masks — confirm.
            cate_labels: Per-candidate class labels.
            cate_scores: Per-candidate classification scores.
            sum_masks: Optional per-candidate mask area. When omitted it is
                computed as the sum of each flattened mask.

        Returns:
            tuple: ``(seg_preds, cate_scores, cate_labels)`` of the kept
            candidates, sorted by decayed score.

        Raises:
            NotImplementedError: If ``self.kernel`` is neither ``'gaussian'``
                nor ``'linear'``.
        """
        # sort and keep top nms_pre
        sort_inds = self._sort_score(cate_scores, self.pre_nms_top_n)
        seg_masks = paddle.gather(seg_masks, index=sort_inds)
        seg_preds = paddle.gather(seg_preds, index=sort_inds)
        if sum_masks is not None:
            sum_masks = paddle.gather(sum_masks, index=sort_inds)
        cate_scores = paddle.gather(cate_scores, index=sort_inds)
        cate_labels = paddle.gather(cate_labels, index=sort_inds)

        seg_masks = paddle.flatten(seg_masks, start_axis=1, stop_axis=-1)
        if sum_masks is None:
            # The default used to be passed straight to paddle.gather and
            # crashed; derive the mask areas from the masks instead.
            sum_masks = paddle.sum(seg_masks, axis=1)
        # inter.
        inter_matrix = paddle.mm(seg_masks,
                                 paddle.transpose(seg_masks, [1, 0]))
        n_samples = paddle.shape(cate_labels)
        # union.
        sum_masks_x = paddle.expand(sum_masks, shape=[n_samples, n_samples])
        # iou.
        iou_matrix = (inter_matrix /
                      (sum_masks_x + paddle.transpose(sum_masks_x, [1, 0]) -
                       inter_matrix))
        # Keep the strict upper triangle: entry (i, j) with i < j pairs a
        # higher-ranked candidate i with a lower-ranked candidate j.
        iou_matrix = paddle.triu(iou_matrix, diagonal=1)
        # label_specific matrix.
        cate_labels_x = paddle.expand(cate_labels,
                                      shape=[n_samples, n_samples])
        label_matrix = paddle.cast(
            (cate_labels_x == paddle.transpose(cate_labels_x, [1, 0])),
            'float32')
        label_matrix = paddle.triu(label_matrix, diagonal=1)

        # IoU compensation
        compensate_iou = paddle.max((iou_matrix * label_matrix), axis=0)
        compensate_iou = paddle.expand(compensate_iou,
                                       shape=[n_samples, n_samples])
        compensate_iou = paddle.transpose(compensate_iou, [1, 0])

        # IoU decay
        decay_iou = iou_matrix * label_matrix

        # matrix nms
        if self.kernel == 'gaussian':
            decay_matrix = paddle.exp(-1 * self.sigma * (decay_iou**2))
            compensate_matrix = paddle.exp(-1 * self.sigma *
                                           (compensate_iou**2))
            decay_coefficient = paddle.min(decay_matrix / compensate_matrix,
                                           axis=0)
        elif self.kernel == 'linear':
            decay_matrix = (1 - decay_iou) / (1 - compensate_iou)
            decay_coefficient = paddle.min(decay_matrix, axis=0)
        else:
            raise NotImplementedError

        # update the score.
        cate_scores = cate_scores * decay_coefficient
        y = paddle.zeros(shape=paddle.shape(cate_scores), dtype='float32')
        keep = paddle.where(cate_scores >= self.update_threshold, cate_scores,
                            y)
        keep = paddle.nonzero(keep)
        keep = paddle.squeeze(keep, axis=[1])
        # Prevent an empty result: always append the index of the last
        # candidate as a placeholder entry.
        keep = paddle.concat(
            [keep,
             paddle.cast(paddle.shape(cate_scores)[0] - 1, 'int64')])

        seg_preds = paddle.gather(seg_preds, index=keep)
        cate_scores = paddle.gather(cate_scores, index=keep)
        cate_labels = paddle.gather(cate_labels, index=keep)

        # sort and keep top_k
        sort_inds = self._sort_score(cate_scores, self.post_nms_top_n)
        seg_preds = paddle.gather(seg_preds, index=sort_inds)
        cate_scores = paddle.gather(cate_scores, index=sort_inds)
        cate_labels = paddle.gather(cate_labels, index=sort_inds)
        return seg_preds, cate_scores, cate_labels
Beispiel #6
0
    def forward(self, pixels, regions):
        """Fuse the attention context with the pixel features.

        The output of ``self.attention_block(pixels, regions)`` is
        concatenated with ``pixels`` along axis 1 and passed through
        ``self.conv1x1``.
        """
        attended = self.attention_block(pixels, regions)
        fused = paddle.concat([attended, pixels], axis=1)
        return self.conv1x1(fused)
Beispiel #7
0
 def concat_unsqueeze2(inputs):
     """Shift and scale the first three slices of axis 2, then re-stack them.

     Slice ``i`` of the last axis becomes ``(slice + shift_i) * scale_i``;
     each result is unsqueezed at axis 2 and the three are concatenated
     along that axis.
     """
     shift_and_scale = ((2, 7), (3, 11), (5, 13))
     channels = [(inputs[:, :, idx] + shift) * scale
                 for idx, (shift, scale) in enumerate(shift_and_scale)]
     expanded = [paddle.unsqueeze(channel, axis=2) for channel in channels]
     return paddle.concat(expanded, axis=2)
 def to_tensor(self):
     """Return ``quaternion`` and ``translation`` concatenated on the last axis."""
     components = [self.quaternion, self.translation]
     return paddle.concat(components, axis=-1)
Beispiel #9
0
 def forward(self, x, fusions):
     """Blend ``x`` with a candidate computed from ``x`` and ``fusions``.

     ``x`` and ``fusions`` are concatenated along axis 2. A tanh candidate
     and a sigmoid gate are computed from that concatenation, and the output
     is ``gate * candidate + (1 - gate) * x``.
     """
     joined = paddle.concat([x, fusions], axis=2)
     candidate = F.tanh(self.linear_r(joined))
     gate = F.sigmoid(self.linear_g(joined))
     return gate * candidate + (1-gate) * x
Beispiel #10
0
    def forward(self, indices, segments, positions, input_mask):
        r'''
        Build the BERT encoder graph: embeddings followed by
        ``config.num_hidden_layers`` attention + feed-forward layers.

        Parameters and sub-layers are created inside this method, and several
        statements tag the most recently appended op of the current block
        (``block.ops[-1]`` / ``block.ops[-2]``), so the statement order here
        must not be changed.

        Args:
            indices (Tensor):
                Indices of input sequence tokens in the vocabulary. They are
                numerical representations of tokens that build the input sequence.
                Its data type should be `int32` and it has a shape of [batch_size * sequence_length].
            segments (Tensor):
                Segment token indices to indicate different portions of the inputs.
                Selected in the range ``[0, type_vocab_size - 1]``.
                Its data type should be `int32` and it has a shape of [batch_size * sequence_length].
            positions (Tensor):
                Indices of positions of each input sequence token in the position embeddings.
                Selected in the range ``[0, max_position_embeddings - 1]``.
                Shape as `[batch_size * sequence_length]` and dtype as int32.
            input_mask (Tensor or sequence of Tensor):
                Mask used in multi-head attention to avoid attending to unwanted positions,
                usually the paddings or the subsequent positions.
                If the task is PRETRAINING:
                    input_mask[0] is the index at which masking starts in the mask_tokens
                    input_mask[1] is the index at which masking starts in the rest of the sequence
                Otherwise:
                    input_mask is the mask tensor that has -1000 in positions to be masked and 0 otherwise.

        Returns:
            tuple: Returns tuple (`sequence_output`, `word_embeddings_weights`).

            With the fields:

            - `sequence_output` (Tensor):
                Hidden states at the last layer of the model.
                NOTE(review): each layer reshapes its attention output to
                [-1, hidden_size] before the residual add, so this appears to be
                2-D rather than [batch_size, sequence_length, hidden_size] - confirm.
            - `word_embeddings_weights` (Tensor):
                Second value returned by ``self.embedding``, passed through unchanged.
        '''

        with self.config.embeddings_scope:
            sequence_output, word_embeddings_weights = self.embedding(
                indices, segments, positions)

        if self.config.task == "PRETRAINING":
            # Detach both mask inputs before they are used to build the
            # attention mask below.
            with paddle.static.ipu_shard_guard(index=0, stage=0):
                input_mask[0] = self.custom_ops.detach(input_mask[0])
                input_mask[1] = self.custom_ops.detach(input_mask[1])

        for i in range(self.config.num_hidden_layers):
            # Attention
            attn_scope = self.config.attn_scopes[i]
            with attn_scope:
                with paddle.static.name_scope(f"Layer{i}/Attention"):
                    # Kept for the residual connection after the attention.
                    layer_input = sequence_output
                    q = self.create_parameter(shape=[
                        self.config.hidden_size, self.config.hidden_size
                    ],
                                              dtype="float32")
                    k = self.create_parameter(shape=[
                        self.config.hidden_size, self.config.hidden_size
                    ],
                                              dtype="float32")
                    v = self.create_parameter(shape=[
                        self.config.hidden_size, self.config.hidden_size
                    ],
                                              dtype="float32")
                    # Fuse the three projection weights so Q, K and V are
                    # produced by a single matmul, then split the result.
                    qkv = paddle.concat([q, k, v], axis=1)
                    qkv = paddle.matmul(sequence_output, qkv)
                    # Tag the op that was just appended (the matmul above).
                    qkv.block.ops[-1]._set_attr(
                        '__available_memory',
                        self.config.available_mem_proportion)
                    q, k, v = paddle.split(qkv,
                                           num_or_sections=[
                                               self.config.hidden_size,
                                               self.config.hidden_size,
                                               self.config.hidden_size
                                           ],
                                           axis=1)
                    q = paddle.reshape(q, self.qkv_shape)
                    q = paddle.transpose(q, [0, 2, 1, 3])
                    k = paddle.reshape(k, self.qkv_shape)
                    # K swaps its last two axes relative to Q and V so that
                    # matmul(q, k) below needs no extra transpose.
                    k = paddle.transpose(k, [0, 2, 3, 1])
                    v = paddle.reshape(v, self.qkv_shape)
                    v = paddle.transpose(v, [0, 2, 1, 3])

                    # Attention calculation
                    with paddle.static.name_scope(f"Z"):
                        if self.config.task == "PRETRAINING":
                            # The mask is built once per attn_scope.index and
                            # cached in self.masks for later layers.
                            if attn_scope.index in self.masks:
                                final_mask = self.masks[attn_scope.index]
                            else:
                                with paddle.static.name_scope("Mask"):
                                    base_value = np.arange(
                                        self.config.seq_len).astype('int32')
                                    base = paddle.fluid.layers.assign(
                                        base_value)
                                    # Positions before input_mask[0] ...
                                    mmask = paddle.less_than(
                                        base, input_mask[0])
                                    # ... or at/after max_predictions_per_seq
                                    mask_value = np.greater_equal(
                                        base_value,
                                        self.config.max_predictions_per_seq)
                                    mask = paddle.fluid.layers.assign(
                                        mask_value)
                                    mmask = paddle.logical_or(mmask, mask)
                                    # ... that are also before input_mask[1].
                                    smask = paddle.less_than(
                                        base, input_mask[1])
                                    final_mask = paddle.logical_and(
                                        mmask, smask)
                                    final_mask = paddle.cast(
                                        final_mask, "float16")
                                    sub_attrs = {
                                        'name': 'constant_sub',
                                        'shape': [1],
                                        'dtype': 'float32',
                                        'value': 1,
                                    }
                                    mul_attrs = {
                                        'name': 'constant_mul',
                                        'shape': [1],
                                        'dtype': 'float32',
                                        'value': 1000,
                                    }
                                    # (mask - 1) * 1000: selected positions
                                    # become 0, all others become -1000.
                                    final_mask = paddle.fluid.layers.elementwise_sub(
                                        final_mask,
                                        paddle.fluid.layers.fill_constant(
                                            **sub_attrs))
                                    final_mask = paddle.fluid.layers.elementwise_mul(
                                        final_mask,
                                        paddle.fluid.layers.fill_constant(
                                            **mul_attrs))
                                    final_mask = paddle.reshape(
                                        final_mask,
                                        [-1, 1, 1, self.config.seq_len])
                                    final_mask = self.custom_ops.detach(
                                        final_mask)
                                    self.masks[attn_scope.index] = final_mask

                        qk = paddle.matmul(q, k)
                        qk.block.ops[-1]._set_attr(
                            '__available_memory',
                            self.config.available_mem_proportion)
                        # Scale the scores by the constant described by
                        # self.qk_scale_attrs.
                        qk_scale = paddle.fluid.layers.fill_constant(
                            **self.qk_scale_attrs)
                        qk = paddle.fluid.layers.elementwise_mul(qk, qk_scale)

                        if self.config.task == "PRETRAINING":
                            qk = paddle.fluid.layers.elementwise_add(
                                qk, final_mask)
                        else:
                            # for SQUAD task, input_mask is calculated in data preprocessing
                            qk = paddle.fluid.layers.elementwise_add(
                                qk, input_mask)

                        qk = paddle.fluid.layers.softmax(qk)
                        # Dropout on the attention probabilities is only
                        # applied for the SQUAD task.
                        if self.config.task == "SQUAD":
                            qk = paddle.fluid.layers.dropout(
                                qk,
                                self.config.attention_probs_dropout_prob,
                                dropout_implementation='upscale_in_train')
                        qkv = paddle.matmul(qk, v)
                        qkv.block.ops[-1]._set_attr(
                            '__available_memory',
                            self.config.available_mem_proportion)
                        # Undo the head transpose and merge the heads back
                        # into rows of hidden_size.
                        qkv = paddle.transpose(qkv, [0, 2, 1, 3])
                        qkv = paddle.reshape(qkv,
                                             [-1, self.config.hidden_size])

                    # Output projection (no bias), dropout, residual add and
                    # layer norm.
                    qkv_linear = nn.Linear(self.config.hidden_size,
                                           self.config.hidden_size,
                                           bias_attr=False)
                    qkv = qkv_linear(qkv)
                    qkv.block.ops[-1]._set_attr(
                        '__available_memory',
                        self.config.available_mem_proportion)
                    qkv = paddle.fluid.layers.dropout(
                        qkv,
                        self.config.attention_probs_dropout_prob,
                        dropout_implementation='upscale_in_train')
                    attention = paddle.add(layer_input, qkv)
                    layer_norm1 = nn.LayerNorm(self.config.hidden_size,
                                               epsilon=0.001)
                    attention = layer_norm1(attention)

            # FF
            with self.config.ff_scopes[i]:
                with paddle.static.name_scope(f"Layer{i}/FF"):
                    ff_linear1 = nn.Linear(self.config.hidden_size,
                                           4 * self.config.hidden_size)
                    ff_linear2 = nn.Linear(4 * self.config.hidden_size,
                                           self.config.hidden_size)
                    with paddle.static.name_scope(f"1"):
                        ff = ff_linear1(attention)
                        # ops[-2] rather than ops[-1]: presumably these
                        # biased Linear layers append a bias-add op after the
                        # matmul - TODO confirm.
                        ff.block.ops[-2]._set_attr(
                            '__available_memory',
                            self.config.available_mem_proportion)
                    ff = paddle.fluid.layers.gelu(ff, approximate=True)
                    with paddle.static.name_scope(f"2"):
                        ff = ff_linear2(ff)
                        ff.block.ops[-2]._set_attr(
                            '__available_memory',
                            self.config.available_mem_proportion)
                    ff = paddle.fluid.layers.dropout(
                        ff,
                        self.config.attention_probs_dropout_prob,
                        dropout_implementation='upscale_in_train')
                    # Residual add around the feed-forward block, then norm.
                    ff = paddle.add(attention, ff)
                    layer_norm2 = nn.LayerNorm(self.config.hidden_size,
                                               epsilon=0.001)
                    sequence_output = layer_norm2(ff)

                # Pass the layer output through the custom checkpointoutput
                # op for the layers selected by should_checkpoint().
                if self.should_checkpoint(i):
                    with paddle.static.name_scope(f"Layer{i}"):
                        logging.info(f'add checkpointoutput for ff_{i}')
                        sequence_output = self.custom_ops.checkpointoutput(
                            sequence_output)
        return sequence_output, word_embeddings_weights
    def _encoder_preprocessor(self, position_sequence, n_node, global_context,
                              particle_types):
        """Build the encoder input graph from a sequence of particle positions.

        Args:
            position_sequence: Particle positions; the most recent step is
                read with ``position_sequence[:, -1]`` and rows are gathered
                by sender/receiver index, so axis 0 indexes particles.
            n_node: Forwarded to the connectivity helper and used as
                ``num_nodes`` of the returned graph.
            global_context: Optional global features. When not ``None`` they
                are normalized with the "context" statistics.
                NOTE(review): the normalized value is not attached to the
                returned graph -- confirm whether that is intended.
            particle_types: Indices used to gather rows from
                ``self._particle_type_embedding`` (only when more than one
                particle type is configured).

        Returns:
            A ``pgl.Graph`` whose node features are the flattened normalized
            velocities, the clipped boundary distances and (optionally) the
            particle type embeddings, and whose edge features are the
            normalized relative displacements and their norms.
        """
        # Extract important features from the position_sequence.
        most_recent_position = position_sequence[:, -1]
        velocity_sequence = time_diff(position_sequence)  # Finite-difference.

        # Get connectivity of the graph.
        (senders, receivers, n_edge
         ) = connectivity_utils_paddle.compute_connectivity_for_batch_pyfunc(
             most_recent_position, n_node, self._connectivity_radius)

        # Collect node features. They are concatenated in this order:
        #   1. flat_velocity_sequence
        #   2. normalized clipped distances to lower / upper boundaries
        #   3. particle_type_embeddings (only with several particle types)
        node_features = []

        # Normalized velocity sequence, merging spatial and time axis.
        velocity_stats = self._normalization_stats["velocity"]
        normalized_velocity_sequence = (
            velocity_sequence - velocity_stats.mean) / velocity_stats.std

        # Merge every axis after the particle axis instead of reshaping to a
        # hard-coded (1444, 10), so any particle count / history length works.
        flat_velocity_sequence = paddle.flatten(
            normalized_velocity_sequence, start_axis=1)
        node_features.append(flat_velocity_sequence)

        # Normalized clipped distances to lower and upper boundaries.
        # boundaries are an array of shape [num_dimensions, 2], where the second
        # axis, provides the lower/upper boundaries.
        boundaries = np.array(self._boundaries)

        # boundaries[:, n] takes the n-th entry (lower or upper) of every row.
        distance_to_lower_boundary = most_recent_position - boundaries[:, 0]
        distance_to_upper_boundary = boundaries[:, 1] - most_recent_position
        distance_to_boundaries = paddle.concat(
            [distance_to_lower_boundary, distance_to_upper_boundary],
            axis=1)
        # Express distances in units of the connectivity radius and clamp
        # them to [-1, 1].
        normalized_clipped_distance_to_boundaries = paddle.clip(
            distance_to_boundaries / self._connectivity_radius, min=-1, max=1)
        node_features.append(normalized_clipped_distance_to_boundaries)

        # Particle type: look up the embedding rows indexed by particle_types.
        if self._num_particle_types > 1:
            particle_type_embeddings = paddle.gather(
                self._particle_type_embedding,
                particle_types)
            node_features.append(particle_type_embeddings)

        # Collect edge features:
        #   1. normalized relative displacements: (sender - receiver) / radius
        #   2. normalized relative distances: norm of the displacement
        edge_features = []
        normalized_relative_displacements = (
            paddle.gather(most_recent_position, senders) - paddle.gather(
                most_recent_position, receivers)) / self._connectivity_radius
        edge_features.append(normalized_relative_displacements)

        normalized_relative_distances = paddle.norm(
            normalized_relative_displacements, axis=-1, keepdim=True)
        edge_features.append(normalized_relative_distances)

        # Normalize the global context.
        if global_context is not None:
            context_stats = self._normalization_stats["context"]
            # Context in some datasets are all zero, so add an epsilon for numerical
            # stability.
            # NOTE(review): this result is currently unused below.
            global_context = (global_context -
                              context_stats.mean) / paddle.maximum(
                                  context_stats.std, STD_EPSILON)

        # NOTE(review): `edges` receives n_edge while senders/receivers are
        # only used for the edge features -- confirm against pgl.Graph.
        return pgl.Graph(
            edges=n_edge,
            num_nodes=n_node,
            node_feat=paddle.concat(node_features, axis=-1),
            edge_feat=paddle.concat(edge_features, axis=-1),
        )
Beispiel #12
0
    def forward(self,
                query_matrix,
                key_matrix,
                value_matrix,
                d_head,
                attn_mask=None,
                rand_mask_idx=None,
                query_mask=None,
                key_mask=None,
                dropout=None):
        '''
        Block-sparse attention combining global, sliding-window and random
        blocks.

        Args:
            query_matrix: [B, H, T, D]
            key_matrix: [B, H, T, D]
            value_matrix: [B, H, T, D]
            d_head: scores are multiplied by ``d_head ** -0.5``.
            attn_mask: not read in this method.
            rand_mask_idx: [H, T//bs, bs]; forwarded to the random-mask and
                random key/value gathering helpers.
            query_mask: [B, 1, T, 1] bool mask. Although it defaults to None
                it is reshaped and multiplied unconditionally, so callers
                must supply it.
            key_mask: [B, 1, 1, T] bool mask; also reshaped unconditionally.
            dropout: forwarded to ``self._get_global_out``.

        Returns:
            The concatenation (axis 2) of the front-global output, the
            window/random output and the back-global output, multiplied by
            ``query_mask``; annotated as [B, H, T, D] below.
        '''
        B = query_matrix.shape[0]  # batch_size
        H = self.num_heads
        T = query_matrix.shape[2]  # sequence_length
        D = query_matrix.shape[3]  # size per head
        G = self.num_global_blocks
        GB = self.num_global_blocks_back
        GF = self.num_global_blocks_front
        R = self.num_rand_blocks
        W = self.window_size
        bs = self.block_size
        L = T // bs  # blocked length

        # Split the sequence axis into L blocks of bs positions each.
        blocked_query_matrix = paddle.reshape(query_matrix, [B, H, L, bs, -1])
        blocked_key_matrix = paddle.reshape(key_matrix, [B, H, L, bs, -1])
        blocked_value_matrix = paddle.reshape(value_matrix, [B, H, L, bs, -1])
        blocked_query_mask = paddle.reshape(query_mask, [B, L, bs])
        blocked_key_mask = paddle.reshape(key_mask, [B, L, bs])

        # 1. global_front_product
        global_front_out = self._get_global_out(
            query_matrix, key_matrix, value_matrix, key_mask, d_head, dropout)

        # 2. global_back_product (same helper, trailing flag set to False)
        global_back_out = self._get_global_out(query_matrix, key_matrix,
                                               value_matrix, key_mask, d_head,
                                               dropout, False)

        # 3. second_product: attention of the remaining (non-global) query
        #    blocks over global + window + random key blocks.

        # create second matrix
        # [B, 1, L-G, bs, (G+W)*bs]
        band_mask = self._get_band_mask(blocked_query_mask, blocked_key_mask, B,
                                        T)
        # [B, H, L-G, bs, R*bs]
        rand_mask = self._get_rand_mask(blocked_query_mask, blocked_key_mask,
                                        rand_mask_idx, B, T)
        # [B, H, L-G, bs, (G+W+R)*bs]
        second_mask = paddle.concat([band_mask, rand_mask], axis=4)

        # [B, H, L-G, R * bs, -1]
        random_keys = self._gather_random_key_value(blocked_key_matrix,
                                                    rand_mask_idx, B, T)
        random_values = self._gather_random_key_value(blocked_value_matrix,
                                                      rand_mask_idx, B, T)

        band_keys_matrix = self._get_band_matrix(blocked_key_matrix, B, T)
        band_value_matrix = self._get_band_matrix(blocked_value_matrix, B, T)

        # [B, H, L - G, bs, -1]
        second_query_matrix = blocked_query_matrix[:, :, GF:-GB]
        # [B, H, L - G, (G+W+R)*bs, -1]
        second_key_matrix = paddle.concat(
            [band_keys_matrix, random_keys], axis=3)
        # [B, H, L - G, (G+W+R)*bs, -1]
        second_value_matrix = paddle.concat(
            [band_value_matrix, random_values], axis=3)
        # The values (and, below, the weights) are split into top / middle /
        # bottom groups that are multiplied separately.
        second_top_value_matrix, second_middle_value_matrix, second_bottom_value_matrix = \
            self._get_splited_matrix(second_value_matrix)

        # Scaled scores; positions where the mask is 0 get -1e6 added before
        # the softmax.
        second_product = paddle.matmul(
            second_query_matrix, second_key_matrix, transpose_y=True)
        second_product = second_product * (d_head**-0.5)
        second_product += (1 - second_mask) * -1e6
        second_weights = F.softmax(second_product)

        second_top_weights, second_middle_weights, second_bottom_weights = \
            self._get_splited_matrix(second_weights)
        second_top_out = paddle.matmul(second_top_weights,
                                       second_top_value_matrix)

        # Middle group: the key axis is laid out as
        # [front global | window | back global | random]; each slice of the
        # weights is multiplied with the matching values and the partial
        # results are summed.
        second_middle_out = paddle.matmul(
            second_middle_weights[:, :, :, :, GF * bs:-(GB + R) * bs],
            second_middle_value_matrix[:, :, :, GF * bs:-(GB + R) * bs])
        # add global block attention
        second_middle_out += paddle.matmul(
            second_middle_weights[:, :, :, :, :GF * bs],
            blocked_value_matrix[:, :, 0:GF])
        second_middle_out += paddle.matmul(
            second_middle_weights[:, :, :, :, -(GB + R) * bs:-R * bs],
            blocked_value_matrix[:, :, -GB:])
        # add random block attention
        second_middle_out += paddle.matmul(
            second_middle_weights[:, :, :, :, -R * bs:],
            random_values[:, :, GF:-GB])

        second_bottom_out = paddle.matmul(second_bottom_weights,
                                          second_bottom_value_matrix)
        second_out = paddle.concat(
            [second_top_out, second_middle_out, second_bottom_out], axis=2)
        # Merge the (block, position-in-block) axes back into one axis.
        second_out = paddle.reshape(second_out, [B, H, (L - G) * bs, -1])

        # [B, H, T, D]
        out = paddle.concat(
            [global_front_out, second_out, global_back_out], axis=2)
        # Multiply by the query mask (zeroes rows where the mask is 0).
        out = out * query_mask
        return out
Beispiel #13
0
    def _get_band_matrix(self, blocked_matrix, B, T):
        '''
        Collect, for every non-global query block, the global and window
        blocks of ``blocked_matrix`` that belong to it.

        blocked_matrix: [B, H, L, bs, -1]
        return: [B, H, L-G, (G+W) * bs, -1]
        '''
        num_back = self.num_global_blocks_back
        num_front = self.num_global_blocks_front
        num_global = self.num_global_blocks
        window = self.window_size
        block_size = self.block_size
        num_heads = self.num_heads
        num_blocks = T // block_size  # blocked length
        half_window = window // 2

        per_query_parts = []

        # Leading query blocks: their window would start before the first
        # block, so take the first blocks plus enough trailing blocks to
        # reach G + W blocks in total.
        for query_block_id in range(num_front, num_front + half_window):
            last_block_id = query_block_id + half_window
            head_part = blocked_matrix[:, :, 0:(last_block_id + 1)]
            tail_part = blocked_matrix[:, :, -(
                num_global + window - last_block_id - 1):]
            joined = paddle.concat([head_part, tail_part], axis=2)
            per_query_parts.append(paddle.unsqueeze(joined, axis=2))

        # Interior query blocks: W shifted views of length band_length,
        # stacked on a new axis 3.
        band_length = num_blocks - num_global - half_window * 2
        window_parts = [
            paddle.unsqueeze(
                blocked_matrix[:, :, first_block_id:first_block_id +
                               band_length],
                axis=3)
            for first_block_id in range(num_front, num_front + window)
        ]
        window_matrix = paddle.concat(window_parts, axis=3)

        # Global blocks are repeated for every interior query block.
        front_global = paddle.expand(
            paddle.unsqueeze(blocked_matrix[:, :, :num_front], axis=2),
            [B, num_heads, band_length, num_front, block_size, -1])
        back_global = paddle.expand(
            paddle.unsqueeze(blocked_matrix[:, :, -num_back:], axis=2),
            [B, num_heads, band_length, num_back, block_size, -1])
        per_query_parts.append(
            paddle.concat(
                [front_global, window_matrix, back_global], axis=3))

        # Trailing query blocks: mirror image of the leading case.
        for query_block_id in range(num_blocks - num_back - half_window,
                                    num_blocks - num_back):
            first_block_id = query_block_id - half_window
            head_part = blocked_matrix[:, :, 0:num_global + window - (
                num_blocks - first_block_id)]
            tail_part = blocked_matrix[:, :, first_block_id:]
            joined = paddle.concat([head_part, tail_part], axis=2)
            per_query_parts.append(paddle.unsqueeze(joined, axis=2))

        stacked = paddle.concat(per_query_parts, axis=2)
        return paddle.reshape(stacked, [
            B, num_heads, num_blocks - num_global,
            (num_global + window) * block_size, -1
        ])
Beispiel #14
0
    def _get_band_mask(self, blocked_query_mask, blocked_key_mask, batch_size,
                       sequence_length):
        '''
        Build the mask for the global + window part of the attention.

        blocked_query_mask: [B, L, bs]
        blocked_key_mask: [B, L, bs]
        The result is expanded to [B, H, L-G, bs, -1] before it is returned.
        '''
        num_back = self.num_global_blocks_back
        num_front = self.num_global_blocks_front
        num_global = self.num_global_blocks
        window = self.window_size
        block_size = self.block_size
        num_heads = self.num_heads
        num_blocks = sequence_length // block_size  # blocked length
        half_window = window // 2
        num_query_blocks = num_blocks - num_global

        # Queries of the non-global blocks as a column: [B, L-G, bs, 1].
        query_column = paddle.reshape(
            blocked_query_mask[:, num_front:-num_back],
            [batch_size, num_query_blocks, block_size, 1])

        # [B, L-G, bs, 1] * [B, 1, 1, G*bs] -> [B, L-G, bs, G*bs]
        front_keys = paddle.reshape(blocked_key_mask[:, :num_front],
                                    [batch_size, 1, 1, num_front * block_size])
        front_global_mask = paddle.matmul(query_column, front_keys)

        back_keys = paddle.reshape(blocked_key_mask[:, -num_back:],
                                   [batch_size, 1, 1, num_back * block_size])
        back_global_mask = paddle.matmul(query_column, back_keys)

        # Leading query blocks: visible key blocks followed by zero padding.
        leading_rows = []
        for query_block_id in range(num_front, num_front + half_window):
            last_block_id = query_block_id + half_window
            padding = paddle.zeros_like(blocked_key_mask[:, -(window - (
                last_block_id + 1 - num_global)):-num_back])
            visible = blocked_key_mask[:, num_front:(last_block_id + 1)]
            row = paddle.concat([visible, padding], axis=1)
            leading_rows.append(paddle.unsqueeze(row, 1))
        leading_key_mask = paddle.reshape(
            paddle.concat(leading_rows, axis=1), [0, 0, window * block_size])

        # Interior query blocks: W shifted views joined on the last axis.
        band_length = num_blocks - num_global - half_window * 2
        window_columns = [
            blocked_key_mask[:, first_block_id:first_block_id + band_length]
            for first_block_id in range(num_front, num_front + window)
        ]
        interior_key_mask = paddle.reshape(
            paddle.concat(window_columns, axis=2),
            [0, 0, window * block_size])

        # Trailing query blocks: zero padding followed by visible key blocks.
        trailing_rows = []
        for query_block_id in range((num_blocks - num_back) - half_window,
                                    num_blocks - num_back):
            first_block_id = query_block_id - half_window
            padding = paddle.zeros_like(
                blocked_key_mask[:, num_front:num_front + window - (
                    num_blocks - first_block_id - num_back)])
            visible = blocked_key_mask[:, first_block_id:-num_back]
            row = paddle.concat([padding, visible], axis=1)
            trailing_rows.append(paddle.unsqueeze(row, 1))
        trailing_key_mask = paddle.reshape(
            paddle.concat(trailing_rows, axis=1), [0, 0, window * block_size])

        full_key_mask = paddle.concat(
            [leading_key_mask, interior_key_mask, trailing_key_mask], axis=1)
        full_key_mask = paddle.unsqueeze(full_key_mask, axis=2)
        # [B, L-G, bs, 1] * [B, L-G, 1, W*bs] -> [B, L-G, bs, W*bs]
        window_block_mask = paddle.matmul(query_column, full_key_mask)

        combined = paddle.concat(
            [front_global_mask, window_block_mask, back_global_mask], axis=3)
        combined = paddle.unsqueeze(combined, 1)  # add the head axis
        return paddle.expand(
            combined,
            [batch_size, num_heads, num_query_blocks, block_size, -1])
Beispiel #15
0
    def forward(self, inputs, targets=None):
        """Decode structure tokens and location predictions step by step.

        Args:
            inputs: Sequence of feature maps; only the last element is used.
            targets: Optional sequence whose first element holds the
                ground-truth structure tokens, indexed as
                ``structure[:, step]``. Used only while ``self.training``.

        Returns:
            Dict with ``'structure_probs'`` (softmax is applied only on the
            inference path) and ``'loc_preds'`` (sigmoid output of
            ``self.loc_generator``).
        """
        # if and else branch are both needed when you want to assign a variable
        # if you modify the var in just one branch, then the modification will not work.
        fea = inputs[-1]
        if len(fea.shape) == 3:
            pass
        else:
            # Collapse every axis after the second into one, then swap the
            # last two axes.
            last_shape = int(np.prod(fea.shape[2:]))
            fea = paddle.reshape(fea, [fea.shape[0], fea.shape[1], last_shape])
            fea = fea.transpose([0, 2, 1])  # (NTC)(batch, width, channels)
        batch_size = fea.shape[0]

        # Initial recurrent state is all zeros.
        hidden = paddle.zeros((batch_size, self.hidden_size))
        output_hiddens = []
        if self.training and targets is not None:
            # Training: the ground-truth token of each step is the input of
            # the attention cell (teacher forcing).
            structure = targets[0]
            for i in range(self.max_elem_length + 1):
                elem_onehots = self._char_to_onehot(structure[:, i],
                                                    onehot_dim=self.elem_num)
                (outputs, hidden), alpha = self.structure_attention_cell(
                    hidden, fea, elem_onehots)
                output_hiddens.append(paddle.unsqueeze(outputs, axis=1))
            output = paddle.concat(output_hiddens, axis=1)
            structure_probs = self.structure_generator(output)
            if self.loc_type == 1:
                loc_preds = self.loc_generator(output)
                loc_preds = F.sigmoid(loc_preds)
            else:
                # Project the features with loc_fea_trans and append them to
                # the decoder outputs before predicting locations.
                loc_fea = fea.transpose([0, 2, 1])
                loc_fea = self.loc_fea_trans(loc_fea)
                loc_fea = loc_fea.transpose([0, 2, 1])
                loc_concat = paddle.concat([output, loc_fea], axis=2)
                loc_preds = self.loc_generator(loc_concat)
                loc_preds = F.sigmoid(loc_preds)
        else:
            # Inference: start from token 0 and feed the argmax of each step
            # into the next one.
            temp_elem = paddle.zeros(shape=[batch_size], dtype="int32")
            # Pre-assign the names that are set inside the loop / branches
            # below (see the note at the top of this method).
            structure_probs = None
            loc_preds = None
            elem_onehots = None
            outputs = None
            alpha = None
            max_elem_length = paddle.to_tensor(self.max_elem_length)
            i = 0
            while i < max_elem_length + 1:
                elem_onehots = self._char_to_onehot(temp_elem,
                                                    onehot_dim=self.elem_num)
                (outputs, hidden), alpha = self.structure_attention_cell(
                    hidden, fea, elem_onehots)
                output_hiddens.append(paddle.unsqueeze(outputs, axis=1))
                structure_probs_step = self.structure_generator(outputs)
                temp_elem = structure_probs_step.argmax(axis=1, dtype="int32")
                i += 1

            output = paddle.concat(output_hiddens, axis=1)
            structure_probs = self.structure_generator(output)
            structure_probs = F.softmax(structure_probs)
            if self.loc_type == 1:
                loc_preds = self.loc_generator(output)
                loc_preds = F.sigmoid(loc_preds)
            else:
                # Same location branch as in training.
                loc_fea = fea.transpose([0, 2, 1])
                loc_fea = self.loc_fea_trans(loc_fea)
                loc_fea = loc_fea.transpose([0, 2, 1])
                loc_concat = paddle.concat([output, loc_fea], axis=2)
                loc_preds = self.loc_generator(loc_concat)
                loc_preds = F.sigmoid(loc_preds)
        return {'structure_probs': structure_probs, 'loc_preds': loc_preds}
Beispiel #16
0
    def get_loss(self, head_outputs, targets):
        """Compute the classification and box-regression loss of a batch.

        Anchors are assigned to the ground-truth boxes of every image, the
        positive and negative samples of all images are gathered, and both
        losses are summed and divided by the number of positive samples
        (at least 1).

        Args:
            head_outputs: Pair ``(cls_logits_list, bboxes_reg_list)`` with one
                entry per feature level.
            targets: Mapping providing ``'gt_bbox'`` and ``'gt_class'``, one
                entry per image.

        Returns:
            Dict with ``'loss_cls'``, ``'loss_reg'`` and their sum ``'loss'``.
        """
        cls_logits_list, bboxes_reg_list = head_outputs
        anchors = paddle.concat(self.anchor_generator(cls_logits_list))

        # Assign anchors to gts image by image; no sampling is involved.
        # The assigner returns the matched gt index of every anchor and a
        # label per anchor: -1 (ignore), 0 (negative) or 1 (positive).
        per_image_matches = []
        per_image_labels = []
        for image_boxes in targets['gt_bbox']:
            assigned, assigned_labels = self.bbox_assigner(anchors,
                                                           image_boxes)
            per_image_matches.append(assigned)
            per_image_labels.append(assigned_labels)

        # Move channels last, flatten the spatial axes of every level and
        # join all levels along axis 1.
        flat_cls_levels = []
        for level_logits in cls_logits_list:
            level_logits = level_logits.transpose([0, 2, 3, 1])
            flat_cls_levels.append(
                level_logits.reshape([0, -1, self.num_classes]))
        flat_reg_levels = []
        for level_deltas in bboxes_reg_list:
            level_deltas = level_deltas.transpose([0, 2, 3, 1])
            flat_reg_levels.append(level_deltas.reshape([0, -1, 4]))
        cls_logits = paddle.concat(flat_cls_levels, axis=1)
        bboxes_reg = paddle.concat(flat_reg_levels, axis=1)

        cls_pred_list, cls_tar_list = [], []
        reg_pred_list, reg_tar_list = [], []
        # Gather predictions and targets of the chosen anchors of each image.
        per_image = zip(per_image_matches, per_image_labels, cls_logits,
                        bboxes_reg, targets['gt_bbox'], targets['gt_class'])
        for (img_matches, img_labels, img_logits, img_deltas, img_boxes,
             img_classes) in per_image:
            is_pos = (img_labels == 1)
            is_neg = (img_labels == 0)
            is_chosen = paddle.logical_or(is_pos, is_neg)

            # Append a background entry (value num_classes) to the class
            # list and point every negative anchor at it.
            img_classes = img_classes.reshape([-1])
            background = paddle.to_tensor([self.num_classes],
                                          dtype=img_classes.dtype)
            img_classes = paddle.concat([img_classes, background], axis=-1)
            background_index = paddle.full_like(img_matches,
                                                img_classes.size - 1)
            img_matches = paddle.where(is_neg, background_index, img_matches)

            chosen_logits = img_logits[is_chosen]
            chosen_classes = img_classes[img_matches[is_chosen]]
            pos_deltas = img_deltas[is_pos].reshape([-1, 4])
            pos_boxes = img_boxes[img_matches[is_pos]].reshape([-1, 4])
            pos_targets = bbox2delta(anchors[is_pos], pos_boxes, self.weights)

            cls_pred_list.append(chosen_logits)
            cls_tar_list.append(chosen_classes)
            reg_pred_list.append(pos_deltas)
            reg_tar_list.append(pos_targets)

        cls_pred = paddle.concat(cls_pred_list)
        cls_tar = paddle.concat(cls_tar_list)
        reg_pred = paddle.concat(reg_pred_list)
        reg_tar = paddle.concat(reg_tar_list)

        num_pos = reg_pred.shape[0]
        avg_factor = max(1.0, num_pos)
        cls_loss = self.loss_class(
            cls_pred, cls_tar, reduction='sum') / avg_factor

        if num_pos == 0:
            # No positive anchors: use a zero regression loss that still
            # takes part in autograd.
            reg_loss = paddle.zeros([1])
            reg_loss.stop_gradient = False
        else:
            reg_loss = self.loss_bbox(
                reg_pred, reg_tar, reduction='sum') / avg_factor

        return {
            'loss_cls': cls_loss,
            'loss_reg': reg_loss,
            'loss': cls_loss + reg_loss,
        }
Beispiel #17
0
 def forward(self, inputs):
     """Join the primary conv output with the maps derived from it (axis 1)."""
     primary = self.primary_conv(inputs)
     derived = self.cheap_operation(primary)
     return paddle.concat([primary, derived], axis=1)
Beispiel #18
0
 def forward(self, x: paddle.Tensor) -> paddle.Tensor:
     """Apply ``conv``, run both branches on its output and fuse them.

     The ``left`` and ``right`` branch outputs are concatenated along
     axis 1 before being passed to ``fuse``.
     """
     stem = self.conv(x)
     branches = [self.left(stem), self.right(stem)]
     return self.fuse(paddle.concat(branches, axis=1))
Beispiel #19
0
 def concat_unsqueeze1(inputs):
     """Re-join columns 0 and 1 of ``inputs`` along axis 1.

     Each column is selected, given a new axis 1 via ``unsqueeze`` and the
     two results are concatenated on that axis.
     """
     columns = [inputs[:, index].unsqueeze(1) for index in (0, 1)]
     return paddle.concat(columns, axis=1)
Beispiel #20
0
 def ops(self, inputs):
     """
     Concatenate ``inputs`` along ``self.axis`` and return the result.
     """
     return paddle.concat(x=inputs, axis=self.axis)
Beispiel #21
0
    def get_loss(self, head_outs, gt_meta):
        """Compute the varifocal, box-regression and distribution-focal losses.

        Args:
            head_outs: Pair ``(cls_scores, bbox_preds)`` with one feature map
                per level. The last two axes of each map are used as its
                spatial size (assumes NCHW layout -- TODO confirm).
            gt_meta: Mapping providing ``'im_id'`` (its first axis gives the
                number of images), ``'gt_bbox'`` and ``'gt_class'``.

        Returns:
            Dict with ``loss_vfl``, ``loss_bbox`` and ``loss_dfl``.
        """
        cls_scores, bbox_preds = head_outs
        num_level_anchors = [
            featmap.shape[-2] * featmap.shape[-1] for featmap in cls_scores
        ]
        num_imgs = gt_meta['im_id'].shape[0]
        featmap_sizes = [[featmap.shape[-2], featmap.shape[-1]]
                         for featmap in cls_scores]

        # Decode the predicted boxes of every level so that they can be used
        # for target assignment.
        decode_bbox_preds = []
        center_and_strides = []
        for featmap_size, stride, bbox_pred in zip(featmap_sizes,
                                                   self.fpn_stride, bbox_preds):
            # center in origin image
            yy, xx = self.get_single_level_center_point(featmap_size, stride,
                                                        self.cell_offset)
            strides = paddle.full((len(xx), ), stride)
            center_and_stride = paddle.stack([xx, yy, strides, strides],
                                             -1).tile([num_imgs, 1, 1])
            center_and_strides.append(center_and_stride)
            # Centers expressed in feature-map units.
            center_in_feature = center_and_stride.reshape(
                [-1, 4])[:, :-2] / stride
            bbox_pred = bbox_pred.transpose([0, 2, 3, 1]).reshape(
                [num_imgs, -1, 4 * (self.reg_max + 1)])
            pred_distances = self.distribution_project(bbox_pred)
            decode_bbox_pred_wo_stride = distance2bbox(
                center_in_feature, pred_distances).reshape([num_imgs, -1, 4])
            decode_bbox_preds.append(decode_bbox_pred_wo_stride * stride)

        flatten_cls_preds = [
            cls_pred.transpose([0, 2, 3, 1]).reshape(
                [num_imgs, -1, self.cls_out_channels])
            for cls_pred in cls_scores
        ]
        flatten_cls_preds = paddle.concat(flatten_cls_preds, axis=1)
        flatten_bboxes = paddle.concat(decode_bbox_preds, axis=1)
        flatten_center_and_strides = paddle.concat(center_and_strides, axis=1)

        # Assign targets image by image on detached predictions.
        gt_boxes, gt_labels = gt_meta['gt_bbox'], gt_meta['gt_class']
        pos_num_l, label_l, label_weight_l, bbox_target_l = [], [], [], []
        for (flatten_cls_pred, flatten_center_and_stride, flatten_bbox,
             gt_box, gt_label) in zip(flatten_cls_preds.detach(),
                                      flatten_center_and_strides.detach(),
                                      flatten_bboxes.detach(), gt_boxes,
                                      gt_labels):
            pos_num, label, label_weight, bbox_target = self._get_target_single(
                flatten_cls_pred, flatten_center_and_stride, flatten_bbox,
                gt_box, gt_label)
            pos_num_l.append(pos_num)
            label_l.append(label)
            label_weight_l.append(label_weight)
            bbox_target_l.append(bbox_target)

        labels = paddle.to_tensor(np.stack(label_l, axis=0))
        label_weights = paddle.to_tensor(np.stack(label_weight_l, axis=0))
        bbox_targets = paddle.to_tensor(np.stack(bbox_target_l, axis=0))

        # Regroup the per-image tensors into per-level lists.
        center_and_strides_list = self._images_to_levels(
            flatten_center_and_strides, num_level_anchors)
        labels_list = self._images_to_levels(labels, num_level_anchors)
        label_weights_list = self._images_to_levels(label_weights,
                                                    num_level_anchors)
        bbox_targets_list = self._images_to_levels(bbox_targets,
                                                   num_level_anchors)
        num_total_pos = sum(pos_num_l)
        # Best effort: average the positive count across workers and fall
        # back to the local count when that is not possible.
        # NOTE(review): confirm that all_reduce returns the reduced tensor in
        # the installed Paddle version; otherwise the fallback always runs.
        try:
            num_total_pos = paddle.distributed.all_reduce(num_total_pos.clone(
            )) / paddle.distributed.get_world_size()
        except Exception:
            # `Exception` rather than a bare except so that
            # KeyboardInterrupt / SystemExit still propagate.
            num_total_pos = max(num_total_pos, 1)

        loss_bbox_list, loss_dfl_list, loss_vfl_list, avg_factor = [], [], [], []
        for cls_score, bbox_pred, center_and_strides, labels, label_weights, bbox_targets, stride in zip(
                cls_scores, bbox_preds, center_and_strides_list, labels_list,
                label_weights_list, bbox_targets_list, self.fpn_stride):
            center_and_strides = center_and_strides.reshape([-1, 4])
            cls_score = cls_score.transpose([0, 2, 3, 1]).reshape(
                [-1, self.cls_out_channels])
            bbox_pred = bbox_pred.transpose([0, 2, 3, 1]).reshape(
                [-1, 4 * (self.reg_max + 1)])
            bbox_targets = bbox_targets.reshape([-1, 4])
            labels = labels.reshape([-1])

            # Positive samples carry a label in [0, num_classes).
            bg_class_ind = self.num_classes
            pos_inds = paddle.nonzero(
                paddle.logical_and((labels >= 0), (labels < bg_class_ind)),
                as_tuple=False).squeeze(1)
            # vfl: score target, zero everywhere except at positive samples
            vfl_score = np.zeros(cls_score.shape)

            if len(pos_inds) > 0:
                pos_bbox_targets = paddle.gather(bbox_targets, pos_inds, axis=0)
                pos_bbox_pred = paddle.gather(bbox_pred, pos_inds, axis=0)
                pos_centers = paddle.gather(
                    center_and_strides[:, :-2], pos_inds, axis=0) / stride

                # Weight every positive sample by its highest class score.
                weight_targets = F.sigmoid(cls_score.detach())
                weight_targets = paddle.gather(
                    weight_targets.max(axis=1, keepdim=True), pos_inds, axis=0)
                pos_bbox_pred_corners = self.distribution_project(pos_bbox_pred)
                pos_decode_bbox_pred = distance2bbox(pos_centers,
                                                     pos_bbox_pred_corners)
                pos_decode_bbox_targets = pos_bbox_targets / stride
                bbox_iou = bbox_overlaps(
                    pos_decode_bbox_pred.detach().numpy(),
                    pos_decode_bbox_targets.detach().numpy(),
                    is_aligned=True)

                # vfl: the IoU with the target box is the score target of
                # the assigned class
                pos_labels = paddle.gather(labels, pos_inds, axis=0)
                vfl_score[pos_inds.numpy(), pos_labels] = bbox_iou

                pred_corners = pos_bbox_pred.reshape([-1, self.reg_max + 1])
                target_corners = bbox2distance(pos_centers,
                                               pos_decode_bbox_targets,
                                               self.reg_max).reshape([-1])
                # regression loss
                loss_bbox = paddle.sum(
                    self.loss_bbox(pos_decode_bbox_pred,
                                   pos_decode_bbox_targets) * weight_targets)

                # dfl loss
                loss_dfl = self.loss_dfl(
                    pred_corners,
                    target_corners,
                    weight=weight_targets.expand([-1, 4]).reshape([-1]),
                    avg_factor=4.0)
            else:
                # No positives on this level: zero losses that stay connected
                # to bbox_pred.
                loss_bbox = bbox_pred.sum() * 0
                loss_dfl = bbox_pred.sum() * 0
                weight_targets = paddle.to_tensor([0], dtype='float32')

            # vfl loss
            num_pos_avg_per_gpu = num_total_pos
            vfl_score = paddle.to_tensor(vfl_score)
            loss_vfl = self.loss_vfl(
                cls_score, vfl_score, avg_factor=num_pos_avg_per_gpu)

            loss_bbox_list.append(loss_bbox)
            loss_dfl_list.append(loss_dfl)
            loss_vfl_list.append(loss_vfl)
            avg_factor.append(weight_targets.sum())

        avg_factor = sum(avg_factor)
        # Same best-effort cross-worker averaging as for num_total_pos.
        try:
            avg_factor = paddle.distributed.all_reduce(avg_factor.clone())
            avg_factor = paddle.clip(
                avg_factor / paddle.distributed.get_world_size(), min=1)
        except Exception:
            avg_factor = max(avg_factor.item(), 1)
        if avg_factor <= 0:
            loss_vfl = paddle.to_tensor(0, dtype='float32', stop_gradient=False)
            loss_bbox = paddle.to_tensor(
                0, dtype='float32', stop_gradient=False)
            loss_dfl = paddle.to_tensor(0, dtype='float32', stop_gradient=False)
        else:
            # Only the box and dfl losses are divided by avg_factor here.
            losses_bbox = list(map(lambda x: x / avg_factor, loss_bbox_list))
            losses_dfl = list(map(lambda x: x / avg_factor, loss_dfl_list))
            loss_vfl = sum(loss_vfl_list)
            loss_bbox = sum(losses_bbox)
            loss_dfl = sum(losses_dfl)

        loss_states = dict(
            loss_vfl=loss_vfl, loss_bbox=loss_bbox, loss_dfl=loss_dfl)

        return loss_states
Beispiel #22
0
 def _concat(x, out):
     """Join ``x`` and ``out`` with ``paddle.concat`` along axis 1."""
     merged = paddle.concat([x, out], axis=1)
     return merged
    def beam_search(self, input_ids, beam_scorer, logits_processors, max_length,
                    pad_token_id, eos_token_id, **model_kwargs):
        """Run beam-search decoding until ``max_length`` or the scorer is done.

        Args:
            input_ids (Tensor): 2-D tensor unpacked as
                ``(batch_size * num_beams, cur_len)``.
            beam_scorer: Object exposing ``_beam_hyps``, ``num_beams``,
                ``process``, ``finalize`` and ``is_done``.
            logits_processors (callable): Called as
                ``logits_processors(input_ids, logits)`` on the last-step
                logits.
            max_length (int): Decoding stops once the sequence length reaches
                this value.
            pad_token_id: Forwarded to the scorer.
            eos_token_id: Forwarded to the scorer.
            **model_kwargs: Forwarded to ``prepare_inputs_for_generation``;
                a non-``None`` ``"cache"`` entry is reordered to follow the
                selected beams after every step.

        Returns:
            tuple: ``(pred_ids[:, origin_len:], scores)`` as produced by
            ``beam_scorer.finalize``, with the prompt columns stripped.

        Note:
            ``next_tokens`` / ``next_indices`` are only bound inside the loop,
            so calling this with ``input_ids`` already at ``max_length``
            raises ``NameError`` at the ``finalize`` call.
        """
        batch_size = len(beam_scorer._beam_hyps)
        num_beams = beam_scorer.num_beams

        batch_beam_size, cur_len = input_ids.shape
        origin_len = cur_len

        assert (
            num_beams * batch_size == batch_beam_size
        ), "Batch dimension of `input_ids` should be {}, but received {}.".format(
            num_beams * batch_size, batch_beam_size)

        # Only the first beam of every batch item starts with score 0; the
        # others get -1e9 so the first step does not pick duplicates.
        beam_scores = paddle.zeros(
            (batch_size, num_beams), dtype=paddle.get_default_dtype())
        beam_scores[:, 1:] = -1e9
        beam_scores = paddle.reshape(beam_scores, [-1])

        while cur_len < max_length:
            # prepare model inputs & get model output
            model_inputs = self.prepare_inputs_for_generation(input_ids,
                                                              **model_kwargs)
            outputs = self(**model_inputs)
            logits = outputs[0] if isinstance(outputs, tuple) else outputs
            # keep only the logits of the last position
            logits = logits[:, -1, :]

            # pre-process distribution
            logits = self.adjust_logits_during_generation(logits)
            logits = logits_processors(input_ids, logits)

            # beam search
            # [batch_size * num_beams, vocab_size]
            # log_softmax is used instead of log(softmax(.)) so that very
            # negative logits do not underflow to log(0) = -inf.
            next_scores = F.log_softmax(logits, axis=-1)

            next_scores = next_scores + beam_scores.unsqueeze(-1)
            # reshape for beam search
            vocab_size = next_scores.shape[-1]
            next_scores = next_scores.reshape(
                [batch_size, num_beams * vocab_size])

            # Take 2 * num_beams candidates per batch item.
            next_scores, next_tokens = paddle.topk(
                next_scores, 2 * num_beams, axis=1)

            # Decode the flat index back into (beam index, token id).
            next_indices = next_tokens // vocab_size
            next_tokens = next_tokens % vocab_size

            # stateless
            beam_outputs = beam_scorer.process(
                input_ids,
                next_scores,
                next_tokens,
                next_indices,
                pad_token_id=pad_token_id,
                eos_token_id=eos_token_id, )
            beam_scores = beam_outputs["next_beam_scores"]
            beam_next_tokens = beam_outputs["next_beam_tokens"]
            beam_idx = beam_outputs["next_beam_indices"]

            cur_len += 1
            input_ids = paddle.concat(
                [
                    paddle.index_select(input_ids, beam_idx),
                    beam_next_tokens.unsqueeze(-1)
                ],
                axis=-1)

            if beam_scorer.is_done:
                break
            model_kwargs = self.update_model_kwargs_for_generation(outputs,
                                                                   model_kwargs)
            # `.get` tolerates models that never populate a "cache" entry.
            if model_kwargs.get("cache") is not None:
                # reorder the cache
                model_kwargs["cache"] = map_structure(
                    lambda x: paddle.index_select(x, beam_idx),
                    model_kwargs["cache"])

        pred_ids, scores = beam_scorer.finalize(
            input_ids,
            beam_scores,
            next_tokens,
            next_indices,
            pad_token_id=pad_token_id,
            eos_token_id=eos_token_id)
        return pred_ids[:, origin_len:], scores
Beispiel #24
0
    def get_loss(self, scores, deltas, targets, rois, bbox_weight):
        """
        Compute the classification and regression losses of the bbox head.

        scores (Tensor): scores from bbox head outputs
        deltas (Tensor): deltas from bbox head outputs
        targets (list[List[Tensor]]): bbox targets containing tgt_labels, tgt_bboxes and tgt_gt_inds
        rois (List[Tensor]): RoIs generated in each batch
        bbox_weight: weights forwarded to `bbox2delta` when encoding targets

        Returns a dict holding 'loss_bbox_cls' and 'loss_bbox_reg'.
        """
        cls_key, reg_key = 'loss_bbox_cls', 'loss_bbox_reg'
        losses = {}

        def _merge(tensors):
            # A one-element list is unwrapped rather than concatenated.
            return paddle.concat(tensors) if len(tensors) > 1 else tensors[0]

        # TODO: better pass args
        tgt_labels, tgt_bboxes, tgt_gt_inds = targets

        # ---- classification loss ----
        tgt_labels = _merge(tgt_labels)
        valid_inds = paddle.nonzero(tgt_labels >= 0).flatten()
        if valid_inds.shape[0] != 0:
            tgt_labels = tgt_labels.cast('int64')
            tgt_labels.stop_gradient = True
            losses[cls_key] = F.cross_entropy(
                input=scores, label=tgt_labels, reduction='mean')
        else:
            losses[cls_key] = paddle.zeros([1], dtype='float32')

        # ---- regression loss ----
        # Foreground samples are those with a label in [0, num_classes).
        is_foreground = paddle.logical_and(tgt_labels >= 0,
                                           tgt_labels < self.num_classes)
        fg_inds = paddle.nonzero(is_foreground).flatten()
        if fg_inds.numel() == 0:
            losses[reg_key] = paddle.zeros([1], dtype='float32')
            return losses

        fg_deltas = paddle.gather(deltas, fg_inds)
        if deltas.shape[1] == 4:
            # class-agnostic regression: one box per sample
            reg_delta = fg_deltas
        else:
            # class-specific regression: pick the 4 columns that belong to
            # each sample's ground-truth class.
            fg_gt_classes = paddle.gather(tgt_labels, fg_inds)

            row_inds = paddle.arange(fg_gt_classes.shape[0]).unsqueeze(1)
            row_inds = paddle.tile(row_inds, [1, 4]).reshape([-1, 1])

            col_inds = 4 * fg_gt_classes.unsqueeze(1) + paddle.arange(4)
            col_inds = col_inds.reshape([-1, 1])

            gather_inds = paddle.concat([row_inds, col_inds], axis=1)
            reg_delta = paddle.gather_nd(fg_deltas,
                                         gather_inds).reshape([-1, 4])

        rois = _merge(rois)
        tgt_bboxes = _merge(tgt_bboxes)

        reg_target = bbox2delta(rois, tgt_bboxes, bbox_weight)
        reg_target = paddle.gather(reg_target, fg_inds)
        reg_target.stop_gradient = True

        # Both variants are normalised by the total number of samples.
        num_samples = tgt_labels.shape[0]
        if self.bbox_loss is None:
            loss_bbox_reg = paddle.abs(reg_delta -
                                       reg_target).sum() / num_samples
        else:
            reg_delta = self.bbox_transform(reg_delta)
            reg_target = self.bbox_transform(reg_target)
            loss_bbox_reg = self.bbox_loss(reg_delta,
                                           reg_target).sum() / num_samples
            loss_bbox_reg *= self.num_classes

        losses[reg_key] = loss_bbox_reg
        return losses
Beispiel #25
0
    def forward(self, inputs):
        """Encode a batch and return one ``EncoderState`` per example.

        ``inputs`` is indexed with the keys 'src_ids', 'sent_ids',
        'question_tokens_index', 'table_indexes', 'column_indexes',
        'value_indexes' and 'orig_inputs'.
        """
        seq_hidden, cls_hidden = self.base_encoder(inputs['src_ids'],
                                                   inputs['sent_ids'])
        # For encoders other than ERNIE/BERT the two outputs are swapped.
        if self.pretrain_model_type not in ('ERNIE', 'BERT'):
            seq_hidden, cls_hidden = cls_hidden, seq_hidden

        question_tokens_index = inputs["question_tokens_index"]
        table_indexes = inputs["table_indexes"]
        column_indexes = inputs["column_indexes"]
        value_indexes = inputs["value_indexes"]

        gather = nn_utils.batch_gather_2d
        question_encs = gather(seq_hidden, question_tokens_index)
        table_encs = gather(seq_hidden, table_indexes)
        column_encs = gather(seq_hidden, column_indexes)
        value_encs = gather(seq_hidden, value_indexes)
        if self.enc_value_with_col:
            # Consecutive pairs along axis 1 are summed into one encoding.
            batch_dim = value_encs.shape[0]
            pair_count = value_encs.shape[1] // 2
            value_encs = value_encs.reshape([batch_dim, pair_count, 2, -1])
            value_encs = value_encs.sum(axis=2)

        def _identity_pointer_map(size):
            # Every index points to itself only.
            return {idx: [idx] for idx in range(size)}

        orig_inputs = inputs['orig_inputs']
        column_pointer_maps = [
            _identity_pointer_map(len(item.columns)) for item in orig_inputs
        ]
        table_pointer_maps = [
            _identity_pointer_map(len(item.tables)) for item in orig_inputs
        ]
        value_pointer_maps = [
            _identity_pointer_map(len(item.values)) for item in orig_inputs
        ]

        enc_results = []
        # relation encoding is computed example by example
        for batch_idx, orig_input in enumerate(orig_inputs):
            q_len = orig_input.column_indexes[0] - 2
            col_size = len(orig_input.columns)
            tab_size = len(orig_input.tables)
            val_size = len(orig_input.values)

            # Trim each encoding to this example's own sizes.
            q_enc = question_encs[batch_idx][:q_len]
            tab_enc = table_encs[batch_idx][:tab_size]
            col_enc = column_encs[batch_idx][:col_size]
            val_enc = value_encs[batch_idx][:val_size]

            c_boundary = list(range(col_size + 1))
            t_boundary = list(range(tab_size + 1))

            if self.rel_has_value:
                v_e_input = val_enc.unsqueeze(0)
            else:
                v_e_input = None
            updated_encs, align_mat = self.encs_update.forward_unbatched(
                q_enc.unsqueeze(0), col_enc.unsqueeze(0),
                tab_enc.unsqueeze(0), c_boundary, t_boundary,
                orig_input.relations, v_e_input)
            q_enc_new, c_enc_new, t_enc_new, v_enc_new = updated_encs

            memory_parts = []
            for part_name, part_enc in (('question', q_enc_new),
                                        ('table', t_enc_new),
                                        ('column', c_enc_new)):
                if part_name in self.include_in_memory:
                    memory_parts.append(part_enc)
            if 'value' in self.include_in_memory and self.rel_has_value:
                memory_parts.append(v_enc_new)
            memory = paddle.concat(memory_parts, axis=1)

            if not self.rel_has_value:
                # Values did not go through the relation update, so the raw
                # encodings are used and aligned against memory separately.
                v_enc_new = val_enc.unsqueeze(0)
                align_mat[2] = self.value_align(memory,
                                                v_enc_new,
                                                relations=None)

            schema_parts = [c_enc_new, t_enc_new]
            if self.rel_has_value:
                schema_parts.append(v_enc_new)
            schema_memory = paddle.concat(schema_parts, axis=1)

            state = EncoderState(
                state=None,
                cls_hidden=cls_hidden[batch_idx],
                memory=memory,
                question_memory=q_enc_new,
                schema_memory=schema_memory,
                words=orig_input.question_tokens,
                pointer_memories={
                    'table': t_enc_new,
                    'column': c_enc_new,
                    'value': v_enc_new,
                },
                pointer_maps={
                    'column': column_pointer_maps[batch_idx],
                    'table': table_pointer_maps[batch_idx],
                    'value': value_pointer_maps[batch_idx],
                },
                m2c_align_mat=align_mat[0],
                m2t_align_mat=align_mat[1],
                m2v_align_mat=align_mat[2],
            )
            enc_results.append(state)

        return enc_results
Beispiel #26
0
    def forward(self, x):
        """Apply ``self.bnorm`` to the first ``self.bnorm_channels`` entries
        of axis 1 and ``self.inorm`` to the rest, then re-join along axis 1.
        """
        split_at = self.bnorm_channels
        head_part = self.bnorm(x[:, :split_at, :, :])
        tail_part = self.inorm(x[:, split_at:, :, :])
        return paddle.concat([head_part, tail_part], axis=1)
Beispiel #27
0
def predictEnsembleThree(model,
                         model_1,
                         model_crop,
                         model_path,
                         model_path_1,
                         model_path_crop,
                         transforms,
                         transforms_crop,
                         image_list,
                         image_dir=None,
                         save_dir='output',
                         aug_pred=False,
                         scales=1.0,
                         flip_horizontal=True,
                         flip_vertical=False,
                         is_slide=False,
                         stride=None,
                         crop_size=None):
    """
    Predict and visualize the image_list with a full-image ensemble plus
    three overlapping crops.

    Args:
        model (nn.Layer): Used to predict for input image.
        model_1 (nn.Layer): Second model passed to the ensemble inference.
        model_crop (nn.Layer): Loaded from `model_path_crop` and set to eval mode.
        model_path (str): The path of pretrained model for `model`.
        model_path_1 (str): The path of pretrained model for `model_1`.
        model_path_crop (str): The path of pretrained model for `model_crop`.
        transforms (transform.Compose): Preprocess for input image.
        transforms_crop (transform.Compose): Preprocess applied before the crops are cut.
        image_list (list): A list of image path to be predicted.
        image_dir (str, optional): The root directory of the images predicted. Default: None.
        save_dir (str, optional): The directory to save the visualized results. Default: 'output'.
        aug_pred (bool, optional): Whether to use multi-scale and flip augmentation for prediction. Default: False.
        scales (list|float, optional): Scales for augment. It is valid when `aug_pred` is True. Default: 1.0.
        flip_horizontal (bool, optional): Whether to use flip horizontally augment. It is valid when `aug_pred` is True. Default: True.
        flip_vertical (bool, optional): Whether to use flip vertically augment. It is valid when `aug_pred` is True. Default: False.
        is_slide (bool, optional): Whether to predict by sliding window. Default: False.
        stride (tuple|list, optional): The stride of sliding window, the first is width and the second is height.
            It should be provided when `is_slide` is True.
        crop_size (tuple|list, optional):  The crop size of sliding window, the first is width and the second is height.
            It should be provided when `is_slide` is True.

    Returns:
        None. Results are written below `save_dir` in the sub-directories
        'added_prediction' and 'pseudo_color_prediction'.
    """
    utils.utils.load_entire_model(model, model_path)
    model.eval()
    utils.utils.load_entire_model(model_1, model_path_1)
    model_1.eval()
    utils.utils.load_entire_model(model_crop, model_path_crop)
    model_crop.eval()
    nranks = paddle.distributed.get_world_size()
    local_rank = paddle.distributed.get_rank()
    # Each rank only processes its own slice of the image list.
    if nranks > 1:
        img_lists = partition_list(image_list, nranks)
    else:
        img_lists = [image_list]

    added_saved_dir = os.path.join(save_dir, 'added_prediction')
    pred_saved_dir = os.path.join(save_dir, 'pseudo_color_prediction')

    logger.info("Start to predict...")
    progbar_pred = progbar.Progbar(target=len(img_lists[0]), verbose=1)
    with paddle.no_grad():
        for i, im_path in enumerate(img_lists[local_rank]):
            im_origin = cv2.imread(im_path)
            ori_shape = im_origin.shape[:2]
            im, _ = transforms(im_origin)
            im = im[np.newaxis, ...]
            im = paddle.to_tensor(im)

            # Three hard-coded 720x1280 windows that overlap horizontally by
            # 640 columns; assumes the transformed image is large enough to
            # contain them - TODO confirm against the data pipeline.
            ims, _ = transforms_crop(im_origin)
            im1 = ims[:, 540:540 + 720, 320:320 + 1280]
            im2 = ims[:, 540:540 + 720, 960:960 + 1280]
            im3 = ims[:, 540:540 + 720, 1600:1600 + 1280]
            im1 = im1[np.newaxis, ...]
            im1 = paddle.to_tensor(im1)
            im2 = im2[np.newaxis, ...]
            im2 = paddle.to_tensor(im2)
            im3 = im3[np.newaxis, ...]
            im3 = paddle.to_tensor(im3)
            ims_ = [im1, im2, im3]

            # Full-image prediction from the two-model ensemble.
            if aug_pred:
                pred = infer_ensemble.aug_inference(
                    model,
                    model_1,
                    im,
                    ori_shape=ori_shape,
                    transforms=transforms.transforms,
                    scales=scales,
                    flip_horizontal=flip_horizontal,
                    flip_vertical=flip_vertical,
                    is_slide=is_slide,
                    stride=stride,
                    crop_size=crop_size)
            else:
                pred = infer_ensemble.inference(
                    model,
                    model_1,
                    im,
                    ori_shape=ori_shape,
                    transforms=transforms.transforms,
                    is_slide=is_slide,
                    stride=stride,
                    crop_size=crop_size)

            # NOTE(review): the crop predictions below run `model`, not
            # `model_crop`, although `model_crop` is loaded above - confirm
            # which one is intended.
            preds = []
            for ii in range(3):
                im_ = ims_[ii]
                if aug_pred:
                    pred_crop = infer_crop.aug_inference(
                        model,
                        im_,
                        ori_shape=ori_shape,
                        transforms=transforms.transforms,
                        scales=scales,
                        flip_horizontal=flip_horizontal,
                        flip_vertical=flip_vertical,
                        is_slide=is_slide,
                        stride=stride,
                        crop_size=crop_size)
                else:
                    pred_crop = infer_crop.inference(
                        model,
                        im_,
                        ori_shape=ori_shape,
                        transforms=transforms.transforms,
                        is_slide=is_slide,
                        stride=stride,
                        crop_size=crop_size)
                preds.append(pred_crop)

            # Average the columns where neighbouring crops overlap and
            # stitch the result back together along the width axis.
            left_ensem = (preds[0][:, :, :, 640:1280] +
                          preds[1][:, :, :, 0:640]) / 2
            right_ensem = (preds[1][:, :, :, 640:1280] +
                           preds[2][:, :, :, 0:640]) / 2
            pred_ensem = paddle.concat([
                preds[0][:, :, :, 0:640], left_ensem, right_ensem,
                preds[2][:, :, :, 640:1280]
            ],
                                       axis=3)
            logit = F.interpolate(pred_ensem, (432, 768), mode='bilinear')

            # Paste the stitched crop logits into a copy of the full-image
            # prediction and add both together.
            pred_logit = pred.clone()
            pred_logit[:, :, 324:756, 576:1344] = logit
            pred = pred + pred_logit
            pred = F.interpolate(pred, ori_shape, mode='bilinear')
            pred = paddle.argmax(pred, axis=1, keepdim=True, dtype='int32')
            pred = paddle.squeeze(pred)
            pred = pred.numpy().astype('uint8')

            # get the saved name
            if image_dir is not None:
                im_file = im_path.replace(image_dir, '')
            else:
                im_file = os.path.basename(im_path)
            # startswith() is safe on an empty name, unlike im_file[0].
            if im_file.startswith('/'):
                im_file = im_file[1:]

            # save added image
            added_image = utils.visualize.visualize(im_path, pred, weight=0.6)
            added_image_path = os.path.join(added_saved_dir, im_file)
            mkdir(added_image_path)
            cv2.imwrite(added_image_path, added_image)

            # save pseudo color prediction
            # splitext() strips only the final extension, so dots elsewhere
            # in the relative path (e.g. "sub.dir/img.jpg") are preserved.
            pred_mask = utils.visualize.get_pseudo_color_map(pred)
            pred_saved_path = os.path.join(
                pred_saved_dir, os.path.splitext(im_file)[0] + ".png")
            mkdir(pred_saved_path)
            pred_mask.save(pred_saved_path)

            progbar_pred.update(i + 1)
Beispiel #28
0
def main(args):
    """Train the model described by ``args.config_yaml``.

    Reads the run settings from the YAML config, trains for the configured
    number of epochs while logging AUC and throughput every
    ``print_interval`` batches, and saves the model after each epoch.
    """
    paddle.seed(12345)
    config = load_yaml(args.config_yaml)
    use_gpu = config.get("dygraph.use_gpu", True)
    train_data_dir = config.get("dygraph.train_data_dir", None)
    epochs = config.get("dygraph.epochs", None)
    print_interval = config.get("dygraph.print_interval", None)
    model_save_path = config.get("dygraph.model_save_path", "model_output")
    dense_input_dim = config.get('hyper_parameters.dense_input_dim', None)

    banner = "***********************************"
    print(banner)
    logger.info(
        "use_gpu: {}, train_data_dir: {}, epochs: {}, print_interval: {}, model_save_path: {}"
        .format(use_gpu, train_data_dir, epochs, print_interval,
                model_save_path))
    print(banner)

    device_name = 'gpu' if use_gpu else 'cpu'
    place = paddle.set_device(device_name)

    net = create_model(config)
    model_init_path = config.get("dygraph.model_init_path", None)
    if model_init_path is not None:
        load_model(model_init_path, net)

    # to do : add optimizer function
    optimizer = paddle.optimizer.Adam(parameters=net.parameters())

    file_list = [
        os.path.join(train_data_dir, entry)
        for entry in os.listdir(train_data_dir)
    ]
    print("read data")
    dataset = CriteoDataset(file_list)
    train_dataloader = create_data_loader(dataset, place=place, config=config)

    first_epoch = config.get("last_epoch", -1) + 1

    for epoch_id in range(first_epoch, epochs):
        # set train mode
        net.train()
        auc_metric = paddle.metric.Auc("ROC")
        epoch_begin = time.time()
        interval_begin = time.time()
        # Counters below are reset after every log line.
        train_reader_cost = 0.0
        train_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()

        for batch_id, batch in enumerate(train_dataloader()):
            train_reader_cost += time.time() - reader_start
            optimizer.clear_grad()
            train_start = time.time()
            batch_size = len(batch[0])

            label, sparse_tensor, dense_tensor = create_feeds(
                batch, dense_input_dim)

            pred = net(sparse_tensor, dense_tensor)
            loss = create_loss(pred, label)

            loss.backward()
            optimizer.step()
            train_run_cost += time.time() - train_start
            total_samples += batch_size

            # for auc: stack (1 - pred, pred) as the two columns of axis 1
            label_int = paddle.cast(label, 'int64')
            two_class_pred = paddle.concat(x=[1 - pred, pred], axis=1)
            auc_metric.update(preds=two_class_pred.numpy(),
                              labels=label_int.numpy())

            if batch_id % print_interval == 0:
                batch_cost = train_reader_cost + train_run_cost
                logger.info(
                    "epoch: {}, batch_id: {}, auc: {:.6f}, avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec"
                    .format(epoch_id, batch_id, auc_metric.accumulate(),
                            train_reader_cost / print_interval,
                            batch_cost / print_interval,
                            total_samples / print_interval,
                            total_samples / batch_cost))
                train_reader_cost = 0.0
                train_run_cost = 0.0
                total_samples = 0
            reader_start = time.time()

        logger.info("epoch: {} done, auc: {:.6f}, epoch time:{:.2f} s".format(
            epoch_id, auc_metric.accumulate(),
            time.time() - epoch_begin))

        save_model(net,
                   optimizer,
                   model_save_path,
                   epoch_id,
                   prefix='rec')
    def get_loss(self, cate_preds, kernel_preds, ins_pred, ins_labels,
                 cate_labels, grid_order_list, fg_num):
        """
        Get loss of network of SOLOv2.

        Args:
            cate_preds (list): Tensor list of category branch output.
            kernel_preds (list): Tensor list of kernel branch output.
            ins_pred (list): Tensor list of instance branch output.
            ins_labels (list): List of instance labels per batch.
            cate_labels (list): List of category labels per batch.
            grid_order_list (list): List of index in per grid.
            fg_num (int): Number of positive samples in a mini-batch.
        Returns:
            dict: ``{'loss_ins': ..., 'loss_cate': ...}`` holding the two
                values returned by ``self.solov2_loss``.
        """
        batch_size = paddle.shape(grid_order_list[0])[0]

        ins_pred_list = []
        for level_kernels, level_orders in zip(kernel_preds, grid_order_list):
            # A level without any grid entry contributes a None placeholder.
            if level_orders.shape[1] == 0:
                ins_pred_list.append(None)
                continue

            flat_orders = paddle.reshape(level_orders, [-1])

            # Flatten the trailing axes, move axis 1 last, then merge the
            # first two axes so rows can be gathered by grid index.
            kernels = paddle.reshape(
                level_kernels,
                shape=(paddle.shape(level_kernels)[0],
                       paddle.shape(level_kernels)[1], -1))
            kernels = paddle.transpose(kernels, [0, 2, 1])
            kernels = paddle.reshape(
                kernels, shape=(-1, paddle.shape(kernels)[2]))

            picked = paddle.gather(kernels, index=flat_orders)
            picked = paddle.reshape(
                picked, shape=[batch_size, -1, paddle.shape(picked)[1]])

            # Flatten the trailing axes of the instance features and apply
            # the gathered kernels with a batched matmul.
            flat_ins = paddle.reshape(
                ins_pred,
                shape=(paddle.shape(ins_pred)[0], paddle.shape(ins_pred)[1],
                       -1))
            conv_out = paddle.matmul(picked, flat_ins)
            level_ins_pred = paddle.reshape(
                conv_out,
                shape=(-1, paddle.shape(ins_pred)[-2],
                       paddle.shape(ins_pred)[-1]))
            ins_pred_list.append(level_ins_pred)

        num_ins = paddle.sum(fg_num)

        flat_cate_preds = []
        for cate_pred in cate_preds:
            channels_last = paddle.transpose(cate_pred, [0, 2, 3, 1])
            flat_cate_preds.append(
                paddle.reshape(
                    channels_last, shape=(-1, self.cate_out_channels)))
        flatten_cate_preds = paddle.concat(flat_cate_preds)

        cate_labels = paddle.concat(
            [paddle.reshape(label, shape=[-1]) for label in cate_labels])

        loss_ins, loss_cate = self.solov2_loss(
            ins_pred_list, ins_labels, flatten_cate_preds, cate_labels, num_ins)

        return {'loss_ins': loss_ins, 'loss_cate': loss_cate}
Beispiel #30
0
    def forward(self, inputs_tensor, is_infer=0):
        """Run the network on one batch and return the prediction and loss.

        Args:
            inputs_tensor: Indexable pair; element 0 is the feature matrix
                that is sliced by column below, element 1 is cast to float32
                and used as the price feature.
            is_infer: When it compares equal to 0 the label column is read
                and the training loss is computed; otherwise no label is read.

        Returns:
            tuple: ``(self.y_hat, self.loss)`` in training mode, or
            ``(self.y_hat, paddle.ones(shape=[1]))`` in inference mode.

        Note:
            Intermediate tensors are stored as attributes on ``self``.
            ``self.position_his`` and ``self.dm_position_his`` are read here
            but not assigned in this method.
        """
        # input
        inputs = inputs_tensor[0]  # sparse_tensor
        dense_tensor = inputs_tensor[1]
        # columns 0-249: five 50-wide groups (three histories and two masks)
        self.btag_his = inputs[:, 0:50]
        self.cate_his = inputs[:, 50:100]
        self.brand_his = inputs[:, 100:150]
        self.mask = inputs[:, 150:200]
        self.match_mask = inputs[:, 200:250]

        # columns 250-258: user-side features
        self.uid = inputs[:, 250]
        self.cms_segid = inputs[:, 251]
        self.cms_group_id = inputs[:, 252]
        self.final_gender_code = inputs[:, 253]
        self.age_level = inputs[:, 254]
        self.pvalue_level = inputs[:, 255]
        self.shopping_level = inputs[:, 256]
        self.occupation = inputs[:, 257]
        self.new_user_class_level = inputs[:, 258]

        # columns 259-263: item-side features; column 264 is not read here,
        # the price comes from the dense tensor instead
        self.mid = inputs[:, 259]
        self.cate_id = inputs[:, 260]
        self.campaign_id = inputs[:, 261]
        self.customer = inputs[:, 262]
        self.brand = inputs[:, 263]
        self.price = dense_tensor.astype('float32')

        self.pid = inputs[:, 265]

        # the label column is only read in training mode
        if is_infer == 0:
            self.labels = inputs[:, 266]

        # embedding layer
        self.uid_batch_embedded = self.uid_embeddings_var(self.uid)
        self.mid_batch_embedded = self.mid_embeddings_var(self.mid)
        self.cat_batch_embedded = self.cat_embeddings_var(self.cate_id)
        self.cat_his_batch_embedded = self.cat_embeddings_var(self.cate_his)
        self.brand_batch_embedded = self.brand_embeddings_var(self.brand)
        self.brand_his_batch_embedded = self.brand_embeddings_var(
            self.brand_his)
        self.btag_his_batch_embedded = self.btag_embeddings_var(self.btag_his)
        self.dm_btag_his_batch_embedded = self.dm_btag_embeddings_var(
            self.btag_his)
        self.campaign_id_batch_embedded = self.campaign_id_embeddings_var(
            self.campaign_id)
        self.customer_batch_embedded = self.customer_embeddings_var(
            self.customer)
        self.cms_segid_batch_embedded = self.cms_segid_embeddings_var(
            self.cms_segid)
        self.cms_group_id_batch_embedded = self.cms_group_id_embeddings_var(
            self.cms_group_id)
        self.final_gender_code_batch_embedded = self.final_gender_code_embeddings_var(
            self.final_gender_code)
        self.age_level_batch_embedded = self.age_level_embeddings_var(
            self.age_level)
        self.pvalue_level_batch_embedded = self.pvalue_level_embeddings_var(
            self.pvalue_level)
        self.shopping_level_batch_embedded = self.shopping_level_embeddings_var(
            self.shopping_level)
        self.occupation_batch_embedded = self.occupation_embeddings_var(
            self.occupation)
        self.new_user_class_level_batch_embedded = self.new_user_class_level_embeddings_var(
            self.new_user_class_level)
        self.pid_batch_embedded = self.pid_embeddings_var(self.pid)

        # concatenate the per-field embeddings along the last axis
        self.user_feat = paddle.concat([
            self.uid_batch_embedded, self.cms_segid_batch_embedded,
            self.cms_group_id_batch_embedded,
            self.final_gender_code_batch_embedded,
            self.age_level_batch_embedded, self.pvalue_level_batch_embedded,
            self.shopping_level_batch_embedded, self.occupation_batch_embedded,
            self.new_user_class_level_batch_embedded
        ], -1)
        self.item_his_eb = paddle.concat(
            [self.cat_his_batch_embedded, self.brand_his_batch_embedded], -1)

        # history embeddings summed over axis 1
        self.item_his_eb_sum = paddle.sum(self.item_his_eb, 1)
        self.item_feat = paddle.concat([
            self.mid_batch_embedded, self.cat_batch_embedded,
            self.brand_batch_embedded, self.campaign_id_batch_embedded,
            self.customer_batch_embedded, self.price
        ], -1)
        self.item_eb = paddle.concat(
            [self.cat_batch_embedded, self.brand_batch_embedded], -1)
        self.context_feat = self.pid_batch_embedded

        # position embeddings are tiled once per batch row and reshaped so
        # they can be concatenated with the behaviour-tag embeddings
        self.position_his_eb = self.position_embeddings_var(
            self.position_his)  # T, E
        self.position_his_eb = paddle.tile(
            self.position_his_eb, [paddle.shape(self.mid)[0], 1])  # B*T, E
        self.position_his_eb = paddle.reshape(self.position_his_eb, [
            paddle.shape(self.mid)[0], -1,
            paddle.shape(self.position_his_eb)[1]
        ])  # B, T, E

        # same tiling for the deep-match position embeddings
        self.dm_position_his_eb = self.dm_position_embeddings_var(
            self.dm_position_his)  # T, E
        self.dm_position_his_eb = paddle.tile(
            self.dm_position_his_eb, [paddle.shape(self.mid)[0], 1])  # B*T, E
        self.dm_position_his_eb = paddle.reshape(self.dm_position_his_eb, [
            paddle.shape(self.mid)[0], -1,
            paddle.shape(self.dm_position_his_eb)[1]
        ])  # B, T, E

        self.position_his_eb = paddle.concat(
            [self.position_his_eb, self.btag_his_batch_embedded], -1)
        self.dm_position_his_eb = paddle.concat(
            [self.dm_position_his_eb, self.dm_btag_his_batch_embedded], -1)

        # User-to-Item Network
        # Auxiliary Match Network
        self.match_mask = paddle.cast(self.match_mask, 'float32')
        self.aux_loss, self.dm_user_vector, scores = self._deep_match(
            self.item_his_eb, self.dm_position_his_eb, self.mask,
            self.match_mask, self.cate_his, self.dm_item_vectors_var.weight,
            self.dm_item_biases, self.cate_size)
        # the auxiliary loss is scaled by 0.1 before being added to the loss
        self.aux_loss *= 0.1
        self.dm_item_vec = self.dm_item_vectors_var(self.cate_id)
        # user-to-item relevance: element-wise product summed over the last axis
        rel_u2i = paddle.sum(self.dm_user_vector * self.dm_item_vec,
                             -1,
                             keepdim=True)  # B,1
        self.rel_u2i = rel_u2i

        # Item-to-Item Network
        att_outputs, alphas, scores_unnorm = self._dmr_fcn_attention(
            self.item_eb, self.item_his_eb, self.position_his_eb, self.mask)
        # item-to-item relevance: unnormalised scores summed over axes 1 and 2
        rel_i2i = paddle.unsqueeze(paddle.sum(scores_unnorm, [1, 2]), -1)
        self.rel_i2i = rel_i2i
        self.scores = paddle.sum(alphas, 1)
        inp = paddle.concat([
            self.user_feat, self.item_feat, self.context_feat,
            self.item_his_eb_sum, self.item_eb * self.item_his_eb_sum, rel_u2i,
            rel_i2i, att_outputs
        ], -1)

        # build fcn net
        inp = self.inp_layer(inp)
        dnn0 = self.dnn0_layer(inp)
        dnn0 = self.dnn0_prelu(dnn0)
        dnn1 = self.dnn1_layer(dnn0)
        dnn1 = self.dnn1_prelu(dnn1)
        dnn2 = self.dnn2_layer(dnn1)
        dnn2 = self.dnn2_prelu(dnn2)
        dnn3 = self.dnn3_layer(dnn2)
        dnn3 = self.dnn3_prelu(dnn3)

        # prediction
        self.y_hat = F.sigmoid(dnn3)

        if is_infer == False:
            # Cross-entropy loss and optimizer initialization
            # dnn3 is summed over axis 1 and fed to BCEWithLogitsLoss as logits
            x = paddle.sum(dnn3, 1)
            BCE = paddle.nn.BCEWithLogitsLoss()
            ctr_loss = paddle.mean(BCE(x, label=self.labels.astype('float32')))
            self.ctr_loss = ctr_loss
            # total loss = CTR loss + scaled auxiliary match loss
            self.loss = self.ctr_loss + self.aux_loss

            return self.y_hat, self.loss
        else:
            # inference: a constant tensor stands in for the loss
            return self.y_hat, paddle.ones(shape=[1])