Example #1
    def test_dygraph(self):
        paddle.disable_static()
        for place in self.places:
            with fluid.dygraph.guard(place):
                np_x = np.random.randint(0, 2, (12, 10)).astype(np.bool_)
                x = fluid.layers.assign(np_x)
                x = fluid.layers.cast(x, 'bool')

                out1 = paddle.all(x)
                np_out1 = out1.numpy()
                expect_res1 = np.all(np_x)
                self.assertTrue((np_out1 == expect_res1).all())

                out2 = paddle.all(x, axis=0)
                np_out2 = out2.numpy()
                expect_res2 = np.all(np_x, axis=0)
                self.assertTrue((np_out2 == expect_res2).all())

                out3 = paddle.all(x, axis=-1)
                np_out3 = out3.numpy()
                expect_res3 = np.all(np_x, axis=-1)
                self.assertTrue((np_out3 == expect_res3).all())

                out4 = paddle.all(x, axis=1, keepdim=True)
                np_out4 = out4.numpy()
                expect_res4 = np.all(np_x, axis=1, keepdims=True)
                self.assertTrue((np_out4 == expect_res4).all())

        paddle.enable_static()
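
A minimal sketch of the same checks against the eager paddle 2.x API (assuming any recent Paddle release, without the legacy fluid layers):

import numpy as np
import paddle

x = paddle.to_tensor(np.random.randint(0, 2, (12, 10)).astype(np.bool_))
print(paddle.all(x))                               # a single boolean: every element True?
print(paddle.all(x, axis=0).shape)                 # [10], reduced over rows
print(paddle.all(x, axis=1, keepdim=True).shape)   # [12, 1], reduced axis kept
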
Example #2
def check_initial_inverse_hessian_estimate(H0):
    r"""Check whether the specified initial_inverse_hessian_estimate is symmetric and positive definite.
        Raise errors when precondition not met.

    Note: 
        In static graph can not raise error directly, so use py_func make raise_func as a op,
        and use paddle.static.nn.cond to decide if put the op in net.
        cholesky is the fast way to check positive definition, but in static graph can not catch 
        exception to raise value error, so use eigvals rather than cholesky in static graph.
    """
    is_symmetric = paddle.all(paddle.equal(H0, H0.t()))

    def raise_func(*args):  # *args: py_func passes its input tensors to func
        raise ValueError(
            "The initial_inverse_hessian_estimate should be symmetric and positive definite, but the specified is not."
        )

    if paddle.in_dynamic_mode():
        if not is_symmetric:
            raise_func()
        try:
            paddle.linalg.cholesky(H0)
        except RuntimeError:
            raise_func()
    else:

        def create_tmp_var(program, name, dtype, shape):
            return program.current_block().create_var(
                name=name, dtype=dtype, shape=shape)

        out_var = create_tmp_var(
            paddle.static.default_main_program(),
            name='output',
            dtype='float32',
            shape=[-1])

        def false_fn():
            paddle.static.nn.py_func(
                func=raise_func, x=is_symmetric, out=out_var)

        paddle.static.nn.cond(is_symmetric, None, false_fn)
        # eigvals only support cpu
        paddle.set_device("cpu")
        eigvals = paddle.linalg.eigvals(H0)
        is_positive = paddle.logical_and(
            paddle.all(eigvals.real() > 0.), paddle.all(eigvals.imag() == 0.))
        paddle.static.nn.cond(is_positive, None, false_fn)
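
In dynamic mode the whole precondition collapses to a few eager calls. A minimal sketch, assuming a recent paddle 2.x release (the SPD matrix here is made up for illustration):

import paddle

paddle.set_device("cpu")  # eigvals only supports CPU, as noted above
H0 = paddle.to_tensor([[2.0, 0.5], [0.5, 1.0]])
assert bool(paddle.all(paddle.equal(H0, H0.t())))  # symmetric
paddle.linalg.cholesky(H0)  # raises if H0 is not positive definite
eigvals = paddle.linalg.eigvals(H0)
assert bool(paddle.all(eigvals.real() > 0.))       # strictly positive spectrum
assert bool(paddle.all(eigvals.imag() == 0.))      # real eigenvalues
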
Example #3
    def check_static_result(self, place):
        with fluid.program_guard(fluid.Program(), fluid.Program()):
            input = fluid.data(name="input", shape=[4, 4], dtype="bool")
            result = paddle.all(x=input)
            input_np = np.random.randint(0, 2, [4, 4]).astype("bool")

            exe = fluid.Executor(place)
            fetches = exe.run(fluid.default_main_program(),
                              feed={"input": input_np},
                              fetch_list=[result])
            self.assertTrue(np.allclose(fetches[0], np.all(input_np)))
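
The fluid namespace used above is deprecated. A sketch of the same check written against paddle.static (assuming paddle 2.x; program and variable names are illustrative):

import numpy as np
import paddle

paddle.enable_static()
main_prog, startup_prog = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    x = paddle.static.data(name="input", shape=[4, 4], dtype="bool")
    result = paddle.all(x)

exe = paddle.static.Executor(paddle.CPUPlace())
input_np = np.random.randint(0, 2, [4, 4]).astype("bool")
fetches = exe.run(main_prog, feed={"input": input_np}, fetch_list=[result])
assert bool(fetches[0]) == bool(np.all(input_np))
paddle.disable_static()
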
Example #4
    def _unscale(self, optimizer):
        if not self._enable:
            return

        param_grads_dict = defaultdict(list)
        dist_param_grads_dict = defaultdict(list)
        if getattr(optimizer, '_param_groups', None) and isinstance(
                optimizer._param_groups[0], dict):
            for group in optimizer._param_groups:
                for param in group['params']:
                    if not param.is_distributed:
                        if param._grad_ivar() is not None:
                            param_grads_dict[param._grad_ivar().dtype].append(
                                param._grad_ivar())
                    else:
                        if param._grad_ivar() is not None:
                            dist_param_grads_dict[
                                param._grad_ivar().dtype].append(
                                    param._grad_ivar())
        else:
            for param in optimizer._parameter_list:
                if not param.is_distributed:
                    if param._grad_ivar() is not None:
                        param_grads_dict[param._grad_ivar().dtype].append(
                            param._grad_ivar())
                else:
                    if param._grad_ivar() is not None:
                        dist_param_grads_dict[param._grad_ivar().dtype].append(
                            param._grad_ivar())
        for dtype in dist_param_grads_dict:
            for grad in dist_param_grads_dict[dtype]:
                self._found_inf = paddle.logical_not(
                    paddle.all(paddle.isfinite(grad)))
                if self._found_inf:
                    print('Found inf or nan in classifier, dtype is', dtype)
                    return

        for dtype in param_grads_dict:
            param_grads = param_grads_dict[dtype]
            _C_ops.check_finite_and_unscale(param_grads, self._scale,
                                            param_grads, self._found_inf)
            if self._found_inf:
                print('Found inf or nan in backbone, dtype is', dtype)
                break
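
The overflow test above hinges on one idiom: a gradient is usable only when every element is finite. A minimal sketch of that check (the gradient values are made up):

import paddle

grad = paddle.to_tensor([1.0, float('inf'), 3.0])
# found_inf flips to True as soon as any element is inf or nan
found_inf = paddle.logical_not(paddle.all(paddle.isfinite(grad)))
print(bool(found_inf))  # True
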
Example #5
    def step(self, optimizer):
        if int(self.sample_ratio) < 1:
            warnings.warn(
                "Calling paddle._C_ops.sparse_momentum explicitly is a temporary "
                "workaround. We will merge it into the optimizer in the future, "
                "so please do not rely on this usage.")
            
            found_inf = paddle.logical_not(
                paddle.all(paddle.isfinite(self._parameter_list[0].grad)))
            if found_inf:
                print('Found inf or nan in classifier')
            else:
                if self.weight.name not in optimizer._accumulators[
                        optimizer._velocity_acc_str]:
                    optimizer._add_accumulator(optimizer._velocity_acc_str,
                                               self.weight)

                velocity = optimizer._accumulators[
                    optimizer._velocity_acc_str][self.weight.name]
                _, _ = paddle._C_ops.sparse_momentum(
                    self.weight,
                    self._parameter_list[0].grad,
                    velocity,
                    self.index,
                    paddle.to_tensor(
                        optimizer.get_lr(), dtype='float32'),
                    self.weight,
                    velocity,
                    'mu',
                    optimizer._momentum,
                    'use_nesterov',
                    optimizer._use_nesterov,
                    'regularization_method',
                    optimizer._regularization_method,
                    'regularization_coeff',
                    optimizer._regularization_coeff,
                    'axis',
                    1)
Example #6
    def forward(self, inputs):
        """
        Reduce inputs with paddle.all along self.axis.
        """
        x = paddle.all(inputs, axis=self.axis, keepdim=self.keepdim)
        return x
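
This snippet is only the forward of a wrapper layer; self.axis and self.keepdim are set elsewhere. A self-contained sketch (the class name and constructor arguments are hypothetical):

import paddle
import paddle.nn as nn

class AllLayer(nn.Layer):
    def __init__(self, axis=None, keepdim=False):
        super().__init__()
        self.axis = axis
        self.keepdim = keepdim

    def forward(self, inputs):
        return paddle.all(inputs, axis=self.axis, keepdim=self.keepdim)

layer = AllLayer(axis=1, keepdim=True)
print(layer(paddle.to_tensor([[True, False], [True, True]])))  # [[False], [True]]
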
Example #7
    def sync_gradient_and_unscale(self, optimizer):
        if self.world_size <= 1 and self.grad_norm_clip is None and not self._enable:
            return

        # data parallel
        param_grads_dict = defaultdict(list)
        # model parallel
        dist_param_grads_dict = defaultdict(list)

        if getattr(optimizer, '_param_groups', None) and isinstance(
                optimizer._param_groups[0], dict):
            for group in optimizer._param_groups:
                for param in group['params']:
                    if not param.is_distributed:
                        if param._grad_ivar() is not None:
                            param_grads_dict[param._grad_ivar().dtype].append(
                                param._grad_ivar())
                    else:
                        if param._grad_ivar() is not None:
                            dist_param_grads_dict[param._grad_ivar(
                            ).dtype].append(param._grad_ivar())
                        elif getattr(param, 'sparse_grad', None) is not None:
                            grad = getattr(param, 'sparse_grad')
                            dist_param_grads_dict[grad.dtype].append(grad)
        else:
            for param in optimizer._parameter_list:
                if not param.is_distributed:
                    if param._grad_ivar() is not None:
                        param_grads_dict[param._grad_ivar().dtype].append(
                            param._grad_ivar())
                else:
                    if param._grad_ivar() is not None:
                        dist_param_grads_dict[param._grad_ivar().dtype].append(
                            param._grad_ivar())
                    elif getattr(param, 'sparse_grad', None) is not None:
                        grad = getattr(param, 'sparse_grad')
                        dist_param_grads_dict[grad.dtype].append(grad)

        if self._enable:
            for dtype in dist_param_grads_dict:
                for grad in dist_param_grads_dict[dtype]:
                    self._found_inf = paddle.logical_not(
                        paddle.all(paddle.isfinite(grad)))
                    if self._found_inf:
                        print(
                            'Found inf or nan of distributed parameter, dtype is',
                            dtype)
                        return

        grads_fp32 = []
        grads_fp16 = []
        if len(param_grads_dict[paddle.float32]) > 0:
            coalesced_grads_and_vars_fp32 = \
                paddle.fluid.dygraph.parallel.build_groups(param_grads_dict[paddle.float32], 128 * 1024 * 1024)
            for coalesced_grad, _, _ in coalesced_grads_and_vars_fp32:
                if self.world_size > 1:
                    paddle.distributed.all_reduce(coalesced_grad)
                grads_fp32.append(coalesced_grad)

            if self._enable:
                _C_ops.check_finite_and_unscale(grads_fp32, self._scale,
                                                grads_fp32, self._found_inf)
                if self._found_inf:
                    print(
                        'Found inf or nan of non distributed parameter, dtype is',
                        paddle.float32)
                    return

        if len(param_grads_dict[paddle.float16]) > 0:
            coalesced_grads_and_vars_fp16 = \
                paddle.fluid.dygraph.parallel.build_groups(param_grads_dict[paddle.float16], 128 * 1024 * 1024)
            for coalesced_grad, _, _ in coalesced_grads_and_vars_fp16:
                if self.world_size > 1:
                    paddle.distributed.all_reduce(coalesced_grad)
                grads_fp16.append(coalesced_grad)

            if self._enable:
                _C_ops.check_finite_and_unscale(grads_fp16, self._scale,
                                                grads_fp16, self._found_inf)
                if self._found_inf:
                    print(
                        'Found inf or nan of non distributed parameter, dtype is',
                        paddle.float16)
                    return

        if self.grad_norm_clip is not None:
            clip_grad_norm_(grads_fp32, grads_fp16, self.grad_norm_clip,
                            self.grad_norm_clip_max)

        if len(param_grads_dict[paddle.float16]) > 0:
            paddle.fluid.dygraph.parallel._split_tensors(
                coalesced_grads_and_vars_fp16)
        if len(param_grads_dict[paddle.float32]) > 0:
            paddle.fluid.dygraph.parallel._split_tensors(
                coalesced_grads_and_vars_fp32)
Example #8
    def greedy_search(self,
                      src_word,
                      max_len=256,
                      waitk=-1,
                      caches=None,
                      bos_id=None):
        """
        greedy_search uses a streaming reader. It does not need to call the
        encoder many times; a sub-sentence only needs to call the encoder once.
        Therefore it needs the previous states (caches) and the last generated
        token id from the previous call.
        """
        src_max_len = paddle.shape(src_word)[-1]
        base_attn_bias = paddle.cast(
            src_word == self.bos_id,
            dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e9
        src_slf_attn_bias = base_attn_bias
        src_slf_attn_bias.stop_gradient = True
        trg_src_attn_bias = paddle.tile(base_attn_bias, [1, 1, 1, 1])
        src_pos = paddle.cast(
            src_word != self.bos_id, dtype="int64") * paddle.arange(
                start=0, end=src_max_len)
        src_emb = self.src_word_embedding(src_word)
        src_pos_emb = self.src_pos_embedding(src_pos)
        src_emb = src_emb + src_pos_emb
        enc_input = F.dropout(
            src_emb, p=self.dropout,
            training=self.training) if self.dropout else src_emb
        enc_outputs = [self.encoder(enc_input, src_mask=src_slf_attn_bias)]

        # constant number
        batch_size = enc_outputs[-1].shape[0]
        max_len = (
            enc_outputs[-1].shape[1] + 20) if max_len is None else max_len
        end_token_tensor = paddle.full(
            shape=[batch_size, 1], fill_value=self.eos_id, dtype="int64")

        predict_ids = []
        log_probs = paddle.full(
            shape=[batch_size, 1], fill_value=0, dtype="float32")
        if bos_id is None:
            trg_word = paddle.full(
                shape=[batch_size, 1], fill_value=self.bos_id, dtype="int64")
        else:
            trg_word = paddle.full(
                shape=[batch_size, 1], fill_value=bos_id, dtype="int64")

        # init states (caches) for transformer
        if not caches:
            caches = self.decoder.gen_cache(enc_outputs[-1], do_zip=False)

        for i in range(max_len):
            trg_pos = paddle.full(
                shape=trg_word.shape, fill_value=i, dtype="int64")
            trg_emb = self.trg_word_embedding(trg_word)
            trg_pos_emb = self.trg_pos_embedding(trg_pos)
            trg_emb = trg_emb + trg_pos_emb
            dec_input = F.dropout(
                trg_emb, p=self.dropout,
                training=self.training) if self.dropout else trg_emb

            if waitk < 0 or i >= len(enc_outputs):
                # If decoding full-sentence, or the decoder step is past all
                # encoded source prefixes, read the whole source.
                _e = enc_outputs[-1]
                dec_output, caches = self.decoder(
                    dec_input, [_e], None,
                    trg_src_attn_bias[:, :, :, :_e.shape[1]], caches)
            else:
                _e = enc_outputs[i]
                dec_output, caches = self.decoder(
                    dec_input, [_e], None,
                    trg_src_attn_bias[:, :, :, :_e.shape[1]], caches)

            dec_output = paddle.reshape(
                dec_output, shape=[-1, dec_output.shape[-1]])

            logits = self.linear(dec_output)
            step_log_probs = paddle.log(F.softmax(logits, axis=-1))
            log_probs = paddle.add(x=step_log_probs, y=log_probs)
            scores = log_probs
            topk_scores, topk_indices = paddle.topk(x=scores, k=1)

            finished = paddle.equal(topk_indices, end_token_tensor)
            trg_word = topk_indices
            log_probs = topk_scores

            predict_ids.append(topk_indices)

            if paddle.all(finished).numpy():
                break

        predict_ids = paddle.stack(predict_ids, axis=0)
        finished_seq = paddle.transpose(predict_ids, [1, 2, 0])
        finished_scores = topk_scores

        return finished_seq, finished_scores, caches
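
The loop terminates through a single batch-wide test: paddle.all over the per-sequence finished flags. A minimal sketch of that stop condition (token ids are made up):

import paddle

end_token_tensor = paddle.full(shape=[2, 1], fill_value=2, dtype='int64')
topk_indices = paddle.to_tensor([[2], [2]], dtype='int64')
finished = paddle.equal(topk_indices, end_token_tensor)
if paddle.all(finished).numpy():  # True only once every sequence emitted <eos>
    print('stop decoding')
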
Example #9
    def greedy_search(self, src_word, max_len=256, waitk=-1):
        src_max_len = paddle.shape(src_word)[-1]
        base_attn_bias = paddle.cast(
            src_word == self.bos_id,
            dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e9
        src_slf_attn_bias = base_attn_bias
        src_slf_attn_bias.stop_gradient = True
        trg_src_attn_bias = paddle.tile(base_attn_bias, [1, 1, 1, 1])
        src_pos = paddle.cast(src_word != self.bos_id,
                              dtype="int64") * paddle.arange(start=0,
                                                             end=src_max_len)
        src_emb = self.src_word_embedding(src_word)
        src_pos_emb = self.src_pos_embedding(src_pos)
        src_emb = src_emb + src_pos_emb
        enc_input = F.dropout(
            src_emb, p=self.dropout,
            training=self.training) if self.dropout else src_emb
        if waitk < 0 or waitk > src_max_len:
            enc_outputs = [self.encoder(enc_input, src_mask=src_slf_attn_bias)]
        else:
            enc_outputs = []
            for i in range(waitk, src_max_len + 1):
                enc_output = self.encoder(
                    enc_input[:, :i, :],
                    src_mask=src_slf_attn_bias[:, :, :, :i])
                enc_outputs.append(enc_output)

        # constant number
        batch_size = enc_outputs[-1].shape[0]
        max_len = (enc_outputs[-1].shape[1] +
                   20) if max_len is None else max_len
        end_token_tensor = paddle.full(shape=[batch_size, 1],
                                       fill_value=self.eos_id,
                                       dtype="int64")

        predict_ids = []
        log_probs = paddle.full(shape=[batch_size, 1],
                                fill_value=0,
                                dtype="float32")
        trg_word = paddle.full(shape=[batch_size, 1],
                               fill_value=self.bos_id,
                               dtype="int64")

        # init states (caches) for transformer
        caches = self.decoder.gen_cache(enc_outputs[-1], do_zip=False)

        for i in range(max_len):
            trg_pos = paddle.full(shape=trg_word.shape,
                                  fill_value=i,
                                  dtype="int64")
            trg_emb = self.trg_word_embedding(trg_word)
            trg_pos_emb = self.trg_pos_embedding(trg_pos)
            trg_emb = trg_emb + trg_pos_emb
            dec_input = F.dropout(
                trg_emb, p=self.dropout,
                training=self.training) if self.dropout else trg_emb

            if waitk < 0 or i >= len(enc_outputs):
                # Avoid getting the whole source in advance, a diff from:
                # https://github.com/autosimtrans/SimulTransBaseline/blob/master/model.py#L1207
                # If decoding full-sentence, or the decoder step is past all
                # encoded source prefixes, read the whole source.
                _e = enc_outputs[-1]
                dec_output, caches = self.decoder(
                    dec_input, [_e], None,
                    trg_src_attn_bias[:, :, :, :_e.shape[1]], caches)
            else:
                _e = enc_outputs[i]
                dec_output, caches = self.decoder(
                    dec_input, [_e], None,
                    trg_src_attn_bias[:, :, :, :_e.shape[1]], caches)

            dec_output = paddle.reshape(dec_output,
                                        shape=[-1, dec_output.shape[-1]])

            logits = self.linear(dec_output)
            step_log_probs = paddle.log(F.softmax(logits, axis=-1))
            log_probs = paddle.add(x=step_log_probs, y=log_probs)
            scores = log_probs
            topk_scores, topk_indices = paddle.topk(x=scores, k=1)

            finished = paddle.equal(topk_indices, end_token_tensor)
            trg_word = topk_indices
            log_probs = topk_scores

            predict_ids.append(topk_indices)

            if paddle.all(finished).numpy():
                break

        predict_ids = paddle.stack(predict_ids, axis=0)
        finished_seq = paddle.transpose(predict_ids, [1, 2, 0])
        finished_scores = topk_scores

        return finished_seq, finished_scores
Example #10
    def forward(self, src_word):
        src_max_len = paddle.shape(src_word)[-1]
        mem_seq_lens = paddle.sum(paddle.cast(src_word != self.bos_id,
                                              dtype="int32"),
                                  axis=-1,
                                  keepdim=True)

        src_slf_attn_bias = paddle.cast(
            src_word == self.bos_id,
            dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e9

        src_slf_attn_bias.stop_gradient = True

        src_pos = paddle.cast(src_word != self.bos_id,
                              dtype="int64") * paddle.arange(start=0,
                                                             end=src_max_len)

        src_emb = self.src_word_embedding(src_word)

        src_pos_emb = self.src_pos_embedding(src_pos)
        src_emb = src_emb + src_pos_emb
        enc_input = F.dropout(
            src_emb, p=self.dropout,
            training=self.training) if self.dropout else src_emb
        enc_output = self.transformer.encoder(enc_input,
                                              src_mask=src_slf_attn_bias)

        batch_size = enc_output.shape[0]
        end_token_tensor = paddle.full(shape=[batch_size, 1],
                                       fill_value=self.eos_id,
                                       dtype="int64")

        predict_ids = []
        log_probs = paddle.full(shape=[batch_size, 1],
                                fill_value=0,
                                dtype="float32")
        trg_word = paddle.full(shape=[batch_size, 1],
                               fill_value=self.bos_id,
                               dtype="int64")

        if self.use_fp16_decoder:
            enc_output = paddle.cast(enc_output, "float16")

        # Init cache
        self_cache = paddle.zeros(
            shape=[self.num_decoder_layers, 2, 0, batch_size, self.d_model],
            dtype=enc_output.dtype)
        mem_cache = paddle.zeros(shape=[
            self.num_decoder_layers, 2, batch_size, src_max_len, self.d_model
        ],
                                 dtype=enc_output.dtype)

        for i in range(self.max_out_len):
            trg_pos = paddle.full(shape=trg_word.shape,
                                  fill_value=i,
                                  dtype="int64")
            trg_emb = self.trg_word_embedding(trg_word)
            trg_pos_emb = self.trg_pos_embedding(trg_pos)
            trg_emb = trg_emb + trg_pos_emb
            dec_input = F.dropout(
                trg_emb, p=self.dropout,
                training=self.training) if self.dropout else trg_emb

            # TODO(gongenlei): do cast in op
            if self.use_fp16_decoder:
                dec_input = paddle.cast(dec_input, "float16")

            dec_output, self_cache, mem_cache = self.decoder(
                from_tensor=dec_input,
                memory_tensor=enc_output,
                mem_seq_len=mem_seq_lens,
                self_cache=self_cache,
                mem_cache=mem_cache)

            if self.use_fp16_decoder:
                dec_output = paddle.cast(dec_output, "float32")

            dec_output = paddle.reshape(dec_output,
                                        shape=[-1, dec_output.shape[-1]])

            logits = self.linear(dec_output)
            step_log_probs = paddle.log(F.softmax(logits, axis=-1))
            log_probs = paddle.add(x=step_log_probs, y=log_probs)
            scores = log_probs
            topk_scores, topk_indices = paddle.topk(x=scores, k=1)

            finished = paddle.equal(topk_indices, end_token_tensor)
            trg_word = topk_indices
            log_probs = topk_scores

            predict_ids.append(topk_indices)

            # TODO(gongenlei): support static graph
            if paddle.all(finished).numpy():
                break

        predict_ids = paddle.stack(predict_ids, axis=0)
        finished_seq = paddle.transpose(predict_ids, [1, 2, 0])
        finished_scores = topk_scores

        return finished_seq, finished_scores
Example #11
    def __call__(self, value):
        # Python's `and` would force bool() on a batched tensor; use the
        # elementwise logical_and instead.
        return paddle.logical_and(
            paddle.all(value >= 0, axis=-1),
            (value.sum(-1) - 1).abs() < 1e-6)
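
A minimal sketch of this constraint on a batch (the input rows are made up; the second row fails the non-negativity test):

import paddle

value = paddle.to_tensor([[0.2, 0.3, 0.5], [0.5, 0.6, -0.1]])
non_negative = paddle.all(value >= 0, axis=-1)
sums_to_one = (value.sum(-1) - 1).abs() < 1e-6
print(paddle.logical_and(non_negative, sums_to_one))  # [True, False]
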
Example #12
def merge_semantic_and_instance(semantic, instance, label_divisor, thing_list,
                                stuff_area, ignore_index):
    """
    Post-processing for panoptic segmentation, merging the semantic segmentation
        label and the class-agnostic instance segmentation label.

    Args:
        semantic (Tensor): A Tensor of shape [1, H, W], predicted semantic label.
        instance (Tensor): A Tensor of shape [1, H, W], predicted instance label.
        label_divisor (int): An Integer, used to convert panoptic id = semantic id * label_divisor + instance_id.
        thing_list (list): A List of thing class id.
        stuff_area (int): An Integer, remove stuff whose area is less than stuff_area.
        ignore_index (int): Specifies a value that is ignored.

    Returns:
        Tensor: A Tensor of shape [1, H, W]. Pixels whose value equals ignore_index
            are ignored. A stuff class is represented as class_id, while a thing
            class is represented as class_id * label_divisor + ins_id, where
            ins_id starts from 1.
    """
    # In case thing mask does not align with semantic prediction
    pan_seg = paddle.zeros_like(semantic) + ignore_index
    thing_seg = instance > 0
    semantic_thing_seg = paddle.zeros_like(semantic)
    for thing_class in thing_list:
        semantic_thing_seg += semantic == thing_class

    # keep track of instance id for each class
    class_id_tracker = {}

    # paste thing by majority voting
    ins_ids = paddle.unique(instance)
    for ins_id in ins_ids:
        if ins_id == 0:
            continue
        # Make sure only do majority voting within semantic_thing_seg
        thing_mask = paddle.logical_and(instance == ins_id,
                                        semantic_thing_seg == 1)
        if paddle.all(paddle.logical_not(thing_mask)):
            continue
        # get class id for instance of ins_id
        sem_ins_id = paddle.gather(semantic.reshape(
            (-1, )), paddle.nonzero(thing_mask.reshape(
                (-1, ))))  # equal to semantic[thing_mask]
        v, c = paddle.unique(sem_ins_id, return_counts=True)
        class_id = paddle.gather(v, c.argmax())
        class_id = class_id.numpy()[0]
        if class_id in class_id_tracker:
            new_ins_id = class_id_tracker[class_id]
        else:
            class_id_tracker[class_id] = 1
            new_ins_id = 1
        class_id_tracker[class_id] += 1

        # pan_seg[thing_mask] = class_id * label_divisor + new_ins_id
        pan_seg = pan_seg * (paddle.logical_not(thing_mask)) + (
            class_id * label_divisor + new_ins_id) * thing_mask.astype('int64')

    # paste stuff to unoccupied area
    class_ids = paddle.unique(semantic)
    for class_id in class_ids:
        if class_id.numpy() in thing_list:
            # thing class
            continue
        # calculate stuff area
        stuff_mask = paddle.logical_and(semantic == class_id,
                                        paddle.logical_not(thing_seg))
        area = paddle.sum(stuff_mask.astype('int64'))
        if area >= stuff_area:
            # pan_seg[stuff_mask] = class_id
            pan_seg = pan_seg * (paddle.logical_not(stuff_mask)
                                 ) + stuff_mask.astype('int64') * class_id

    return pan_seg
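
Both commented-out assignments above (pan_seg[mask] = v) are replaced by the same mask-blend idiom. A minimal sketch of it in isolation (shapes and values are made up):

import paddle

pan_seg = paddle.full([1, 2, 2], 255, dtype='int64')   # filled with ignore_index
mask = paddle.to_tensor([[[True, False], [False, True]]])
new_value = 7
# keep old values where mask is False, write new_value where mask is True
pan_seg = pan_seg * paddle.logical_not(mask).astype('int64') \
    + new_value * mask.astype('int64')
print(pan_seg)  # [[[7, 255], [255, 7]]]
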