def test_dygraph(self):
    paddle.disable_static()
    for place in self.places:
        with fluid.dygraph.guard(place):
            # np.bool was removed in NumPy 1.24; use the builtin bool
            np_x = np.random.randint(0, 2, (12, 10)).astype(bool)
            x = fluid.layers.assign(np_x)
            x = fluid.layers.cast(x, 'bool')

            out1 = paddle.all(x)
            np_out1 = out1.numpy()
            expect_res1 = np.all(np_x)
            self.assertTrue((np_out1 == expect_res1).all())

            out2 = paddle.all(x, axis=0)
            np_out2 = out2.numpy()
            expect_res2 = np.all(np_x, axis=0)
            self.assertTrue((np_out2 == expect_res2).all())

            out3 = paddle.all(x, axis=-1)
            np_out3 = out3.numpy()
            expect_res3 = np.all(np_x, axis=-1)
            self.assertTrue((np_out3 == expect_res3).all())

            out4 = paddle.all(x, axis=1, keepdim=True)
            np_out4 = out4.numpy()
            expect_res4 = np.all(np_x, axis=1, keepdims=True)
            self.assertTrue((np_out4 == expect_res4).all())
    paddle.enable_static()
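A minimal standalone sketch of what the test above verifies, using the public 2.x API (paddle.to_tensor instead of the legacy fluid helpers); it assumes only that paddle and numpy are installed:

import numpy as np
import paddle

# paddle.all mirrors np.all over the whole tensor, over a given axis,
# and with keepdim/keepdims.
x_np = np.random.randint(0, 2, (12, 10)).astype(bool)
x = paddle.to_tensor(x_np)
assert paddle.all(x).numpy() == np.all(x_np)
assert (paddle.all(x, axis=0).numpy() == np.all(x_np, axis=0)).all()
assert (paddle.all(x, axis=1, keepdim=True).numpy()
        == np.all(x_np, axis=1, keepdims=True)).all()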
def check_initial_inverse_hessian_estimate(H0):
    r"""Check whether the specified initial_inverse_hessian_estimate is
    symmetric and positive definite. Raise a ValueError when the
    precondition is not met.

    Note:
        In static graph mode an error cannot be raised directly, so
        raise_func is wrapped as an op via py_func, and
        paddle.static.nn.cond decides whether to put that op into the net.
        cholesky is the fast way to check positive definiteness, but in
        static graph mode the exception cannot be caught to raise a
        ValueError, so eigvals is used instead of cholesky there.
    """
    is_symmetric = paddle.all(paddle.equal(H0, H0.t()))

    def raise_func():
        raise ValueError(
            "The initial_inverse_hessian_estimate should be symmetric and positive definite, but the specified is not."
        )

    if paddle.in_dynamic_mode():
        if not is_symmetric:
            raise_func()
        try:
            paddle.linalg.cholesky(H0)
        except RuntimeError:
            raise_func()
    else:

        def create_tmp_var(program, name, dtype, shape):
            return program.current_block().create_var(
                name=name, dtype=dtype, shape=shape)

        out_var = create_tmp_var(
            paddle.static.default_main_program(),
            name='output',
            dtype='float32',
            shape=[-1])

        def false_fn():
            paddle.static.nn.py_func(
                func=raise_func, x=is_symmetric, out=out_var)

        paddle.static.nn.cond(is_symmetric, None, false_fn)
        # eigvals only supports CPU
        paddle.set_device("cpu")
        eigvals = paddle.linalg.eigvals(H0)  # fixed: was paddle.paddle.linalg.eigvals
        is_positive = paddle.all(eigvals.real() > 0.) and paddle.all(
            eigvals.imag() == 0.)
        paddle.static.nn.cond(is_positive, None, false_fn)
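A hedged dynamic-mode sketch of how the checker above behaves; it assumes check_initial_inverse_hessian_estimate from the snippet is in scope:

import paddle

paddle.disable_static()
# The identity is symmetric and positive definite, so the check passes.
H_ok = paddle.eye(3, dtype='float32')
check_initial_inverse_hessian_estimate(H_ok)

# An asymmetric matrix trips the symmetry test and raises ValueError.
H_bad = paddle.to_tensor([[1., 2.], [0., 1.]])
try:
    check_initial_inverse_hessian_estimate(H_bad)
except ValueError as e:
    print(e)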
def check_static_result(self, place):
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        input = fluid.data(name="input", shape=[4, 4], dtype="bool")
        result = paddle.all(x=input)
        input_np = np.random.randint(0, 2, [4, 4]).astype("bool")

        exe = fluid.Executor(place)
        fetches = exe.run(fluid.default_main_program(),
                          feed={"input": input_np},
                          fetch_list=[result])
        self.assertTrue(np.allclose(fetches[0], np.all(input_np)))
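For comparison, a sketch of the same check written against the paddle.static API rather than the legacy fluid namespace; this is an assumed modern equivalent, not the original test:

import numpy as np
import paddle

paddle.enable_static()
main_prog, startup_prog = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    x = paddle.static.data(name="input", shape=[4, 4], dtype="bool")
    result = paddle.all(x)

exe = paddle.static.Executor(paddle.CPUPlace())
x_np = np.random.randint(0, 2, [4, 4]).astype("bool")
out, = exe.run(main_prog, feed={"input": x_np}, fetch_list=[result])
assert out == np.all(x_np)
paddle.disable_static()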
def _unscale(self, optimizer):
    if not self._enable:
        return

    param_grads_dict = defaultdict(list)
    dist_param_grads_dict = defaultdict(list)
    if getattr(optimizer, '_param_groups', None) and isinstance(
            optimizer._param_groups[0], dict):
        for group in optimizer._param_groups:
            for param in group['params']:
                if not param.is_distributed:
                    if param._grad_ivar() is not None:
                        param_grads_dict[param._grad_ivar().dtype].append(
                            param._grad_ivar())
                else:
                    if param._grad_ivar() is not None:
                        dist_param_grads_dict[
                            param._grad_ivar().dtype].append(
                                param._grad_ivar())
    else:
        for param in optimizer._parameter_list:
            if not param.is_distributed:
                if param._grad_ivar() is not None:
                    param_grads_dict[param._grad_ivar().dtype].append(
                        param._grad_ivar())
            else:
                if param._grad_ivar() is not None:
                    dist_param_grads_dict[param._grad_ivar().dtype].append(
                        param._grad_ivar())

    for dtype in dist_param_grads_dict:
        for grad in dist_param_grads_dict[dtype]:
            self._found_inf = paddle.logical_not(
                paddle.all(paddle.isfinite(grad)))
            if self._found_inf:
                print('Found inf or nan in classifier, dtype is', dtype)
                return

    for dtype in param_grads_dict:
        param_grads = param_grads_dict[dtype]
        _C_ops.check_finite_and_unscale(param_grads, self._scale,
                                        param_grads, self._found_inf)
        if self._found_inf:
            print('Found inf or nan in backbone, dtype is', dtype)
            break
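A small sketch isolating the found-inf idiom used above (paddle.logical_not over paddle.all of paddle.isfinite); the flag flips to True as soon as any gradient element is inf or nan:

import paddle

g_ok = paddle.to_tensor([1.0, 2.0, 3.0])
g_bad = paddle.to_tensor([1.0, float('inf'), 3.0])
print(paddle.logical_not(paddle.all(paddle.isfinite(g_ok))))   # False
print(paddle.logical_not(paddle.all(paddle.isfinite(g_bad))))  # True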
def step(self, optimizer):
    if int(self.sample_ratio) < 1:
        warnings.warn(
            "Explicitly calling paddle._C_ops.sparse_momentum is a "
            "temporary workaround. It will be merged into the optimizer "
            "in the future; please do not rely on it.")
        found_inf = paddle.logical_not(
            paddle.all(paddle.isfinite(self._parameter_list[0].grad)))
        if found_inf:
            print('Found inf or nan in classifier')
        else:
            if self.weight.name not in optimizer._accumulators[
                    optimizer._velocity_acc_str]:
                optimizer._add_accumulator(optimizer._velocity_acc_str,
                                           self.weight)
            velocity = optimizer._accumulators[
                optimizer._velocity_acc_str][self.weight.name]
            _, _ = paddle._C_ops.sparse_momentum(
                self.weight, self._parameter_list[0].grad, velocity,
                self.index,
                paddle.to_tensor(optimizer.get_lr(), dtype='float32'),
                self.weight, velocity, 'mu', optimizer._momentum,
                'use_nesterov', optimizer._use_nesterov,
                'regularization_method', optimizer._regularization_method,
                'regularization_coeff', optimizer._regularization_coeff,
                'axis', 1)
def forward(self, inputs):
    """forward"""
    x = paddle.all(inputs, axis=self.axis, keepdim=self.keepdim)
    return x
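The forward above belongs to a class that is not shown; a hypothetical minimal reconstruction (the class name ReduceAll and its attributes are assumptions for illustration) could look like:

import paddle

class ReduceAll(paddle.nn.Layer):
    # Hypothetical wrapper: reduces with logical AND along self.axis.
    def __init__(self, axis=None, keepdim=False):
        super().__init__()
        self.axis = axis
        self.keepdim = keepdim

    def forward(self, inputs):
        return paddle.all(inputs, axis=self.axis, keepdim=self.keepdim)

layer = ReduceAll(axis=-1, keepdim=True)
out = layer(paddle.to_tensor([[True, False], [True, True]]))
print(out)  # [[False], [True]]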
def sync_gradient_and_unscale(self, optimizer):
    if self.world_size <= 1 and self.grad_norm_clip is None and not self._enable:
        return

    # data parallel
    param_grads_dict = defaultdict(list)
    # model parallel
    dist_param_grads_dict = defaultdict(list)
    if getattr(optimizer, '_param_groups', None) and isinstance(
            optimizer._param_groups[0], dict):
        for group in optimizer._param_groups:
            for param in group['params']:
                if not param.is_distributed:
                    if param._grad_ivar() is not None:
                        param_grads_dict[param._grad_ivar().dtype].append(
                            param._grad_ivar())
                else:
                    if param._grad_ivar() is not None:
                        dist_param_grads_dict[param._grad_ivar(
                        ).dtype].append(param._grad_ivar())
                    elif getattr(param, 'sparse_grad', None) is not None:
                        grad = getattr(param, 'sparse_grad')
                        dist_param_grads_dict[grad.dtype].append(grad)
    else:
        for param in optimizer._parameter_list:
            if not param.is_distributed:
                if param._grad_ivar() is not None:
                    param_grads_dict[param._grad_ivar().dtype].append(
                        param._grad_ivar())
            else:
                if param._grad_ivar() is not None:
                    dist_param_grads_dict[param._grad_ivar().dtype].append(
                        param._grad_ivar())
                elif getattr(param, 'sparse_grad', None) is not None:
                    grad = getattr(param, 'sparse_grad')
                    dist_param_grads_dict[grad.dtype].append(grad)

    if self._enable:
        for dtype in dist_param_grads_dict:
            for grad in dist_param_grads_dict[dtype]:
                self._found_inf = paddle.logical_not(
                    paddle.all(paddle.isfinite(grad)))
                if self._found_inf:
                    print('Found inf or nan of distributed parameter, '
                          'dtype is', dtype)
                    return

    grads_fp32 = []
    grads_fp16 = []
    if len(param_grads_dict[paddle.float32]) > 0:
        coalesced_grads_and_vars_fp32 = \
            paddle.fluid.dygraph.parallel.build_groups(
                param_grads_dict[paddle.float32], 128 * 1024 * 1024)
        for coalesced_grad, _, _ in coalesced_grads_and_vars_fp32:
            if self.world_size > 1:
                paddle.distributed.all_reduce(coalesced_grad)
            grads_fp32.append(coalesced_grad)
        if self._enable:
            _C_ops.check_finite_and_unscale(grads_fp32, self._scale,
                                            grads_fp32, self._found_inf)
            if self._found_inf:
                print('Found inf or nan of non distributed parameter, '
                      'dtype is', paddle.float32)
                return

    if len(param_grads_dict[paddle.float16]) > 0:
        coalesced_grads_and_vars_fp16 = \
            paddle.fluid.dygraph.parallel.build_groups(
                param_grads_dict[paddle.float16], 128 * 1024 * 1024)
        for coalesced_grad, _, _ in coalesced_grads_and_vars_fp16:
            if self.world_size > 1:
                paddle.distributed.all_reduce(coalesced_grad)
            grads_fp16.append(coalesced_grad)
        if self._enable:
            _C_ops.check_finite_and_unscale(grads_fp16, self._scale,
                                            grads_fp16, self._found_inf)
            if self._found_inf:
                print('Found inf or nan of non distributed parameter, '
                      'dtype is', paddle.float16)
                return

    if self.grad_norm_clip is not None:
        clip_grad_norm_(grads_fp32, grads_fp16, self.grad_norm_clip,
                        self.grad_norm_clip_max)

    if len(param_grads_dict[paddle.float16]) > 0:
        paddle.fluid.dygraph.parallel._split_tensors(
            coalesced_grads_and_vars_fp16)
    if len(param_grads_dict[paddle.float32]) > 0:
        paddle.fluid.dygraph.parallel._split_tensors(
            coalesced_grads_and_vars_fp32)
def greedy_search(self,
                  src_word,
                  max_len=256,
                  waitk=-1,
                  caches=None,
                  bos_id=None):
    """
    greedy_search uses a streaming reader. The encoder does not need to be
    called many times: each sub-sentence only needs to call the encoder
    once. So it needs the previous states (caches) and the last generated
    token id from the previous step.
    """
    src_max_len = paddle.shape(src_word)[-1]
    base_attn_bias = paddle.cast(
        src_word == self.bos_id,
        dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e9
    src_slf_attn_bias = base_attn_bias
    src_slf_attn_bias.stop_gradient = True
    trg_src_attn_bias = paddle.tile(base_attn_bias, [1, 1, 1, 1])
    src_pos = paddle.cast(
        src_word != self.bos_id, dtype="int64") * paddle.arange(
            start=0, end=src_max_len)
    src_emb = self.src_word_embedding(src_word)
    src_pos_emb = self.src_pos_embedding(src_pos)
    src_emb = src_emb + src_pos_emb
    enc_input = F.dropout(
        src_emb, p=self.dropout,
        training=self.training) if self.dropout else src_emb
    enc_outputs = [self.encoder(enc_input, src_mask=src_slf_attn_bias)]

    # constant number
    batch_size = enc_outputs[-1].shape[0]
    max_len = (enc_outputs[-1].shape[1] + 20) if max_len is None else max_len
    end_token_tensor = paddle.full(
        shape=[batch_size, 1], fill_value=self.eos_id, dtype="int64")

    predict_ids = []
    log_probs = paddle.full(
        shape=[batch_size, 1], fill_value=0, dtype="float32")
    if not bos_id:
        trg_word = paddle.full(
            shape=[batch_size, 1], fill_value=self.bos_id, dtype="int64")
    else:
        trg_word = paddle.full(
            shape=[batch_size, 1], fill_value=bos_id, dtype="int64")

    # init states (caches) for transformer
    if not caches:
        caches = self.decoder.gen_cache(enc_outputs[-1], do_zip=False)

    for i in range(max_len):
        trg_pos = paddle.full(
            shape=trg_word.shape, fill_value=i, dtype="int64")
        trg_emb = self.trg_word_embedding(trg_word)
        trg_pos_emb = self.trg_pos_embedding(trg_pos)
        trg_emb = trg_emb + trg_pos_emb
        dec_input = F.dropout(
            trg_emb, p=self.dropout,
            training=self.training) if self.dropout else trg_emb

        if waitk < 0 or i >= len(enc_outputs):
            # If the decoder step covers the full sentence or is longer
            # than all source steps, read the whole src.
            _e = enc_outputs[-1]
            dec_output, caches = self.decoder(
                dec_input, [_e], None,
                trg_src_attn_bias[:, :, :, :_e.shape[1]], caches)
        else:
            _e = enc_outputs[i]
            dec_output, caches = self.decoder(
                dec_input, [_e], None,
                trg_src_attn_bias[:, :, :, :_e.shape[1]], caches)

        dec_output = paddle.reshape(
            dec_output, shape=[-1, dec_output.shape[-1]])

        logits = self.linear(dec_output)
        step_log_probs = paddle.log(F.softmax(logits, axis=-1))
        log_probs = paddle.add(x=step_log_probs, y=log_probs)
        scores = log_probs
        topk_scores, topk_indices = paddle.topk(x=scores, k=1)

        finished = paddle.equal(topk_indices, end_token_tensor)
        trg_word = topk_indices
        log_probs = topk_scores
        predict_ids.append(topk_indices)

        if paddle.all(finished).numpy():
            break

    predict_ids = paddle.stack(predict_ids, axis=0)
    finished_seq = paddle.transpose(predict_ids, [1, 2, 0])
    finished_scores = topk_scores
    return finished_seq, finished_scores, caches
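A toy sketch of the early-exit check in the decode loop above: once every sequence in the batch has emitted eos, paddle.all(finished) becomes True and the loop breaks. The eos id here is an assumed value for illustration:

import paddle

eos_id = 2  # assumed eos token id
topk_indices = paddle.to_tensor([[2], [2]], dtype='int64')
end_token_tensor = paddle.full([2, 1], eos_id, dtype='int64')
finished = paddle.equal(topk_indices, end_token_tensor)
if paddle.all(finished).numpy():
    print('all sequences finished, stop decoding')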
def greedy_search(self, src_word, max_len=256, waitk=-1):
    src_max_len = paddle.shape(src_word)[-1]
    base_attn_bias = paddle.cast(
        src_word == self.bos_id,
        dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e9
    src_slf_attn_bias = base_attn_bias
    src_slf_attn_bias.stop_gradient = True
    trg_src_attn_bias = paddle.tile(base_attn_bias, [1, 1, 1, 1])
    src_pos = paddle.cast(src_word != self.bos_id,
                          dtype="int64") * paddle.arange(
                              start=0, end=src_max_len)

    src_emb = self.src_word_embedding(src_word)
    src_pos_emb = self.src_pos_embedding(src_pos)
    src_emb = src_emb + src_pos_emb
    enc_input = F.dropout(
        src_emb, p=self.dropout,
        training=self.training) if self.dropout else src_emb

    if waitk < 0 or waitk > src_max_len:
        enc_outputs = [self.encoder(enc_input, src_mask=src_slf_attn_bias)]
    else:
        enc_outputs = []
        for i in range(waitk, src_max_len + 1):
            enc_output = self.encoder(
                enc_input[:, :i, :],
                src_mask=src_slf_attn_bias[:, :, :, :i])
            enc_outputs.append(enc_output)

    # constant number
    batch_size = enc_outputs[-1].shape[0]
    max_len = (enc_outputs[-1].shape[1] + 20) if max_len is None else max_len
    end_token_tensor = paddle.full(
        shape=[batch_size, 1], fill_value=self.eos_id, dtype="int64")

    predict_ids = []
    log_probs = paddle.full(
        shape=[batch_size, 1], fill_value=0, dtype="float32")
    trg_word = paddle.full(
        shape=[batch_size, 1], fill_value=self.bos_id, dtype="int64")

    # init states (caches) for transformer
    caches = self.decoder.gen_cache(enc_outputs[-1], do_zip=False)

    for i in range(max_len):
        trg_pos = paddle.full(
            shape=trg_word.shape, fill_value=i, dtype="int64")
        trg_emb = self.trg_word_embedding(trg_word)
        trg_pos_emb = self.trg_pos_embedding(trg_pos)
        trg_emb = trg_emb + trg_pos_emb
        dec_input = F.dropout(
            trg_emb, p=self.dropout,
            training=self.training) if self.dropout else trg_emb

        if waitk < 0 or i >= len(enc_outputs):
            # Avoid getting the whole source in advance, a diff from:
            # https://github.com/autosimtrans/SimulTransBaseline/blob/master/model.py#L1207
            # If the decoder step covers the full sentence or is longer
            # than all source steps, read the whole src.
            _e = enc_outputs[-1]
            dec_output, caches = self.decoder(
                dec_input, [_e], None,
                trg_src_attn_bias[:, :, :, :_e.shape[1]], caches)
        else:
            _e = enc_outputs[i]
            dec_output, caches = self.decoder(
                dec_input, [_e], None,
                trg_src_attn_bias[:, :, :, :_e.shape[1]], caches)

        dec_output = paddle.reshape(
            dec_output, shape=[-1, dec_output.shape[-1]])

        logits = self.linear(dec_output)
        step_log_probs = paddle.log(F.softmax(logits, axis=-1))
        log_probs = paddle.add(x=step_log_probs, y=log_probs)
        scores = log_probs
        topk_scores, topk_indices = paddle.topk(x=scores, k=1)

        finished = paddle.equal(topk_indices, end_token_tensor)
        trg_word = topk_indices
        log_probs = topk_scores
        predict_ids.append(topk_indices)

        if paddle.all(finished).numpy():
            break

    predict_ids = paddle.stack(predict_ids, axis=0)
    finished_seq = paddle.transpose(predict_ids, [1, 2, 0])
    finished_scores = topk_scores
    return finished_seq, finished_scores
def forward(self, src_word):
    src_max_len = paddle.shape(src_word)[-1]
    mem_seq_lens = paddle.sum(
        paddle.cast(src_word != self.bos_id, dtype="int32"),
        axis=-1,
        keepdim=True)
    src_slf_attn_bias = paddle.cast(
        src_word == self.bos_id,
        dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e9
    src_slf_attn_bias.stop_gradient = True
    src_pos = paddle.cast(src_word != self.bos_id,
                          dtype="int64") * paddle.arange(
                              start=0, end=src_max_len)

    src_emb = self.src_word_embedding(src_word)
    src_pos_emb = self.src_pos_embedding(src_pos)
    src_emb = src_emb + src_pos_emb
    enc_input = F.dropout(
        src_emb, p=self.dropout,
        training=self.training) if self.dropout else src_emb
    enc_output = self.transformer.encoder(
        enc_input, src_mask=src_slf_attn_bias)

    batch_size = enc_output.shape[0]
    end_token_tensor = paddle.full(
        shape=[batch_size, 1], fill_value=self.eos_id, dtype="int64")

    predict_ids = []
    log_probs = paddle.full(
        shape=[batch_size, 1], fill_value=0, dtype="float32")
    trg_word = paddle.full(
        shape=[batch_size, 1], fill_value=self.bos_id, dtype="int64")

    if self.use_fp16_decoder:
        enc_output = paddle.cast(enc_output, "float16")

    # Init cache
    self_cache = paddle.zeros(
        shape=[self.num_decoder_layers, 2, 0, batch_size, self.d_model],
        dtype=enc_output.dtype)
    mem_cache = paddle.zeros(
        shape=[
            self.num_decoder_layers, 2, batch_size, src_max_len,
            self.d_model
        ],
        dtype=enc_output.dtype)

    for i in range(self.max_out_len):
        trg_pos = paddle.full(
            shape=trg_word.shape, fill_value=i, dtype="int64")
        trg_emb = self.trg_word_embedding(trg_word)
        trg_pos_emb = self.trg_pos_embedding(trg_pos)
        trg_emb = trg_emb + trg_pos_emb
        dec_input = F.dropout(
            trg_emb, p=self.dropout,
            training=self.training) if self.dropout else trg_emb

        # TODO(gongenlei): do cast in op
        if self.use_fp16_decoder:
            dec_input = paddle.cast(dec_input, "float16")
        dec_output, self_cache, mem_cache = self.decoder(
            from_tensor=dec_input,
            memory_tensor=enc_output,
            mem_seq_len=mem_seq_lens,
            self_cache=self_cache,
            mem_cache=mem_cache)
        if self.use_fp16_decoder:
            dec_output = paddle.cast(dec_output, "float32")

        dec_output = paddle.reshape(
            dec_output, shape=[-1, dec_output.shape[-1]])

        logits = self.linear(dec_output)
        step_log_probs = paddle.log(F.softmax(logits, axis=-1))
        log_probs = paddle.add(x=step_log_probs, y=log_probs)
        scores = log_probs
        topk_scores, topk_indices = paddle.topk(x=scores, k=1)

        finished = paddle.equal(topk_indices, end_token_tensor)
        trg_word = topk_indices
        log_probs = topk_scores
        predict_ids.append(topk_indices)

        # TODO(gongenlei): support static graph
        if paddle.all(finished).numpy():
            break

    predict_ids = paddle.stack(predict_ids, axis=0)
    finished_seq = paddle.transpose(predict_ids, [1, 2, 0])
    finished_scores = topk_scores
    return finished_seq, finished_scores
def __call__(self, value):
    # Python `and` between tensors only works when both sides are
    # single-element; paddle.logical_and keeps the check valid for
    # batched inputs.
    return paddle.logical_and(
        paddle.all(value >= 0, axis=-1),
        (value.sum(-1) - 1).abs() < 1e-6)
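A hedged usage sketch of the simplex check above (the enclosing class is not shown, so the logic is inlined): each row must be non-negative and sum to 1 within tolerance:

import paddle

probs = paddle.to_tensor([[0.2, 0.3, 0.5]])
ok = paddle.logical_and(
    paddle.all(probs >= 0, axis=-1),
    (probs.sum(-1) - 1).abs() < 1e-6)
print(ok)  # Tensor([True])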
def merge_semantic_and_instance(semantic, instance, label_divisor, thing_list,
                                stuff_area, ignore_index):
    """
    Post-processing for panoptic segmentation, by merging the semantic
    segmentation label and the class-agnostic instance segmentation label.

    Args:
        semantic (Tensor): A Tensor of shape [1, H, W], predicted semantic label.
        instance (Tensor): A Tensor of shape [1, H, W], predicted instance label.
        label_divisor (int): An integer used to convert
            panoptic id = semantic id * label_divisor + instance_id.
        thing_list (list): A list of thing class ids.
        stuff_area (int): An integer; stuff whose area is less than
            stuff_area is removed.
        ignore_index (int): Specifies a value that is ignored.

    Returns:
        Tensor: A Tensor of shape [1, H, W]. Pixels whose value equals
            ignore_index are ignored. A stuff class is represented as
            class_id, while a thing class is represented as
            class_id * label_divisor + ins_id, where ins_id begins from 1.
    """
    # In case thing mask does not align with semantic prediction
    pan_seg = paddle.zeros_like(semantic) + ignore_index
    thing_seg = instance > 0
    semantic_thing_seg = paddle.zeros_like(semantic)
    for thing_class in thing_list:
        semantic_thing_seg += semantic == thing_class

    # keep track of instance id for each class
    class_id_tracker = {}

    # paste thing by majority voting
    ins_ids = paddle.unique(instance)
    for ins_id in ins_ids:
        if ins_id == 0:
            continue
        # Make sure only do majority voting within semantic_thing_seg
        thing_mask = paddle.logical_and(instance == ins_id,
                                        semantic_thing_seg == 1)
        if paddle.all(paddle.logical_not(thing_mask)):
            continue
        # get class id for instance of ins_id
        sem_ins_id = paddle.gather(
            semantic.reshape((-1, )),
            paddle.nonzero(thing_mask.reshape(
                (-1, ))))  # equal to semantic[thing_mask]
        v, c = paddle.unique(sem_ins_id, return_counts=True)
        class_id = paddle.gather(v, c.argmax())
        class_id = class_id.numpy()[0]
        if class_id in class_id_tracker:
            new_ins_id = class_id_tracker[class_id]
        else:
            class_id_tracker[class_id] = 1
            new_ins_id = 1
        class_id_tracker[class_id] += 1

        # pan_seg[thing_mask] = class_id * label_divisor + new_ins_id
        pan_seg = pan_seg * (paddle.logical_not(thing_mask)) + (
            class_id * label_divisor + new_ins_id) * thing_mask.astype(
                'int64')

    # paste stuff to unoccupied area
    class_ids = paddle.unique(semantic)
    for class_id in class_ids:
        if class_id.numpy() in thing_list:
            # thing class
            continue
        # calculate stuff area
        stuff_mask = paddle.logical_and(semantic == class_id,
                                        paddle.logical_not(thing_seg))
        area = paddle.sum(stuff_mask.astype('int64'))
        if area >= stuff_area:
            # pan_seg[stuff_mask] = class_id
            pan_seg = pan_seg * (paddle.logical_not(stuff_mask)
                                 ) + stuff_mask.astype('int64') * class_id

    return pan_seg
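A toy sketch of the empty-mask guard inside the voting loop above: when an instance id has no pixel agreeing with any thing class, the all-False mask makes the paddle.all check True and the id is skipped:

import paddle

instance = paddle.to_tensor([[0, 1], [1, 0]])
semantic_thing_seg = paddle.to_tensor([[0, 0], [0, 0]])  # no thing pixels
thing_mask = paddle.logical_and(instance == 1, semantic_thing_seg == 1)
if paddle.all(paddle.logical_not(thing_mask)):
    print('empty mask, skip this instance id')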