def validation(self, epoch, log_iters=10, save_epoch=1):
    log_prefix = 'Val' + self.task_prefix.capitalize()
    for metric in self.val_metrics:
        metric.reset_epoch_stats()

    val_loss = 0
    losses_logging = defaultdict(list)
    self.net.eval()

    with paddle.no_grad():
        for i, batch_data in enumerate(self.val_data):
            val_global_step = epoch * len(self.val_data) + i
            loss, batch_losses_logging, splitted_batch_data, outputs = \
                self.batch_forward(batch_data, validation=True)
            batch_losses_logging['overall'] = loss
            reduce_loss_dict(batch_losses_logging)
            for loss_name, loss_value in batch_losses_logging.items():
                losses_logging[loss_name].append(loss_value.numpy())
            val_loss += batch_losses_logging['overall'].numpy()

            # Per-step logging, throttled by log_iters.
            if val_global_step % log_iters == 0 and self.is_master:
                logger.info('Epoch={}, Step={}, loss={:.4f}'.format(
                    epoch, val_global_step, float(loss)))
                for metric in self.val_metrics:
                    metric.log_states(self.sw,
                                      f'{log_prefix}Metrics/{metric.name}',
                                      val_global_step)

    # Epoch-level summary: mean losses and epoch metric values.
    if self.is_master:
        logger.info(
            f'Epoch {epoch}, validation loss: {val_loss[0] / (i + 1):.4f}')
        for loss_name, loss_values in losses_logging.items():
            self.sw.add_scalar(f'{log_prefix}Losses/{loss_name}',
                               np.array(loss_values).mean(), epoch)
        for metric in self.val_metrics:
            self.sw.add_scalar(f'{log_prefix}Metrics/{metric.name}',
                               metric.get_epoch_value(), epoch)
def forward(self, confidence, predicted_locations, labels, gt_locations):
    """Compute classification loss and smooth L1 loss.

    Args:
        confidence (batch_size, num_priors, num_classes): class predictions.
        predicted_locations (batch_size, num_priors, 4): predicted locations.
        labels (batch_size, num_priors): real labels of all the priors.
        gt_locations (batch_size, num_priors, 4): real boxes corresponding to
            all the priors.
    """
    num_classes = confidence.shape[2]
    with paddle.no_grad():
        # derived from cross_entropy = sum(log(p))
        loss = -F.log_softmax(confidence, 2)[:, :, 0]
        mask = box_utils.hard_negative_mining(loss, labels,
                                              self.neg_pos_ratio)

    confidence = paddle.concat([
        confidence[:, :, 0].masked_select(mask).reshape([-1, 1]),
        confidence[:, :, 1].masked_select(mask).reshape([-1, 1])
    ], axis=1)
    classification_loss = F.cross_entropy(
        confidence.reshape([-1, num_classes]),
        labels.masked_select(mask),
        reduction='sum')

    pos_mask = labels > 0
    # Broadcast the positive mask over the 4 box coordinates.
    loc_mask = paddle.concat([pos_mask.reshape(pos_mask.shape + [1])] * 4,
                             axis=2)
    predicted_locations = predicted_locations.masked_select(
        loc_mask).reshape([-1, 4])
    gt_locations = gt_locations.masked_select(loc_mask).reshape([-1, 4])
    smooth_l1_loss = F.smooth_l1_loss(predicted_locations,
                                      gt_locations.cast('float32'),
                                      reduction='sum')
    # Alternative L2 localization loss:
    # smooth_l1_loss = F.mse_loss(predicted_locations, gt_locations, reduction='sum')
    num_pos = gt_locations.shape[0]
    return smooth_l1_loss / num_pos, classification_loss / num_pos
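# A minimal usage sketch for the loss above. The class name `MultiboxLoss`
# and `neg_pos_ratio=3` are assumptions for illustration; shapes follow the
# docstring, with 2 classes as the slicing above implies.
import paddle

criterion = MultiboxLoss(neg_pos_ratio=3)        # hypothetical constructor
confidence = paddle.randn([8, 3000, 2])          # [batch, priors, classes]
predicted_locations = paddle.randn([8, 3000, 4])
labels = paddle.randint(0, 2, [8, 3000])         # 0 = background
gt_locations = paddle.randn([8, 3000, 4])
reg_loss, cls_loss = criterion(confidence, predicted_locations,
                               labels, gt_locations)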
def _eval_seq_jde_single_image(self,
                               iterator,
                               save_dir=None,
                               show_image=False,
                               draw_threshold=0):
    if save_dir:
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
    tracker = self.model.tracker
    results = []
    frame_id = 0
    self.status['mode'] = 'track'
    self.model.eval()
    timer = Timer()
    while True:
        try:
            data = next(iterator)
            timer.tic()
            with paddle.no_grad():
                pred_dets, pred_embs = self.model(data)
            online_targets = self.model.tracker.update(pred_dets, pred_embs)

            online_tlwhs, online_ids = [], []
            online_scores = []
            for t in online_targets:
                tlwh = t.tlwh
                tid = t.track_id
                tscore = t.score
                if tscore < draw_threshold:
                    continue
                # Drop tall, narrow boxes and boxes below the minimum area.
                vertical = tlwh[2] / tlwh[3] > 1.6
                if tlwh[2] * tlwh[3] > tracker.min_box_area and not vertical:
                    online_tlwhs.append(tlwh)
                    online_ids.append(tid)
                    online_scores.append(tscore)
            timer.toc()

            # save results
            results.append(
                (frame_id + 1, online_tlwhs, online_scores, online_ids))
            self.save_results(data, frame_id, online_ids, online_tlwhs,
                              online_scores, timer.average_time, show_image,
                              save_dir)
            frame_id += 1
            yield results, frame_id
        except StopIteration:
            return
def test_iter(self, metrics=None):
    self.nets['generator'].eval()
    with paddle.no_grad():
        self.output = self.nets['generator'](self.lq)
        self.visual_items['output'] = self.output
    self.nets['generator'].train()

    out_img = []
    gt_img = []
    for out_tensor, gt_tensor in zip(self.output, self.gt):
        out_img.append(tensor2img(out_tensor, (0., 1.)))
        gt_img.append(tensor2img(gt_tensor, (0., 1.)))

    if metrics is not None:
        for metric in metrics.values():
            metric.update(out_img, gt_img)
def inference(model,
              data,
              output_path,
              beam_size=1,
              mode='infer',
              output_history=True,
              use_heuristic=True,
              model_name='seq2tree'):
    model.eval()
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with paddle.no_grad(), open(output_path, 'w') as ofs:
        if mode == 'infer':
            _do_infer(model, data, beam_size, output_history, ofs,
                      use_heuristic, model_name)
        elif mode == 'debug':
            _debug(model, data, ofs)
def accuracy(output, target, topk=(1, )):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with paddle.no_grad():
        maxk = max(topk)
        batch_size = target.shape[0]

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.equal(target)

        res = []
        for k in topk:
            correct_k = correct.astype(paddle.int32)[:k].flatten().sum(
                dtype='float32')
            res.append(correct_k / batch_size)
        return res
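# A quick sanity check for `accuracy` above (assumed shapes: logits [N, C],
# integer labels [N]); this variant returns fractions in [0, 1].
import paddle

logits = paddle.to_tensor([[0.1, 0.8, 0.1],
                           [0.7, 0.2, 0.1]])
labels = paddle.to_tensor([1, 2])
top1, top2 = accuracy(logits, labels, topk=(1, 2))
print(float(top1), float(top2))  # 0.5 0.5: only the first sample is hit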
def accuracy(output, target, topk=(1, )):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with paddle.no_grad():
        maxk = max(topk)
        batch_size = target.shape[0]

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = paddle.cast(pred == target.reshape([1, -1]).expand_as(pred),
                              'float32')

        res = []
        for k in topk:
            correct_k = correct[:k].reshape([-1]).sum(0, keepdim=True)
            # Unlike the fraction-based variant above, this one returns
            # percentages.
            res.append(correct_k * 100.0 / batch_size)
        return res
def embed_frames_batch(frames_batch):
    """
    Computes embeddings for a batch of mel spectrograms.

    :param frames_batch: a batch of mel spectrograms as a numpy array of
        float32 of shape (batch_size, n_frames, n_channels)
    :return: the embeddings as a numpy array of float32 of shape
        (batch_size, model_embedding_size)
    """
    if _model is None:
        raise Exception(
            "Model was not loaded. Call load_model() before inference.")
    frames = paddle.to_tensor(frames_batch)
    with paddle.no_grad():
        embed = _model(frames).numpy()
    return embed
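# Hedged usage sketch for `embed_frames_batch`: shapes follow the docstring,
# the frame/channel counts below are arbitrary, and `load_model()` from the
# surrounding module is assumed to have been called first.
import numpy as np

frames_batch = np.random.rand(4, 160, 40).astype(np.float32)
embeds = embed_frames_batch(frames_batch)
print(embeds.shape)  # (4, model_embedding_size)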
def run_image(self, img):
    if isinstance(img, str):
        ori_img = Image.open(img).convert('RGB')
    elif isinstance(img, np.ndarray):
        ori_img = Image.fromarray(img).convert('RGB')
    elif isinstance(img, Image.Image):
        ori_img = img
    else:
        raise TypeError('img should be a path, numpy array or PIL.Image, '
                        f'but got {type(img)}')

    img = self.norm(ori_img)
    x = paddle.to_tensor(img[np.newaxis, ...])
    with paddle.no_grad():
        out = self.model(x)

    pred_img = self.denorm(out.numpy()[0])
    pred_img = Image.fromarray(pred_img)
    return pred_img
def do_predict(args):
    place = "gpu"
    place = paddle.set_device(place)

    model_class, tokenizer_class = MODEL_CLASSES[args.model_name_or_path]
    tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path)
    logger.info('Loading the model parameters, please wait...')
    model = model_class.from_pretrained(
        args.model_name_or_path, max_predict_len=args.max_out_len)
    model.eval()

    bos_id = tokenizer.convert_tokens_to_ids(args.start_token)
    eos_id = tokenizer.convert_tokens_to_ids(args.end_token)

    # Define model
    gpt = FasterGPT(
        model=model,
        topk=args.topk,
        topp=args.topp,
        max_out_len=args.max_out_len,
        bos_id=bos_id,
        eos_id=eos_id,
        temperature=args.temperature,
        decoding_lib=args.decoding_lib,
        use_fp16_decoding=args.use_fp16_decoding)

    # Set evaluate mode
    gpt.eval()

    input_ids = np.array(
        [[bos_id] for i in range(args.batch_size * 1)]).astype(
            "int32").reshape([args.batch_size, 1])
    input_ids = paddle.to_tensor(input_ids)

    with paddle.no_grad():
        for i in range(100):
            # For warmup.
            if 50 == i:
                paddle.fluid.core._cuda_synchronize(place)
                start = time.time()
            out_seq = gpt(input_ids)
        paddle.fluid.core._cuda_synchronize(place)
        logger.info("Average test time for decoding is %f ms" %
                    ((time.time() - start) / 50 * 1000))
        output_sequence = out_seq.numpy().transpose()

    for i in range(args.batch_size):
        print("========== Sample-%d ==========" % i)
        print(tokenizer.convert_ids_to_string(output_sequence[i][1:]))
def predict(self, image, top_k=-1, prob_threshold=None):
    height, width, _ = image.shape
    image = self.transform(image)
    images = image.unsqueeze(0)
    with paddle.no_grad():
        self.timer.start()
        scores, boxes = self.net.forward(images)
        print("Inference time: ", self.timer.end())
    boxes = boxes[0]
    scores = scores[0]
    if not prob_threshold:
        prob_threshold = self.filter_threshold
    # This version of NMS is slower on GPU, so we move data to CPU.
    boxes = boxes.cpu()
    scores = scores.cpu()
    picked_box_probs = []
    picked_labels = []
    for class_index in range(1, scores.shape[1]):
        probs = scores[:, class_index]
        mask = probs > prob_threshold
        probs = probs[mask]
        if probs.shape[0] == 0:
            continue
        subset_boxes = boxes[mask, :]
        box_probs = paddle.concat(
            [subset_boxes, probs.reshape([-1, 1])], axis=1)
        box_probs = box_utils.nms(box_probs,
                                  self.nms_method,
                                  score_threshold=prob_threshold,
                                  iou_threshold=self.iou_threshold,
                                  sigma=self.sigma,
                                  top_k=top_k,
                                  candidate_size=self.candidate_size)
        picked_box_probs.append(box_probs)
        picked_labels.extend([class_index] * box_probs.shape[0])
    if not picked_box_probs:
        return (paddle.to_tensor([]), paddle.to_tensor([]),
                paddle.to_tensor([]))
    picked_box_probs = paddle.concat(picked_box_probs)
    # Scale normalized box coordinates back to the input image size.
    picked_box_probs[:, 0] *= width
    picked_box_probs[:, 1] *= height
    picked_box_probs[:, 2] *= width
    picked_box_probs[:, 3] *= height
    return (picked_box_probs[:, :4], paddle.to_tensor(picked_labels),
            picked_box_probs[:, 4])
def do_predict(args):
    place = "gpu"
    place = paddle.set_device(place)

    model_class, tokenizer_class = MODEL_CLASSES[args.model_name_or_path]
    tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path)
    logger.info('Loading the model parameters, please wait...')
    model = model_class.from_pretrained(args.model_name_or_path)
    model.eval()

    bos_id = tokenizer.convert_tokens_to_ids(args.start_token)
    eos_id = tokenizer.convert_tokens_to_ids(args.end_token)

    # Define model
    gpt = model
    # Set evaluate mode
    gpt.eval()

    input_ids = np.array([[bos_id] for i in range(args.batch_size * 1)
                          ]).astype("int64").reshape([args.batch_size, 1])
    input_ids = paddle.to_tensor(input_ids)

    with paddle.no_grad():
        for i in range(100):
            # For warmup.
            if 50 == i:
                paddle.device.cuda.synchronize(place)
                start = time.time()
            out_seq, _ = gpt.generate(input_ids,
                                      top_k=args.topk,
                                      top_p=args.topp,
                                      max_length=args.max_length,
                                      temperature=args.temperature,
                                      bos_token_id=bos_id,
                                      eos_token_id=eos_id,
                                      decode_strategy="sampling",
                                      use_fp16_decoding=args.use_fp16_decoding,
                                      use_faster=True)
            output_sequence = out_seq.numpy()
        paddle.device.cuda.synchronize(place)
        logger.info("Average test time for decoding is %f ms" %
                    ((time.time() - start) / 50 * 1000))

    output_sequence = out_seq.numpy().tolist()
    for i in range(args.batch_size):
        print("========== Sample-%d ==========" % i)
        print(tokenizer.convert_ids_to_string(output_sequence[i]))
def predict(tokenized_src,
            decoder_max_length,
            is_last,
            cache,
            bos_id,
            result,
            tokenizer,
            transformer,
            n_best=1,
            max_out_len=256,
            eos_idx=1,
            waitk=1):
    # Set evaluate mode
    transformer.eval()

    if len(tokenized_src) < waitk:
        return result, cache, bos_id

    with paddle.no_grad():
        paddle.disable_static()
        input_src = tokenized_src
        if is_last:
            decoder_max_length = max_out_len
            input_src += [eos_idx]
        src_word = paddle.to_tensor(input_src).unsqueeze(axis=0)
        finished_seq, finished_scores, cache = transformer.greedy_search(
            src_word,
            max_len=decoder_max_length,
            waitk=waitk,
            caches=cache,
            bos_id=bos_id)
        finished_seq = finished_seq.numpy()
        for beam_idx, beam in enumerate(finished_seq[0]):
            if beam_idx >= n_best:
                break
            id_list = post_process_seq(beam)
            if len(id_list) == 0:
                continue
            bos_id = id_list[-1]
            word_list = tokenizer.trg_vocab.to_tokens(id_list)
            for word in word_list:
                result.append(word)
            res = ' '.join(word_list).replace('@@ ', '')
        paddle.enable_static()
    return result, cache, bos_id
def do_predict(args):
    paddle.set_device(args.device)
    args.task_name = args.task_name.lower()

    train_ds, test_ds = load_dataset(
        'clue', args.task_name, splits=('train', 'test'))
    tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)

    trans_func = partial(
        convert_example,
        tokenizer=tokenizer,
        label_list=train_ds.label_list,
        max_seq_length=args.max_seq_length,
        is_test=True)
    batchify_fn = DataCollatorWithPadding(tokenizer)

    test_ds = test_ds.map(trans_func, lazy=True)
    test_batch_sampler = paddle.io.BatchSampler(
        test_ds, batch_size=args.batch_size, shuffle=False)
    test_data_loader = DataLoader(
        dataset=test_ds,
        batch_sampler=test_batch_sampler,
        collate_fn=batchify_fn,
        num_workers=0,
        return_list=True)

    num_classes = 1 if train_ds.label_list is None else len(
        train_ds.label_list)
    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_name_or_path, num_classes=num_classes)
    model.eval()

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    if args.task_name == 'ocnli':
        args.task_name = 'ocnli_50k'
    f = open(
        os.path.join(args.output_dir, args.task_name + "_predict.json"), 'w')

    for step, batch in enumerate(test_data_loader):
        with paddle.no_grad():
            logits = model(**batch)
        preds = paddle.argmax(logits, axis=1).numpy()
        for idx, pred in enumerate(preds):
            # Use a globally unique example id across batches.
            j = json.dumps({
                "id": step * args.batch_size + idx,
                "label": train_ds.label_list[pred]
            })
            f.write(j + "\n")
    f.close()
def eval(model,
         valid_dataloader,
         post_process_class,
         eval_class,
         model_type=None,
         extra_input=False):
    model.eval()
    with paddle.no_grad():
        total_frame = 0.0
        total_time = 0.0
        pbar = tqdm(total=len(valid_dataloader),
                    desc='eval model:',
                    position=0,
                    leave=True)
        max_iter = len(valid_dataloader) - 1 if platform.system(
        ) == "Windows" else len(valid_dataloader)
        for idx, batch in enumerate(valid_dataloader):
            if idx >= max_iter:
                break
            images = batch[0]
            start = time.time()
            if model_type == 'table' or extra_input:
                preds = model(images, data=batch[1:])
            elif model_type == "kie":
                preds = model(batch)
            else:
                preds = model(images)
            batch = [item.numpy() for item in batch]
            total_time += time.time() - start
            # Obtain usable results from post-processing methods and
            # evaluate the results of the current batch.
            if model_type in ['table', 'kie']:
                eval_class(preds, batch)
            else:
                post_result = post_process_class(preds, batch[1])
                eval_class(post_result, batch)
            pbar.update(1)
            total_frame += len(images)
        # Get final metric, e.g. acc or hmean
        metric = eval_class.get_metric()
    pbar.close()
    model.train()
    metric['fps'] = total_frame / total_time
    return metric
def generate(args, g_ema, mean_latent):
    with paddle.no_grad():
        g_ema.eval()
        for i in tqdm(range(args.pics)):
            sample_z = paddle.randn((args.sample, args.latent))
            sample, _ = g_ema([sample_z],
                              truncation=args.truncation,
                              truncation_latent=mean_latent)
            sample = np.uint8((sample * 0.5 + 0.5).clip(0, 1).numpy() * 255)
            sample = [t for t in sample]
            sample = np.concatenate(sample, 1)
            sample = sample.transpose((1, 2, 0))
            sample = Image.fromarray(sample)
            sample.save(f"sample/{str(i).zfill(6)}.png")
def do_predict(args):
    place = "gpu"
    paddle.set_device(place)

    # Define data loader
    test_loader, to_tokens = reader.create_infer_loader(args)

    # Define model
    transformer = FasterTransformer(
        src_vocab_size=args.src_vocab_size,
        trg_vocab_size=args.trg_vocab_size,
        max_length=args.max_length + 1,
        n_layer=args.n_layer,
        n_head=args.n_head,
        d_model=args.d_model,
        d_inner_hid=args.d_inner_hid,
        dropout=args.dropout,
        weight_sharing=args.weight_sharing,
        bos_id=args.bos_idx,
        eos_id=args.eos_idx,
        decoding_strategy="beam_search",
        beam_size=args.beam_size,
        max_out_len=args.max_out_len,
        decoding_lib=args.decoding_lib,
        use_fp16_decoding=args.use_fp16_decoding)

    # Set evaluate mode
    transformer.eval()

    # Load checkpoint.
    transformer.load(init_from_params=os.path.join(args.init_from_params,
                                                   "transformer.pdparams"))

    f = open(args.output_file, "w")
    with paddle.no_grad():
        for (src_word, ) in test_loader:
            finished_seq = transformer(src_word=src_word)
            finished_seq = finished_seq.numpy().transpose([1, 2, 0])
            for ins in finished_seq:
                for beam_idx, beam in enumerate(ins):
                    if beam_idx >= args.n_best:
                        break
                    id_list = post_process_seq(beam, args.bos_idx,
                                               args.eos_idx)
                    word_list = to_tokens(id_list)
                    sequence = " ".join(word_list) + "\n"
                    f.write(sequence)
def test(model, test_loader):
    correct = num = 0
    model.eval()
    with paddle.no_grad():
        for batch_data in test_loader:
            ids, ids_reverse, label = batch_data  # [batch_size, 2]
            output = model((ids, ids_reverse))
            num += label.shape[0]
            predict = paddle.argmax(output, axis=1)
            label = paddle.cast(label, dtype=predict.dtype)
            correct += paddle.sum(
                paddle.cast(predict == label, dtype='int64')).numpy()[0]
    model.train()
    return correct * 1.0 / num
def test_paddle_imperative_no_grad_guard(self):
    data = np.array([[2, 3], [4, 5]]).astype('float32')
    with fluid.dygraph.guard():
        l0 = fluid.Linear(2, 2)
        self.assertTrue(l0.weight._grad_ivar() is None)
        l1 = fluid.Linear(2, 2)
        with paddle.no_grad():
            self.assertTrue(l1.weight.stop_gradient is False)
            tmp = l1.weight * 2
        self.assertTrue(tmp.stop_gradient)
        x = paddle.to_tensor(data)
        y = paddle.add(l0(x), tmp)
        o = l1(y)
        o.backward()

        self.assertTrue(tmp._grad_ivar() is None)
        self.assertTrue(l0.weight._grad_ivar() is not None)
def evaluate(model, criterion, metric, data_loader, width_mult=1.0):
    with paddle.no_grad():
        model.eval()
        metric.reset()
        for batch in data_loader:
            input_ids, segment_ids, labels = batch
            logits = model(input_ids, segment_ids,
                           attention_mask=[None, None])
            if isinstance(logits, tuple):
                logits = logits[0]
            loss = criterion(logits, labels)
            correct = metric.compute(logits, labels)
            metric.update(correct)
        results = metric.accumulate()
        print("width_mult: %f, eval loss: %f, %s: %s\n" %
              (width_mult, loss.numpy(), metric.name(), results),
              end='')
        model.train()
def do_predict(args):
    place = "gpu"
    place = paddle.set_device(place)

    # Define model
    transformer = FasterTransformer(
        src_vocab_size=args.src_vocab_size,
        trg_vocab_size=args.trg_vocab_size,
        max_length=args.max_length + 1,
        num_encoder_layers=args.n_layer,
        num_decoder_layers=args.n_layer,
        n_head=args.n_head,
        d_model=args.d_model,
        d_inner_hid=args.d_inner_hid,
        dropout=args.dropout,
        weight_sharing=args.weight_sharing,
        bos_id=args.bos_idx,
        eos_id=args.eos_idx,
        decoding_strategy=args.decoding_strategy,
        beam_size=args.beam_size,
        topk=args.topk,
        topp=args.topp,
        max_out_len=args.max_out_len,
        decoding_lib=args.decoding_lib,
        use_fp16_decoding=args.use_fp16_decoding)

    # Set evaluate mode
    transformer.eval()

    enc_output = paddle.randn(
        [args.infer_batch_size, args.max_length, args.d_model])
    if args.use_fp16_decoding:
        enc_output = paddle.cast(enc_output, "float16")
    mem_seq_len = paddle.randint(1,
                                 args.max_length + 1,
                                 shape=[args.infer_batch_size],
                                 dtype="int32")

    with paddle.no_grad():
        for i in range(100):
            # For warmup.
            if 50 == i:
                start = time.time()
            transformer.decoding(enc_output=enc_output,
                                 memory_seq_lens=mem_seq_len)
        logger.info("Average test time for decoding is %f ms" %
                    ((time.time() - start) / 50 * 1000))
def forward_train(self, x, prev_output):
    with paddle.no_grad():
        points = self.get_points_train(prev_output, calculate_uncertainty)

    fine_grained_point_feats = self._get_fine_grained_point_feats(
        x, points)  # [2, 256, 2048]
    coarse_point_feats = self._get_coarse_point_feats(
        prev_output, points)  # [2, 19, 2048]

    # forward for train
    fusion_point_feats = paddle.concat(
        [fine_grained_point_feats, coarse_point_feats], axis=1)
    for fc in self.fcs:
        fusion_point_feats = fc(fusion_point_feats)
        if self.coarse_pred_each_layer:
            fusion_point_feats = paddle.concat(
                (fusion_point_feats, coarse_point_feats), axis=1)
    point_logits = self.cls_seg(fusion_point_feats)
    return [point_logits, points]  # for points loss
def enhance_from_batch(self, img):
    if isinstance(img, np.ndarray):
        img_ori, _ = resize_and_crop_batch(img, 512)
        img = paddle.to_tensor(img_ori).transpose([0, 3, 1, 2])
    else:
        assert img.shape[1:] == [3, 512, 512]
        img_ori = img.transpose([0, 2, 3, 1]).numpy()
    img_t = (img / 255. - 0.5) / 0.5

    with paddle.no_grad():
        out, __ = self.face_enhance(img_t)

    image_tensor = out * 0.5 + 0.5
    image_tensor = image_tensor.transpose([0, 2, 3, 1])  # RGB
    image_numpy = paddle.clip(image_tensor, 0, 1) * 255.0
    out = image_numpy.astype(np.uint8).cpu().numpy()

    return out * self.mask + (1 - self.mask) * img_ori
def animate(image_path, output_path):
    genA2B, faceseg = ResnetUGATITP2CGenerator(), FaceSeg()
    genA2B.set_state_dict(load(__file__.replace('py', 'bin')))
    genA2B.eval()

    face_image = align_crop(open(image_path))
    face_mask = res(faceseg(face_image), (256, 256))[:, :, newaxis] / 255.
    face = to_tensor(
        transpose(((resize(face_image, (256, 256), interpolation=3) *
                    face_mask + (1 - face_mask) * 255) / 127.5 -
                   1)[newaxis, :, :, :], (0, 3, 1, 2)).astype(float32))

    with no_grad():
        cartoon = genA2B(face)[0][0]

    imwrite(
        output_path,
        cvtColor(((transpose(cartoon.numpy(), (1, 2, 0)) + 1) * 127.5 *
                  face_mask + (1 - face_mask) * 255).astype(uint8), 4))
def predict(data_loader, model, id2label=None):
    outputs = []
    progress_bar = tqdm(
        range(len(data_loader)),
        desc="Prediction Iteration",
    )
    with paddle.no_grad():
        for batch in data_loader:
            input_ids, segment_ids = batch
            logits = model(input_ids)
            if id2label is not None:
                pred = paddle.argmax(logits, axis=-1).cpu().tolist()
                outputs.extend(list(map(lambda x: id2label[x], pred)))
            else:
                pred = logits.squeeze(-1).cpu().tolist()
                outputs.extend(pred)
            progress_bar.update(1)
    return outputs
def predict(self,
            feat: Dict[str, paddle.Tensor],
            ensemble_representations: bool = True,
            return_representations: bool = True):
    """Predict protein structure and encoding representation"""
    if self.dynamic_subbatch_size:
        seq_len = feat['aatype'].shape[-1]
        extra_msa_num = feat['extra_msa'].shape[-2]
        self.update_subbatch_size(seq_len, extra_msa_num)

    with paddle.no_grad():
        ret = self.alphafold(
            feat,
            ensemble_representations=ensemble_representations,
            return_representations=return_representations)

    tensor_to_numpy(ret)
    return ret
def eye_(tensor):
    r"""Fills the 2-dimensional input `Tensor` with the identity matrix.
    Preserves the identity of the inputs in `Linear` layers, where as many
    inputs are preserved as possible.

    Args:
        tensor: a 2-dimensional `paddle.Tensor`

    Examples:
        >>> w = paddle.empty([3, 5])
        >>> eye_(w)
    """
    if tensor.ndimension() != 2:
        raise ValueError("Only tensors with 2 dimensions are supported")

    with paddle.no_grad():
        tensor.set_value(paddle.eye(*tensor.shape))
    return tensor
def _critic_learn(self, obs, action, reward, next_obs, terminal):
    with paddle.no_grad():
        next_action, next_log_pro = self.sample(next_obs)
        q1_next, q2_next = self.target_model.critic_model(
            next_obs, next_action)
        target_Q = paddle.minimum(q1_next,
                                  q2_next) - self.alpha * next_log_pro
        terminal = paddle.cast(terminal, dtype='float32')
        target_Q = reward + self.gamma * (1. - terminal) * target_Q

    cur_q1, cur_q2 = self.model.critic_model(obs, action)
    critic_loss = F.mse_loss(cur_q1, target_Q) + F.mse_loss(
        cur_q2, target_Q)

    self.critic_optimizer.clear_grad()
    critic_loss.backward()
    self.critic_optimizer.step()
    return critic_loss
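# For reference, the target computed above is the soft Bellman backup used
# by SAC:
#     y = r + gamma * (1 - done) * (min(Q1', Q2')(s', a') - alpha * log pi(a'|s'))
# with a' sampled from the current policy at s'; both critics regress onto
# the same target y via the summed MSE loss.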
def reduce_loss_dict(loss_dict):
    world_size = get_world_size()
    if world_size < 2:
        return loss_dict

    with paddle.no_grad():
        keys = []
        losses = []
        for k in loss_dict.keys():
            keys.append(k)
            # paddle.distributed.all_reduce sums in place and returns None,
            # so reduce first, then average across ranks.
            loss = loss_dict[k].astype('float32')
            dist.all_reduce(loss)
            losses.append(loss / world_size)
        reduced_losses = {k: v for k, v in zip(keys, losses)}
    return reduced_losses
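# Usage sketch for `reduce_loss_dict` (assumes the distributed environment
# has been initialized, e.g. via dist.init_parallel_env(), and that every
# rank holds its own scalar loss tensors):
loss_dict = {'cls': paddle.to_tensor(0.7), 'reg': paddle.to_tensor(1.2)}
reduced = reduce_loss_dict(loss_dict)  # each value averaged across ranks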
def run(self, input_texts, max_seq_len=128, batch_size=1,
        return_hidden_states=None):
    """Predict input texts with wordtag.

    Args:
        input_texts: input text or list of input texts.
        max_seq_len: max sequence length.
        batch_size: batch size per GPU/CPU for prediction.
        return_hidden_states: if True, also return the sequence and
            classification logits of the last batch.

    Returns:
        dict -- wordtag results.
    """
    if isinstance(input_texts, str):
        input_texts = [input_texts]
    if not isinstance(input_texts, str) and not isinstance(
            input_texts, list):
        raise TypeError(
            f"Bad inputs, input text should be str or list of str, "
            f"{type(input_texts)} found!")

    infer_data_loader, short_input_texts = self._pre_process_text(
        input_texts, max_seq_len, batch_size)

    all_pred_tags = []
    with paddle.no_grad():
        for batch in infer_data_loader:
            input_ids, token_type_ids, seq_len = batch
            seq_logits, cls_logits = self._model(
                input_ids, token_type_ids, lengths=seq_len)
            scores, pred_tags = self._model.viterbi_decoder(
                seq_logits, seq_len)
            all_pred_tags += pred_tags.numpy().tolist()

    results = self._decode(short_input_texts, all_pred_tags)
    results = self._convert_short_text2long_text_result(input_texts, results)
    if self.linking is True:
        for res in results:
            self._term_linking(res)

    outputs = results
    if return_hidden_states is True:
        outputs = (results, ) + (seq_logits, cls_logits)
    return outputs