def net(self, input, is_infer=False):
    """Build the ranking DNN and return the fetch dict.

    Args:
        input: list of tensors; ``input[:4]`` are user sparse features,
            ``input[4:7]`` are movie sparse features, ``input[-1]`` is
            the label.
        is_infer: when True, return id/label/prediction tensors instead
            of the training loss.

    Returns:
        dict: ``{'userid', 'movieid', 'label', 'predict'}`` in inference
        mode, else ``{'Loss': avg_cost}``.
    """
    self.user_sparse_inputs = input[:4]
    self.mov_sparse_inputs = input[4:7]
    self.label_input = input[-1]

    # Batch size comes from the runner config and differs between
    # training and inference.
    if is_infer:
        self.batch_size = self.config.get("runner.infer_batch_size")
    else:
        self.batch_size = self.config.get("runner.train_batch_size")

    rank_model = DNNLayer(self.sparse_feature_number,
                          self.sparse_feature_dim, self.hidden_layers)
    predict = rank_model(self.batch_size, self.user_sparse_inputs,
                         self.mov_sparse_inputs, self.label_input)
    self.inference_target_var = predict

    if is_infer:
        # Expose ids, label and prediction for the inference runner.
        # (Removed the original's no-op `predict = predict` and the
        # pass-through local aliases.)
        return {
            'userid': self.user_sparse_inputs[0],
            'movieid': self.mov_sparse_inputs[0],
            'label': self.label_input,
            'predict': predict
        }

    # Training: mean squared error against the float-cast label.
    cost = F.square_error_cost(
        predict, paddle.cast(x=self.label_input, dtype='float32'))
    avg_cost = paddle.mean(cost)
    self._cost = avg_cost
    return {'Loss': avg_cost}
def net(self, input, is_infer=False):
    """Wire up the recall DNN and fill infer results or training metrics.

    NOTE(review): inputs are read from ``self._sparse_data_var`` rather
    than the ``input`` argument, which is kept only for interface
    compatibility.
    """
    sparse_vars = self._sparse_data_var
    self.user_sparse_inputs = sparse_vars[2:6]
    self.mov_sparse_inputs = sparse_vars[6:9]
    self.label_input = sparse_vars[-1]

    dnn = DNNLayer(self.sparse_feature_number, self.sparse_feature_dim,
                   self.hidden_layers)
    self.predict = dnn(self.batch_size, self.user_sparse_inputs,
                       self.mov_sparse_inputs, self.label_input)

    if is_infer:
        # Inference: publish ids, label and prediction; no loss is built.
        self._infer_results["uid"] = sparse_vars[2]
        self._infer_results["movieid"] = sparse_vars[6]
        self._infer_results["label"] = sparse_vars[-1]
        self._infer_results["predict"] = self.predict
        return

    # Training: mean squared error against the float-cast label.
    label_f32 = paddle.cast(x=self.label_input, dtype='float32')
    avg_cost = paddle.mean(F.square_error_cost(self.predict, label_f32))
    self._cost = avg_cost
    self._metrics["LOSS"] = avg_cost
def train():
    """Train the linear-regression model with SGD and save its weights."""
    # Instantiate the predefined linear regression model.
    model = Regressor()
    # Switch the model to training mode.
    model.train()
    # Load the training / test split.
    training_data, test_data = load_data()
    # Stochastic gradient descent with a fixed learning rate of 0.01.
    opt = paddle.optimizer.SGD(learning_rate=0.01,
                               parameters=model.parameters())

    EPOCH_NUM = 10   # number of passes over the data
    BATCH_SIZE = 10  # mini-batch size

    for epoch_id in range(EPOCH_NUM):
        # Shuffle the samples before every epoch.
        np.random.shuffle(training_data)
        # Split into mini-batches of BATCH_SIZE rows each.
        mini_batches = [
            training_data[k:k + BATCH_SIZE]
            for k in range(0, len(training_data), BATCH_SIZE)
        ]
        for iter_id, mini_batch in enumerate(mini_batches):
            # Cast to float32 so the tensors match the model's weight
            # dtype (the raw data may load as float64, which would make
            # the forward pass fail on a dtype mismatch).
            x = np.array(mini_batch[:, :-1]).astype('float32')  # features
            y = np.array(mini_batch[:, -1:]).astype('float32')  # true house price
            # Convert numpy arrays to dynamic-graph tensors.
            house_features = paddle.to_tensor(x)
            prices = paddle.to_tensor(y)

            # Forward pass.
            predicts = model(house_features)
            # Mean squared error loss.
            loss = F.square_error_cost(predicts, label=prices)
            avg_loss = paddle.mean(loss)
            if iter_id % 20 == 0:
                print("epoch: {}, iter: {}, loss is: {}".format(
                    epoch_id, iter_id, avg_loss.numpy()))

            # Backward pass.
            avg_loss.backward()
            # Apply one optimizer step.
            opt.step()
            # Reset gradients for the next iteration.
            opt.clear_grad()

    # Persist the trained parameters to LR_model.pdparams.
    paddle.save(model.state_dict(), 'LR_model.pdparams')
    print("模型保存成功,模型参数保存在LR_model.pdparams中")
def pd_model():
    """Train a one-layer linear regressor and bundle it with test data."""

    class Regressor(paddle.nn.Layer):
        """Single fully-connected layer mapping 13 features to 1 output."""

        def __init__(self):
            super(Regressor, self).__init__()
            self.fc_ = Linear(in_features=13, out_features=1)

        @paddle.jit.to_static
        def forward(self, inputs):  # pylint: disable=arguments-differ
            return self.fc_(inputs)

    model = Regressor()
    model.train()
    training_data, test_data = get_dataset()
    opt = paddle.optimizer.SGD(learning_rate=0.01,
                               parameters=model.parameters())

    n_epochs, batch_size = 10, 10
    for epoch in range(n_epochs):
        # New random sample order every epoch.
        np.random.shuffle(training_data)
        batches = [
            training_data[start:start + batch_size]
            for start in range(0, len(training_data), batch_size)
        ]
        for step, batch in enumerate(batches):
            # Last column is the target; everything else is a feature.
            features = paddle.to_tensor(
                np.array(batch[:, :-1]).astype("float32"))
            targets = paddle.to_tensor(
                np.array(batch[:, -1:]).astype("float32"))

            outputs = model(features)
            step_loss = paddle.mean(
                F.square_error_cost(outputs, label=targets))
            if step % 20 == 0:
                print("epoch: {}, iter: {}, loss is: {}".format(
                    epoch, step, step_loss.numpy()))

            step_loss.backward()
            opt.step()
            opt.clear_grad()

    np_test_data = np.array(test_data).astype("float32")
    return ModelWithData(model=model,
                         inference_dataframe=np_test_data[:, :-1])
def train(model, opt, data_loader, eval_data_loader=None, epochs=3):
    """Training loop: fit the model, track best RMSE, checkpoint each epoch."""
    model.train()
    best_rmse = float("inf")
    for epoch in range(epochs):
        for batch_id, batch in enumerate(tqdm(data_loader())):
            # Unpack the raw batch and convert it to paddle tensors.
            usr, mov, score = batch
            usr_tensors = [paddle.to_tensor(item) for item in usr]
            mov_tensors = [paddle.to_tensor(item) for item in mov]
            score_tensor = paddle.to_tensor(score)

            # Forward pass; only the predicted scores are needed here.
            _, _, predicted = model(usr_tensors, mov_tensors)

            # Mean squared error against the ground-truth scores.
            batch_loss = paddle.mean(
                F.square_error_cost(predicted, score_tensor))
            if batch_id % 500 == 0:
                logger.info("epoch: {}, batch_id: {}, loss is: {}".format(
                    epoch, batch_id, batch_loss.numpy()))

            batch_loss.backward()
            opt.step()
            opt.clear_grad()

        if eval_data_loader:
            _, _, rmse = evaluate(model, data_loader=eval_data_loader)
            if rmse < best_rmse:
                best_rmse = rmse
                # New best score: overwrite the "best" checkpoint.
                paddle.save(
                    model.state_dict(),
                    os.path.join(work_root,
                                 "models/movie_recommend/best.pdparams"))

        # Always keep a per-epoch checkpoint as well.
        paddle.save(
            model.state_dict(),
            os.path.join(work_root,
                         "models/movie_recommend/{}.pdparams".format(epoch)))
def pem_reg_loss_func(self, pred_score, gt_iou_map, mask):
    """PEM regression loss: MSE over a class-balanced sample of cells.

    Cells with IoU > 0.7 are all kept; the mid band (0.3, 0.7] and the
    low band [0, 0.3] are randomly subsampled so that each band
    contributes roughly as many cells as the high band.
    """
    gt_iou_map = paddle.multiply(gt_iou_map, mask)
    dtype = self.datatype

    # High band: IoU above 0.7.
    high_mask = paddle.cast(x=gt_iou_map > 0.7, dtype=dtype)
    # Mid band: 0.3 < IoU <= 0.7.
    mid_mask = paddle.cast(
        x=paddle.logical_and(gt_iou_map <= 0.7, gt_iou_map > 0.3),
        dtype=dtype)
    # Low band: 0 <= IoU <= 0.3, restricted to valid cells via `mask`.
    low_mask = paddle.cast(
        x=paddle.logical_and(gt_iou_map <= 0.3, gt_iou_map >= 0.),
        dtype=dtype)
    low_mask = paddle.multiply(low_mask, mask)

    num_high = paddle.cast(paddle.sum(high_mask), dtype=dtype)
    num_mid = paddle.cast(paddle.sum(mid_mask), dtype=dtype)
    num_low = paddle.cast(paddle.sum(low_mask), dtype=dtype)

    def subsample(band_mask, keep_ratio):
        # Keep each cell of `band_mask` with probability `keep_ratio`
        # by thresholding an independent uniform draw.
        noise = paddle.uniform(
            shape=[gt_iou_map.shape[1], gt_iou_map.shape[2]],
            dtype=dtype,
            min=0.0,
            max=1.0)
        noise = paddle.multiply(band_mask, noise)
        return paddle.cast(x=(noise > (1. - keep_ratio)), dtype=dtype)

    # NOTE(review): keep_ratio divides by the band count; an empty band
    # yields a division by zero — same as the original. Draw order (mid
    # first, then low) is preserved for RNG reproducibility.
    sampled_mid = subsample(mid_mask, num_high / num_mid)
    sampled_low = subsample(low_mask, num_high / num_low)

    # Balanced weight map; no gradient flows through the sampling.
    weights = high_mask + sampled_mid + sampled_low
    weights.stop_gradient = True

    loss = F.square_error_cost(pred_score, gt_iou_map)
    loss = paddle.multiply(loss, weights)
    return 0.5 * paddle.sum(loss) / paddle.sum(weights)
def train(model: Model):
    """Run a fixed 10-epoch CPU training loop, checkpointing every epoch."""
    # Hyper-parameters.
    lr = 0.001
    Epoches = 10
    paddle.set_device('cpu')

    # Switch to training mode and grab the model's own data loader.
    model.train()
    data_loader = model.train_loader

    # Adam optimizer with learning rate `lr`.
    opt = paddle.optimizer.Adam(learning_rate=lr,
                                parameters=model.parameters())

    for epoch in range(0, Epoches):
        for idx, data in enumerate(data_loader()):
            # Unpack the batch and convert to paddle tensors.
            usr, mov, score = data
            usr_v = [paddle.to_tensor(var) for var in usr]
            mov_v = [paddle.to_tensor(var) for var in mov]
            scores_label = paddle.to_tensor(score)

            # Forward pass; only the predicted score is used here.
            _, _, scores_predict = model(usr_v, mov_v)

            # Mean squared error loss.
            avg_loss = paddle.mean(
                F.square_error_cost(scores_predict, scores_label))
            if idx % 500 == 0:
                print("epoch: {}, batch_id: {}, loss is: {}".format(
                    epoch, idx, avg_loss.numpy()))

            # Backprop, optimizer step, and gradient reset.
            avg_loss.backward()
            opt.step()
            opt.clear_grad()

        # Save a checkpoint at the end of every epoch.
        paddle.save(model.state_dict(),
                    './checkpoint/epoch' + str(epoch) + '.pdparams')
training_data[k:k + BATCH_SIZE] for k in range(0, len(training_data), BATCH_SIZE) ] # 定义内层循环 for iter_id, mini_batch in enumerate(mini_batches): x = np.array(mini_batch[:, :-1]) # 获得当前批次训练数据 y = np.array(mini_batch[:, -1:]) # 获得当前批次训练标签(真实房价) # 将numpy数据转为飞桨动态图tensor的格式 house_features = paddle.to_tensor(x) prices = paddle.to_tensor(y) # 前向计算 predicts = model(house_features) # 计算损失 loss = F.square_error_cost(predicts, label=prices) avg_loss = paddle.mean(loss) if iter_id % 20 == 0: print("epoch: {}, iter: {}, loss is: {}".format( epoch_id, iter_id, avg_loss.numpy())) # 反向传播,计算每层参数的梯度值 avg_loss.backward() # 更新参数,根据设置好的学习率迭代一步 opt.step() # 清空梯度变量,以备下一轮计算 opt.clear_grad() # 保存模型参数,文件名为LR_model.pdparams paddle.save(model.state_dict(), 'LR_model.pdparams') print("模型保存成功,模型参数保存在LR_model.pdparams中")
def create_loss(predict, label_input):
    """Return the mean squared error between `predict` and the label.

    The label is cast to float32 before the elementwise squared error.
    """
    label_f32 = paddle.cast(x=label_input, dtype='float32')
    return paddle.mean(F.square_error_cost(predict, label_f32))
def func(x):
    """Weighted sum of squared errors between `x` and the target.

    Reads `minimum` (the target) and `scale` (per-element weights) from
    the enclosing scope.
    """
    target = paddle.assign(minimum)
    weight = paddle.assign(scale)
    squared_diff = F.square_error_cost(x, target)
    return paddle.sum(paddle.multiply(weight, squared_diff))
#!/usr/bin/env python
# coding=utf-8
import numpy
import paddle
import paddle.static as static
import paddle.nn.functional as F

# Enable static-graph mode.
paddle.enable_static()
paddle.set_device('cpu')

# Network definition: one fully-connected layer over 13 features.
x = static.data(name='X', shape=[None, 13], dtype='float32')
y = static.data(name='Y', shape=[None, 1], dtype='float32')
predict = static.nn.fc(x=x, size=1)
loss = F.square_error_cost(input=predict, label=y)
avg_loss = paddle.mean(loss)

# Prepare the executor and run parameter initialization.
exe = static.Executor(paddle.CPUPlace())
exe.run(static.default_startup_program())

# Run the network on one random batch.
# FIX: X and Y must share the batch dimension; the original fed 7
# feature rows against 8 labels, which breaks the elementwise MSE.
x = numpy.random.random(size=(8, 13)).astype('float32')
y = numpy.random.random(size=(8, 1)).astype('float32')
loss_data, = exe.run(static.default_main_program(),
                     feed={
                         'X': x,
                         'Y': y
                     },
                     fetch_list=[avg_loss.name])
import paddle
import paddle.static as static
import paddle.nn.functional as F

paddle.enable_static()

# Network: a single fully-connected layer over 13 features.
x = static.data(name='x', shape=[None, 13], dtype='float32')
y = static.data(name='y', shape=[None, 1], dtype='float32')
# FIX: paddle.static.nn.fc takes `x=` / `activation=`; the fluid-era
# `input=` / `act=` keywords were removed in Paddle 2.x.
y_predict = static.nn.fc(x=x, size=1, activation=None)
cost = F.square_error_cost(input=y_predict, label=y)
avg_loss = paddle.mean(cost)

# Minimize the mean squared error with SGD.
sgd_optimizer = paddle.optimizer.SGD(learning_rate=0.001)
sgd_optimizer.minimize(avg_loss)

# Multi-threaded CPU execution via ParallelExecutor.
exec_strategy = static.ExecutionStrategy()
exec_strategy.num_threads = 4
train_exe = static.ParallelExecutor(use_cuda=False,
                                    loss_name=avg_loss.name,
                                    exec_strategy=exec_strategy)