def train(): """bergin train""" arr1 = [] arr2 = [] dist.init_parallel_env() set_seed(2021) layer = LinearNet() if dist.get_world_size() > 1: dp_layer = paddle.DataParallel(layer) else: dp_layer = layer layer2 = LinearNet() if dist.get_world_size() > 1: dp_layer2 = paddle.DataParallel(layer2) else: dp_layer2 = layer2 dp_layer2.set_state_dict(dp_layer.state_dict()) loss_fn = nn.MSELoss() adam = opt.Adam( learning_rate=0.001, parameters=dp_layer.parameters()) adam2 = opt.Adam( learning_rate=0.001, parameters=dp_layer2.parameters()) for i in range(2): batch_size = 10 shard = int(batch_size / dist.get_world_size()) start_no = shard * dist.get_rank() end_no = start_no + shard inputs = paddle.randn([10, 10], 'float32')[start_no:end_no] outputs = dp_layer(inputs) labels = paddle.randn([10, 1], 'float32')[start_no:end_no] loss = loss_fn(outputs, labels) if dist.get_rank() == 0: arr1.append(loss.numpy()[0]) loss.backward() adam.step() adam.clear_grad() outputs = dp_layer2(inputs) loss = loss_fn(outputs, labels) loss.backward() if dist.get_rank() == 0: arr2.append(loss.numpy()[0]) adam2.step() adam2.clear_grad() check_data(arr1, arr2)
def train():
    dist.init_parallel_env()  # 1. initialize parallel environment
    set_seed(2021)

    # 2. create data parallel layer & optimizer
    layer = LinearNet()
    if dist.get_world_size() > 1:
        dp_layer = paddle.DataParallel(layer)
    else:
        dp_layer = layer

    layer2 = LinearNet()
    if dist.get_world_size() > 1:
        dp_layer2 = paddle.DataParallel(layer2)
    else:
        dp_layer2 = layer2
    dp_layer2.set_state_dict(dp_layer.state_dict())

    loss_fn = nn.MSELoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())
    adam2 = opt.Adam(learning_rate=0.001, parameters=dp_layer2.parameters())

    # 3. run layer
    print("Start")
    for i in range(10):
        batch_size = 10
        shard = int(batch_size / dist.get_world_size())
        start_no = shard * dist.get_rank()
        end_no = start_no + shard

        inputs = paddle.randn([10, 10], 'float32')[start_no:end_no]
        outputs = dp_layer(inputs)
        labels = paddle.randn([10, 1], 'float32')[start_no:end_no]
        loss = loss_fn(outputs, labels)
        if dist.get_rank() == 0:
            print("Loss1", loss.numpy()[0])
            print(dp_layer.parameters())
        loss.backward()
        adam.step()
        adam.clear_grad()

        outputs = dp_layer2(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        if dist.get_rank() == 0:
            print("Loss2", loss.numpy()[0])
            print(dp_layer2.parameters())
        adam2.step()
        adam2.clear_grad()
def train(print_result=False):
    # 1. enable dynamic mode
    paddle.disable_static()
    # 2. initialize parallel environment
    dist.init_parallel_env()
    # 3. create data parallel layer & optimizer
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)
    loss_fn = nn.MSELoss()
    adam = opt.Adam(
        learning_rate=0.001, parameters=dp_layer.parameters())
    # 4. run layer
    inputs = paddle.randn([10, 10], 'float32')
    outputs = dp_layer(inputs)
    labels = paddle.randn([10, 1], 'float32')
    loss = loss_fn(outputs, labels)
    if print_result is True:
        print("loss:", loss.numpy())
    loss.backward()
    adam.step()
    adam.clear_grad()
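# Each of these train() functions is meant to run once per process. A minimal
# launch sketch (two processes assumed; paddle.distributed.spawn also accepts
# gpus/join options not shown here):
if __name__ == '__main__':
    dist.spawn(train, args=(True,), nprocs=2)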
def train(print_result=True): """train""" # 1. initialize parallel environment train_data_list1 = [] train_data_list2 = [] dist.init_parallel_env() # 2. create data parallel layer & optimizer layer = LinearNet() dp_layer = paddle.DataParallel(layer) loss_fn = nn.MSELoss() adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters()) # 3. run layer inputs = paddle.randn([10, 10], 'float32') outputs = dp_layer(inputs) labels = paddle.randn([10, 1], 'float32') loss = loss_fn(outputs, labels) assert len(loss) == 1 if print_result is True: train_data_list1.append(loss.numpy()) assert len(train_data_list1) loss.backward() adam.step() adam.clear_grad()
def train():
    # enable dygraph mode
    paddle.disable_static()
    dist.init_parallel_env()

    # create network
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)
    loss_fn = nn.CrossEntropyLoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

    # print(core._get_device_properties(dist.ParallelEnv().device_id))

    # create data loader
    # loader = paddle.io.DataLoader.from_generator(capacity=5, use_multiprocess=True)
    loader = paddle.io.DataLoader.from_generator(capacity=5)
    loader.set_batch_generator(random_batch_reader())

    for epoch_id in range(EPOCH_NUM):
        for batch_id, (image, label) in enumerate(loader()):
            out = layer(image)
            loss = loss_fn(out, label)

            loss = dp_layer.scale_loss(loss)
            loss.backward()
            dp_layer.apply_collective_grads()

            adam.step()
            adam.clear_grad()
            print("Epoch {} batch {}: loss = {}".format(
                epoch_id, batch_id, np.mean(loss.numpy())))
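# random_batch_reader(), EPOCH_NUM, IMAGE_SIZE and CLASS_NUM are assumed to be
# defined elsewhere. A plausible sketch, modeled on the Paddle save/load
# examples (the constant values here are assumptions):
EPOCH_NUM = 4
BATCH_SIZE = 16
BATCH_NUM = 4
IMAGE_SIZE = 784
CLASS_NUM = 10


def random_batch_reader():
    def _get_random_images_and_labels(image_shape, label_shape):
        image = np.random.random(size=image_shape).astype('float32')
        label = np.random.random(size=label_shape).astype('int64')
        return image, label

    def __reader__():
        for _ in range(BATCH_NUM):
            yield _get_random_images_and_labels(
                [BATCH_SIZE, IMAGE_SIZE], [BATCH_SIZE, 1])

    return __reader__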
def train():
    # 1. enable dynamic mode
    paddle.disable_static()
    # 2. initialize parallel environment
    dist.init_parallel_env()
    # 3. create data parallel layer & optimizer
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)
    loss_fn = nn.MSELoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())
    # 4. run layer
    inputs = paddle.randn([10, 10], 'float32')
    outputs = dp_layer(inputs)
    labels = paddle.randn([10, 1], 'float32')
    loss = loss_fn(outputs, labels)

    loss = dp_layer.scale_loss(loss)
    loss.backward()
    dp_layer.apply_collective_grads()

    adam.step()
    adam.clear_grad()
def train(print_result=False):
    # 1. initialize parallel environment
    dist.init_parallel_env()
    # 2. create data parallel layer & optimizer
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)
    loss_fn = nn.MSELoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())
    # 3. run layer
    inputs = paddle.randn([10, 10], 'float32')
    outputs = dp_layer(inputs)
    labels = paddle.randn([10, 1], 'float32')
    loss = loss_fn(outputs, labels)
    if print_result is True:
        print("Rank:", int(os.getenv("PADDLE_TRAINER_ID")))
    loss.backward()
    adam.step()
    adam.clear_grad()
    return int(os.getenv("PADDLE_TRAINER_ID"))
def train():
    # init env
    dist.init_parallel_env()

    # create network
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)
    loss_fn = nn.CrossEntropyLoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

    # create data loader
    dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
    loader = paddle.io.DataLoader(dataset,
                                  batch_size=BATCH_SIZE,
                                  shuffle=True,
                                  drop_last=True,
                                  num_workers=1)

    # train
    for epoch_id in range(EPOCH_NUM):
        for batch_id, (image, label) in enumerate(loader()):
            out = layer(image)
            loss = loss_fn(out, label)

            loss.backward()

            adam.step()
            adam.clear_grad()

            if dist.get_rank() == 0:
                print("Epoch {} batch {}: loss = {}".format(
                    epoch_id, batch_id, np.mean(loss.numpy())))
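# RandomDataset is assumed to be a paddle.io.Dataset yielding random
# (image, label) pairs. A minimal sketch; the shapes are assumptions chosen
# to match IMAGE_SIZE / CLASS_NUM above:
class RandomDataset(paddle.io.Dataset):
    def __init__(self, num_samples):
        self.num_samples = num_samples

    def __getitem__(self, idx):
        image = np.random.random([IMAGE_SIZE]).astype('float32')
        label = np.random.randint(0, CLASS_NUM, (1, )).astype('int64')
        return image, label

    def __len__(self):
        return self.num_samples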
def get_paddle_model(model_path):
    def train(layer, loader, loss_fn, optimizer):
        for _ in range(1):
            for _, (image, label) in enumerate(loader()):
                out = layer(image)
                loss = loss_fn(out, label)
                loss.backward()
                optimizer.step()
                optimizer.clear_grad()

    paddle.disable_static()
    model_layer = _LinearNet()
    loss_func = nn.CrossEntropyLoss()
    adam = opt.Adam(learning_rate=0.001, parameters=model_layer.parameters())
    dataset = _RandomDataset(64)
    data_loader = paddle.io.DataLoader(dataset,
                                       batch_size=16,
                                       shuffle=True,
                                       drop_last=True,
                                       num_workers=2)
    train(model_layer, data_loader, loss_func, adam)
    paddle.jit.save(layer=model_layer,
                    path=os.path.join(model_path, 'model'),
                    input_spec=[InputSpec(shape=[None, 784], dtype='float32')])
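# _LinearNet, _RandomDataset and InputSpec are not defined in this snippet.
# A hypothetical sketch consistent with the [None, 784] input spec above
# (layer sizes and label range are assumptions):
from paddle.static import InputSpec


class _LinearNet(nn.Layer):
    def __init__(self):
        super(_LinearNet, self).__init__()
        self._linear = nn.Linear(784, 10)

    def forward(self, x):
        return self._linear(x)


class _RandomDataset(paddle.io.Dataset):
    def __init__(self, num_samples):
        self.num_samples = num_samples

    def __getitem__(self, idx):
        image = np.random.random([784]).astype('float32')
        label = np.random.randint(0, 10, (1, )).astype('int64')
        return image, label

    def __len__(self):
        return self.num_samples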
def train(print_result=True):
    # 1. enable dynamic mode
    # device = paddle.set_device('gpu')
    # paddle.disable_static(device)

    # 2. initialize parallel environment
    dist.init_parallel_env()

    # 3. create data parallel layer & optimizer
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)

    loss_fn = nn.MSELoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

    dataset = FakeDataset()
    # loader = paddle.io.DataLoader(dataset, batch_size=2, places=device, num_workers=2)
    loader = paddle.io.DataLoader(dataset, batch_size=2, num_workers=2)

    # 4. run layer
    for inputs, labels in loader:
        # inputs = paddle.randn([10, 10], 'float32')
        outputs = dp_layer(inputs)
        # labels = paddle.randn([10, 1], 'float32')
        loss = loss_fn(outputs, labels)
        if print_result is True:
            print("loss:", loss.numpy())
        # loss = dp_layer.scale_loss(loss)
        loss.backward()
        # dp_layer.apply_collective_grads()
        adam.step()
        adam.clear_grad()
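# FakeDataset is assumed to be a small paddle.io.Dataset producing the same
# [10] -> [1] float samples as the commented-out paddle.randn lines. A
# hypothetical sketch:
class FakeDataset(paddle.io.Dataset):
    def __init__(self, num_samples=10):
        self.num_samples = num_samples

    def __getitem__(self, idx):
        inputs = np.random.randn(10).astype('float32')
        labels = np.random.randn(1).astype('float32')
        return inputs, labels

    def __len__(self):
        return self.num_samples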
def __init__(self, recmodel: PairWiseModel, config: dict):
    self.model = recmodel
    self.weight_decay = config['decay']
    self.config = config
    self.lr = config['lr']
    self.opt = optimizer.Adam(parameters=self.model.parameters(),
                              learning_rate=self.lr)
    if self.config['multicpu']:
        self.opt = fleet.distributed_optimizer(self.opt)
def __call__(self, parameters):
    opt = optim.Adam(learning_rate=self.learning_rate,
                     beta1=self.beta1,
                     beta2=self.beta2,
                     epsilon=self.epsilon,
                     weight_decay=self.weight_decay,
                     grad_clip=self.grad_clip,
                     name=self.name,
                     lazy_mode=self.lazy_mode,
                     parameters=parameters)
    return opt
def __init__(self, state_dim, action_dim, max_action, gamma=0.99, tau=0.001):
    # actor network and target actor network
    self.actor = Actor(state_dim, action_dim, max_action)
    self.actor_target = copy.deepcopy(self.actor)
    self.actor_optimizer = optim.Adam(parameters=self.actor.parameters(),
                                      learning_rate=1e-4)

    # critic (value) network and target critic network
    self.critic = Critic(state_dim, action_dim)
    self.critic_target = copy.deepcopy(self.critic)
    self.critic_optimizer = optim.Adam(parameters=self.critic.parameters(),
                                       weight_decay=1e-2)

    self.gamma = gamma
    self.tau = tau
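# Actor and Critic are assumed to be the usual DDPG networks. A minimal
# sketch of plausible definitions (the hidden-layer sizes are assumptions):
import paddle.nn.functional as F


class Actor(nn.Layer):
    def __init__(self, state_dim, action_dim, max_action):
        super(Actor, self).__init__()
        self.l1 = nn.Linear(state_dim, 400)
        self.l2 = nn.Linear(400, 300)
        self.l3 = nn.Linear(300, action_dim)
        self.max_action = max_action

    def forward(self, state):
        a = F.relu(self.l1(state))
        a = F.relu(self.l2(a))
        # squash actions into [-max_action, max_action]
        return self.max_action * paddle.tanh(self.l3(a))


class Critic(nn.Layer):
    def __init__(self, state_dim, action_dim):
        super(Critic, self).__init__()
        self.l1 = nn.Linear(state_dim + action_dim, 400)
        self.l2 = nn.Linear(400, 300)
        self.l3 = nn.Linear(300, 1)

    def forward(self, state, action):
        q = F.relu(self.l1(paddle.concat([state, action], 1)))
        q = F.relu(self.l2(q))
        return self.l3(q)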
def __call__(self, model_list):
    # model_list is None in static graph
    parameters = sum([m.parameters() for m in model_list],
                     []) if model_list else None
    opt = optim.Adam(learning_rate=self.learning_rate,
                     beta1=self.beta1,
                     beta2=self.beta2,
                     epsilon=self.epsilon,
                     weight_decay=self.weight_decay,
                     grad_clip=self.grad_clip,
                     name=self.name,
                     lazy_mode=self.lazy_mode,
                     multi_precision=self.multi_precision,
                     parameters=parameters)
    return opt
def build_and_train_model(self):
    # create network
    layer = LinearNet()
    loss_fn = nn.CrossEntropyLoss()
    adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters())

    # create data loader
    # TODO: the new DataLoader causes an unknown timeout on Windows, replace it
    loader = random_batch_reader()

    # train
    train(layer, loader, loss_fn, adam)

    return layer, adam
def test_create_param_lr_with_1_for_coverage(self):
    x = paddle.fluid.framework.ParamBase(dtype="float32",
                                         shape=[5, 10],
                                         lod_level=0,
                                         name="x",
                                         optimize_attr={'learning_rate': 1.0})
    x.value().get_tensor().set(
        np.random.random((5, 10)).astype('float32'),
        paddle.fluid.framework._current_expected_place())

    y = paddle.ones([5, 10])
    z = x + y
    opt = optimizer.Adam(learning_rate=self.lr, parameters=[x])
    z.backward()
    opt.step()
def test_paddle_save_load_v2(self):
    paddle.disable_static()

    class StepDecay(LRScheduler):
        def __init__(self,
                     learning_rate,
                     step_size,
                     gamma=0.1,
                     last_epoch=-1,
                     verbose=False):
            self.step_size = step_size
            self.gamma = gamma
            super(StepDecay, self).__init__(learning_rate, last_epoch,
                                            verbose)

        def get_lr(self):
            i = self.last_epoch // self.step_size
            return self.base_lr * (self.gamma**i)

    layer = LinearNet()
    inps = paddle.randn([2, IMAGE_SIZE])
    adam = opt.Adam(learning_rate=StepDecay(0.1, 1),
                    parameters=layer.parameters())
    y = layer(inps)
    y.mean().backward()
    adam.step()
    state_dict = adam.state_dict()
    path = 'paddle_save_load_v2/model.pdparams'
    with self.assertRaises(TypeError):
        paddle.save(state_dict, path, use_binary_format='False')
    # legacy paddle.save, paddle.load
    paddle.framework.io._legacy_save(state_dict, path)
    load_dict_tensor = paddle.load(path, return_numpy=False)
    # legacy paddle.load, paddle.save
    paddle.save(state_dict, path)
    load_dict_np = paddle.framework.io._legacy_load(path)
    for k, v in state_dict.items():
        if isinstance(v, dict):
            self.assertTrue(v == load_dict_tensor[k])
        else:
            self.assertTrue(
                np.array_equal(v.numpy(), load_dict_tensor[k].numpy()))
            if not np.array_equal(v.numpy(), load_dict_np[k]):
                print(v.numpy())
                print(load_dict_np[k])
            self.assertTrue(np.array_equal(v.numpy(), load_dict_np[k]))
def get_optimizer(config, parameters):
    clip = nn.ClipGradByNorm(clip_norm=config.optim.grad_clip)
    if config.optim.optimizer == 'Adam':
        return optim.Adam(parameters=parameters,
                          learning_rate=config.optim.lr,
                          weight_decay=config.optim.weight_decay,
                          beta1=config.optim.beta1,
                          beta2=0.999,
                          epsilon=config.optim.eps,
                          grad_clip=clip)
    elif config.optim.optimizer == 'RMSProp':
        return optim.RMSprop(parameters=parameters,
                             learning_rate=config.optim.lr,
                             weight_decay=config.optim.weight_decay,
                             grad_clip=clip)
    elif config.optim.optimizer == 'SGD':
        return optim.SGD(parameters=parameters,
                         learning_rate=config.optim.lr,
                         momentum=0.9,
                         grad_clip=clip)
    else:
        raise NotImplementedError('Optimizer {} not understood.'.format(
            config.optim.optimizer))
def train(): """train""" # 1. initialize parallel environment dist.init_parallel_env() # 2. create data parallel layer & optimizer layer = LinearNet() dp_layer = paddle.DataParallel(layer) loss_fn = nn.MSELoss() adam = opt.Adam( learning_rate=0.001, parameters=dp_layer.parameters()) # 3. run layer inputs = paddle.randn([10, 10], 'float32') outputs = dp_layer(inputs) labels = paddle.randn([10, 1], 'float32') loss = loss_fn(outputs, labels) loss.backward() adam.step() adam.clear_grad() assert len(loss) == 1
def train():
    # 1. initialize parallel environment (cpu & gpu)
    dist.init_parallel_env()
    # 2. set cpu place
    paddle.set_device('cpu')
    # 3. create data parallel layer & optimizer
    layer = LinearNet()
    dp_layer = paddle.DataParallel(layer)
    loss_fn = nn.MSELoss()
    adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())
    # 4. run layer
    inputs = paddle.randn([10, 10], 'float32')
    outputs = dp_layer(inputs)
    labels = paddle.randn([10, 1], 'float32')
    loss = loss_fn(outputs, labels)
    loss.backward()
    adam.step()
    adam.clear_grad()
def run(self,
        image,
        need_align=False,
        start_lr=0.1,
        final_lr=0.025,
        latent_level=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],  # for ffhq (0~17)
        step=100,
        mse_weight=1,
        pre_latent=None):

    if need_align:
        src_img = run_alignment(image)
    else:
        src_img = Image.open(image).convert("RGB")

    generator = self.generator
    generator.train()

    percept = LPIPS(net='vgg')
    # on PaddlePaddle, lpips's default eval mode means no gradients.
    percept.train()

    n_mean_latent = 4096

    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(256),
        transforms.Transpose(),
        transforms.Normalize([127.5, 127.5, 127.5], [127.5, 127.5, 127.5]),
    ])

    imgs = paddle.to_tensor(transform(src_img)).unsqueeze(0)

    if pre_latent is None:
        with paddle.no_grad():
            noise_sample = paddle.randn(
                (n_mean_latent, generator.style_dim))
            latent_out = generator.style(noise_sample)

            latent_mean = latent_out.mean(0)

        latent_in = latent_mean.detach().clone().unsqueeze(0).tile(
            (imgs.shape[0], 1))
        latent_in = latent_in.unsqueeze(1).tile(
            (1, generator.n_latent, 1)).detach()
    else:
        latent_in = paddle.to_tensor(np.load(pre_latent)).unsqueeze(0)

    var_levels = list(latent_level)
    const_levels = [
        i for i in range(generator.n_latent) if i not in var_levels
    ]
    assert len(var_levels) > 0
    if len(const_levels) > 0:
        latent_fix = latent_in.index_select(paddle.to_tensor(const_levels),
                                            1).detach().clone()
        latent_in = latent_in.index_select(paddle.to_tensor(var_levels),
                                           1).detach().clone()

    latent_in.stop_gradient = False

    optimizer = optim.Adam(parameters=[latent_in], learning_rate=start_lr)

    pbar = tqdm(range(step))

    for i in pbar:
        t = i / step
        lr = get_lr(t, step, start_lr, final_lr)
        optimizer.set_lr(lr)

        if len(const_levels) > 0:
            latent_dict = {}
            for idx, idx2 in enumerate(var_levels):
                latent_dict[idx2] = latent_in[:, idx:idx + 1]
            for idx, idx2 in enumerate(const_levels):
                latent_dict[idx2] = (latent_fix[:, idx:idx + 1]).detach()
            latent_list = []
            for idx in range(generator.n_latent):
                latent_list.append(latent_dict[idx])
            latent_n = paddle.concat(latent_list, 1)
        else:
            latent_n = latent_in

        img_gen, _ = generator([latent_n],
                               input_is_latent=True,
                               randomize_noise=False)

        batch, channel, height, width = img_gen.shape
        if height > 256:
            factor = height // 256
            img_gen = img_gen.reshape((batch, channel, height // factor,
                                       factor, width // factor, factor))
            img_gen = img_gen.mean([3, 5])

        p_loss = percept(img_gen, imgs).sum()
        mse_loss = F.mse_loss(img_gen, imgs)
        loss = p_loss + mse_weight * mse_loss

        optimizer.clear_grad()
        loss.backward()
        optimizer.step()

        pbar.set_description(
            (f"perceptual: {p_loss.numpy()[0]:.4f}; "
             f"mse: {mse_loss.numpy()[0]:.4f}; lr: {lr:.4f}"))

    img_gen, _ = generator([latent_n],
                           input_is_latent=True,
                           randomize_noise=False)
    dst_img = make_image(img_gen)[0]
    dst_latent = latent_n.numpy()[0]

    os.makedirs(self.output_path, exist_ok=True)
    save_src_path = os.path.join(self.output_path, 'src.fitting.png')
    cv2.imwrite(save_src_path,
                cv2.cvtColor(np.asarray(src_img), cv2.COLOR_RGB2BGR))
    save_dst_path = os.path.join(self.output_path, 'dst.fitting.png')
    cv2.imwrite(save_dst_path, cv2.cvtColor(dst_img, cv2.COLOR_RGB2BGR))
    save_npy_path = os.path.join(self.output_path, 'dst.fitting.npy')
    np.save(save_npy_path, dst_latent)

    return np.asarray(src_img), dst_img, dst_latent
epoch = 5
output = "work/Output/"
batch_size = 128
G_DIMENSION = 100
beta1 = 0.5
beta2 = 0.999
output_path = 'work/Output'

device = paddle.set_device('gpu')
paddle.disable_static(device)

real_label = 1.
fake_label = 0.

netD = Discriminator()
netG = Generator()
optimizerD = optim.Adam(parameters=netD.parameters(),
                        learning_rate=lr,
                        beta1=beta1,
                        beta2=beta2)
optimizerG = optim.Adam(parameters=netG.parameters(),
                        learning_rate=lr,
                        beta1=beta1,
                        beta2=beta2)

### training loop
losses = [[], []]
#plt.ion()
now = 0
for pass_id in range(epoch):
    # enumerate() pairs each batch from the loader with its index
    for batch_id, data in enumerate(train_loader()):
        # train the discriminator
        optimizerD.clear_grad()
        real_cpu = data[0]
        label = paddle.full((batch_size, 1, 1, 1), real_label, dtype='float32')
        output = netD(real_cpu)
        errD_real = loss(output, label)
def test_optimizer_with_varbase_input(self):
    x = paddle.zeros([2, 3])
    with self.assertRaises(TypeError):
        optimizer.Adam(learning_rate=self.lr, parameters=x)
model = DeepFRI(args)

task_name = os.path.split(args.label_data_path)[-1]
task_name = os.path.splitext(task_name)[0]
args.task = task_name

time_stamp = str(datetime.now()).replace(":", "-").replace(" ", "_").split(".")[0]
args.model_name = (
    f"models/{model.__class__.__name__}_{args.gc_layer}_{args.task}_{time_stamp}"
)

loss_fn = BCEWithLogitsLoss(reduction="none")
optimizer = optim.Adam(
    parameters=model.parameters(),
    learning_rate=args.lr,
    beta1=0.95,
    beta2=0.99,
    weight_decay=args.weight_decay,
)

model_save_dir = os.path.split(args.model_name)[0]
if model_save_dir:
    try:
        os.makedirs(model_save_dir)
    except FileExistsError:
        pass

print(
    f"\n{args.task}: Training on {len(train_dataset)} protein samples and {len(valid_dataset)} for validation."
)
print(f"Starting at {datetime.now()}\n")
print(args)
# loss = nn.BCELoss()
# switched to the least-squares loss MSELoss
loss = nn.MSELoss()

# Create batch of latent vectors that we will use to visualize
# the progression of the generator
fixed_noise = paddle.randn([32, 100, 1, 1], dtype='float32')

# Establish convention for real and fake labels during training
real_label = 1.
fake_label = 0.

# Setup Adam optimizers for both G and D
optimizerD = optim.Adam(parameters=netD.parameters(),
                        learning_rate=0.0002,
                        beta1=0.5,
                        beta2=0.999)
optimizerG = optim.Adam(parameters=netG.parameters(),
                        learning_rate=0.0002,
                        beta1=0.5,
                        beta2=0.999)

losses = [[], []]
#plt.ion()
now = 0
number = 0

for pass_id in range(100):
    for batch_id, (data, target) in enumerate(dataloader):
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
            opt.step()
            opt.clear_grad()
            print("Epoch {} batch {}: loss = {}".format(
                epoch_id, batch_id, np.mean(loss.numpy())))

# enable dygraph mode
place = paddle.CPUPlace()
paddle.disable_static(place)

# 1. train & save model.

# create network
layer = LinearNet()
loss_fn = nn.CrossEntropyLoss()
adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters())

# create data loader
dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)
loader = paddle.io.DataLoader(dataset,
                              places=place,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              drop_last=True,
                              num_workers=2)

# train
train(layer, loader, loss_fn, adam)

# save
model_path = "linear.example.model"
    def __init__(self, in_size, out_size):
        super(SimpleNet, self).__init__()
        self._linear = nn.Linear(in_size, out_size)

    @paddle.jit.to_static
    def forward(self, x):
        y = self._linear(x)
        z = self._linear(y)
        return z

# enable dygraph mode
paddle.disable_static()

# train model
net = SimpleNet(8, 8)
adam = opt.Adam(learning_rate=0.1, parameters=net.parameters())
x = paddle.randn([4, 8], 'float32')
for i in range(10):
    out = net(x)
    loss = paddle.tensor.mean(out)
    loss.backward()
    adam.step()
    adam.clear_grad()

model_path = "simplenet.example.model.separate_params"
config = paddle.jit.SaveLoadConfig()
config.separate_params = True

# saving with configs.separate_params
paddle.jit.save(
    layer=net,
# enable dygraph mode
paddle.disable_static(place)

# load
fc = paddle.jit.load(model_path)

# inference
fc.eval()
x = paddle.randn([1, IMAGE_SIZE], 'float32')
pred = fc(x)

# fine-tune
fc.train()
loss_fn = nn.CrossEntropyLoss()
adam = opt.Adam(learning_rate=0.001, parameters=fc.parameters())
loader = paddle.io.DataLoader(dataset,
                              places=place,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              drop_last=True,
                              num_workers=2)
for epoch_id in range(EPOCH_NUM):
    for batch_id, (image, label) in enumerate(loader()):
        out = fc(image)
        loss = loss_fn(out, label)
        loss.backward()
        adam.step()
        adam.clear_grad()
        print("Epoch {} batch {}: loss = {}".format(
            epoch_id, batch_id, np.mean(loss.numpy())))
    def __init__(self):
        super(LinearNet, self).__init__()
        self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM)

    @paddle.jit.to_static
    def forward(self, x):
        return self._linear(x)

# set device
paddle.set_device('gpu' if USE_GPU else 'cpu')

# create network
layer = LinearNet()
dp_layer = paddle.DataParallel(layer)
loss_fn = nn.CrossEntropyLoss()
adam = opt.Adam(learning_rate=0.001, parameters=dp_layer.parameters())

# create data loader
loader = paddle.io.DataLoader.from_generator(capacity=5)
loader.set_batch_generator(random_batch_reader())

for epoch_id in range(EPOCH_NUM):
    for batch_id, (image, label) in enumerate(loader()):
        out = layer(image)
        loss = loss_fn(out, label)
        loss.backward()
        adam.step()
        adam.clear_grad()
        print("Epoch {} batch {}: loss = {}".format(
            epoch_id, batch_id, np.mean(loss.numpy())))
        x = F.relu(F.dropout(self.fc1(inputs), 0.6))
        x = self.fc2(x)
        return F.softmax(x, -1)

    def select_action(self, inputs):
        x = paddle.to_tensor(inputs).astype('float32').unsqueeze(0)
        probs = self.forward(x)
        m = Categorical(probs)
        action = m.sample([1])
        self.saved_log_probs.append(m.log_prob(action))
        return action

policy = Policy()
optimizer = optim.Adam(parameters=policy.parameters(), learning_rate=1e-2)
eps = np.finfo(np.float32).eps.item()

def finish_episode():
    R = 0
    policy_loss = []
    for r in policy.rewards[::-1]:
        R = r + gamma * R
    returns = paddle.full([len(policy.rewards)], R)
    returns = (returns - returns.mean()) / (returns.std() + eps)
    for log_prob, R in zip(policy.saved_log_probs, returns):
        policy_loss.append(-log_prob * R)
    optimizer.clear_grad()
    policy_loss = paddle.concat(policy_loss).sum()
    policy_loss.backward()
    optimizer.step()
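# The Policy class header, its __init__, and the gamma discount factor are not
# part of the fragment above. A hypothetical completion consistent with it
# (the layer sizes follow the classic CartPole REINFORCE example and are
# assumptions, as is the gamma value):
import paddle.nn.functional as F
from paddle.distribution import Categorical

gamma = 0.99


class Policy(nn.Layer):
    def __init__(self):
        super(Policy, self).__init__()
        self.fc1 = nn.Linear(4, 128)
        self.fc2 = nn.Linear(128, 2)
        # filled during an episode, consumed by finish_episode()
        self.saved_log_probs = []
        self.rewards = []

    def forward(self, inputs):
        # body as shown in the fragment above
        x = F.relu(F.dropout(self.fc1(inputs), 0.6))
        x = self.fc2(x)
        return F.softmax(x, -1)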