コード例 #1
0
            done = 1
             
          if done:
            # Runs once `done` is truthy: logs the episode reward, fits the
            # critic on the collected buffers, updates the agent, and leaves
            # the enclosing loop (which starts outside this excerpt).
            
            # Record total reward
            EPs_total_reward.append(EP_reward_sum)            

            
            # Stack the per-step buffers row-wise into single arrays.
            states = np.vstack(states)
            # NOTE(review): next_states is stacked here but not read again in
            # this excerpt before being reset below.
            next_states = np.vstack(next_states)
            actions = np.vstack(actions)
            # get_return is defined outside this excerpt; 0.95 is presumably
            # the discount factor -- TODO confirm.
            values = get_return(values,0.95)
            values = np.vstack(values)
            
            # Train the critic CRITIC_EPOCHS times on the same batch; only
            # the loss of the last pass remains bound to critic_loss.
            for ve in range(CRITIC_EPOCHS):                  
                critic_loss = tuc.train_critic(states, values, actions)     
 
            # Rebind the buffers to fresh empty lists.
            states = []
            next_states = []
            actions = []
            values = []
            
            # Print total reward          
            print("PG episode : {0: <5} , total reward : {1: <5}".format(EP,EP_reward_sum))
            
   
            # NOTE(review): the previous comment here said "Stop to train
            # agent", but the call below presumably performs the REINFORCE
            # policy update -- confirm against PG_agent.
            PG_agent.agent_REINFORCE()
           
            # Exit the enclosing loop now that this episode is handled.
            break
            
コード例 #2
0
ファイル: Run_PG_TUC.py プロジェクト: johanesn/Wei-Lin-Liao
            # Move to the next state
            state = next_state

            # Perform the optimization
            if done:

                states = torch.cat(agent.states)
                values = torch.tensor(np.expand_dims(np.array(
                    agent.get_values()),
                                                     axis=1),
                                      dtype=torch.float32)

                actions_matrix = torch.cat(actions_matrix, 0).cuda()
                #print(len(actions_matrix))
                for e in range(3):
                    TUC_dynamic.train_critic(states.cuda(), values.cuda(),
                                             actions_matrix.cuda())

                print("updating agent !")
                agent.REINFORCE()

                break

        #==================== loop of training procedure ==========================================#

    # Report the wall-clock seconds elapsed since `now`, which is set outside
    # this excerpt (presumably at the start of the epoch -- TODO confirm).
    time_cost = time.time() - now
    print('epoch = %d, time_cost = %.4f' % (epoch, time_cost))

# Save the final agent and the TUC dynamics model through their save_model
# methods, then report completion.
for _model, _path in (
    (agent, "./model_final/pg_TUC_agent_2"),
    (TUC_dynamic, "./model_final/pg_TUC_2"),
):
    _model.save_model(_path)
print('Complete')