Code example #1
File: td3.py  Project: marsXyr/DP-ERL
    def __init__(self, state_dim, action_dim, max_action, memory, args):

        # misc
        self.criterion = nn.MSELoss()
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.max_action = max_action
        self.memory = memory
        self.n = args.n_actor

        # actor
        self.actors = [Actor(state_dim, action_dim, max_action,
                             layer_norm=args.layer_norm) for _ in range(self.n)]
        self.actors_target = [Actor(state_dim, action_dim, max_action,
                                    layer_norm=args.layer_norm) for _ in range(self.n)]
        self.actors_optimizer = [torch.optim.Adam(
            self.actors[i].parameters(), lr=args.actor_lr) for i in range(self.n)]

        for i in range(self.n):
            self.actors_target[i].load_state_dict(self.actors[i].state_dict())

        # critic
        self.critic = CriticTD3(state_dim, action_dim,
                                layer_norm=args.layer_norm)
        self.critic_target = CriticTD3(
            state_dim, action_dim, layer_norm=args.layer_norm)
        self.critic_target.load_state_dict(self.critic.state_dict())
        self.critic_optimizer = torch.optim.Adam(
            self.critic.parameters(), lr=args.critic_lr)

        # cuda
        if torch.cuda.is_available():
            for i in range(self.n):
                self.actors[i] = self.actors[i].cuda()
                self.actors_target[i] = self.actors_target[i].cuda()
            self.critic = self.critic.cuda()
            self.critic_target = self.critic_target.cuda()

        # shared memory
        for i in range(self.n):
            self.actors[i].share_memory()
            self.actors_target[i].share_memory()
        self.critic.share_memory()
        self.critic_target.share_memory()

        # hyper-parameters
        self.tau = args.tau
        self.discount = args.discount
        self.batch_size = args.batch_size
        self.policy_noise = args.policy_noise
        self.noise_clip = args.noise_clip
        self.policy_freq = args.policy_freq
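The tau and policy_freq values stored above drive TD3's delayed soft target updates, which example #1 applies to each of its n actors. A minimal sketch of such an update follows; the soft_update helper is illustrative, not taken from DP-ERL:

import torch

def soft_update(target, source, tau):
    # Polyak averaging: target <- tau * source + (1 - tau) * target
    with torch.no_grad():
        for t_param, s_param in zip(target.parameters(), source.parameters()):
            t_param.mul_(1.0 - tau).add_(s_param, alpha=tau)

# After each delayed policy update (every policy_freq critic steps):
#   for i in range(self.n):
#       soft_update(self.actors_target[i], self.actors[i], self.tau)
#   soft_update(self.critic_target, self.critic, self.tau)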
Code example #2
    def __init__(self, state_dim, action_dim, max_action, memory, args):

        # actor
        self.actor = Actor(state_dim,
                           action_dim,
                           max_action,
                           layer_norm=args.layer_norm)
        self.actor_target = Actor(state_dim,
                                  action_dim,
                                  max_action,
                                  layer_norm=args.layer_norm)
        self.actor_target.load_state_dict(self.actor.state_dict())
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                lr=args.actor_lr)

        # critic
        self.critic = CriticTD3(state_dim,
                                action_dim,
                                layer_norm=args.layer_norm)
        self.critic_target = CriticTD3(state_dim,
                                       action_dim,
                                       layer_norm=args.layer_norm)
        self.critic_target.load_state_dict(self.critic.state_dict())
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 lr=args.critic_lr)

        # cuda
        if torch.cuda.is_available():
            self.actor = self.actor.cuda()
            self.actor_target = self.actor_target.cuda()
            self.critic = self.critic.cuda()
            self.critic_target = self.critic_target.cuda()

        # misc
        self.criterion = nn.MSELoss()
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.max_action = max_action
        self.memory = memory

        # hyper-parameters
        self.tau = args.tau
        self.discount = args.discount
        self.batch_size = args.batch_size
        self.policy_noise = args.policy_noise
        self.noise_clip = args.noise_clip
        self.policy_freq = args.policy_freq
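The policy_noise, noise_clip, and discount fields feed TD3's target computation: clipped Gaussian noise smooths the target action, and the minimum of the two critic outputs curbs Q-value overestimation. A hedged sketch of that step, assuming CriticTD3 returns both Q-heads and that the batch tensors are sampled from self.memory:

import torch

def td3_target(agent, actions, next_states, rewards, dones):
    # Target-policy smoothing: clipped Gaussian noise on the target action.
    noise = (torch.randn_like(actions) * agent.policy_noise).clamp(
        -agent.noise_clip, agent.noise_clip)
    next_actions = (agent.actor_target(next_states) + noise).clamp(
        -agent.max_action, agent.max_action)
    # Clipped double-Q: CriticTD3 returning a pair of Q-values is an assumption.
    q1, q2 = agent.critic_target(next_states, next_actions)
    return rewards + agent.discount * (1.0 - dones) * torch.min(q1, q2).detach()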
Code example #3
    # (snippet assumes gym, numpy as np, and matplotlib.pyplot as plt are imported)
    # create a Swimmer environment:
    print("Creating environment")
    env = gym.make('Swimmer-v2')
    # initialize the environment:
    env.reset()
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    max_action = int(env.action_space.high[0])  # action bound (1.0 for Swimmer-v2, so this is 1)

    for filename in filenames:
        # load the actor and critic from the checkpoint files:
        print("Loading actor")
        actor = Actor(state_dim, action_dim, max_action, args)
        actor.load_model(actor_directory, "actor" + filename)
        critic = CriticTD3(state_dim, action_dim, layer_norm=args.layer_norm)
        critic.load_model(actor_directory, "critic" + filename)

        paramsA = np.array(actor.get_params())
        print("min : " + str(np.min(paramsA)))
        print("max : " + str(np.max(paramsA)))
        picture = np.reshape(
            paramsA[1:-1],
            (249, 500))  # reshape the parameters into an image (2 values are dropped, never mind)
        plt.imsave("actor" + filename, picture, vmin=-3, vmax=3, format='png')
        paramsC = np.array(critic.get_params())
        print("min : " + str(np.min(paramsC)))
        print("max : " + str(np.max(paramsC)))
        picture2 = np.reshape(
            paramsC[1:-1],
            (249, 500))  # shape assumed by analogy with the actor; the original snippet is truncated here
        plt.imsave("critic" + filename, picture2, vmin=-3, vmax=3, format='png')
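Pinning vmin=-3 and vmax=3 in plt.imsave keeps the color scale identical across checkpoints, so heatmaps of the flattened weights remain directly comparable; parameter values outside that range are simply clamped to the ends of the colormap.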