Beispiel #1
0
    def __init__(self):
        """Create the Walker2d-v2 environment, the PPO network with its
        optim.Adam optimizer, and the per-run CSV log paths.

        Hyperparameters are read from the ``args`` object in the enclosing
        scope: ``seed``, ``lr``, ``eps``, ``ppoepoch``, ``gamma`` and
        ``gaelambda``.
        """
        # Environment, seeded from the run arguments.
        env = gym.make('Walker2d-v2')
        env.seed(args.seed)
        self.envs = env

        # Fixed sizes; inputsize/actionsize are handed to the network below.
        # NOTE(review): envpoch is presumably the number of environment steps
        # gathered per update — confirm against the training loop.
        self.envpoch = 2048
        self.inputsize = 17
        self.actionsize = 6

        # Network in double precision on the configured device, plus optimizer.
        network = PPOnet(self.inputsize, self.actionsize)
        self.net = network.double().to(device)
        self.optimizer = optim.Adam(
            self.net.parameters(), lr=args.lr, eps=args.eps)

        # Algorithm settings.
        self.clip_param = 0.2
        self.PPOepoch = args.ppoepoch
        self.gamma = args.gamma
        self.lam = args.gaelambda

        # Bookkeeping that starts out empty.
        self.out_record = None
        self.trajectories = []

        # All three CSV files share one suffix that encodes the hyperparameters.
        run_tag = (f'_lr{args.lr}_ppoepoch{args.ppoepoch}'
                   f'_gamma{args.gamma}_gaelambda{args.gaelambda}.csv')
        self.path_lsa = './csvfiles/lossa' + run_tag
        self.path_lsv = './csvfiles/lossv' + run_tag
        self.path_ep = './csvfiles/episode' + run_tag

        self.scaler = utils.Scaler(17)
Beispiel #2
0
    def __init__(self):
        """Create the Walker2d-v2 environment, actor and critic networks
        (each with a second "target" instance and an RMSprop optimizer),
        and the CSV log paths.

        ``ppoepoch``, ``gamma`` and ``gaelambda`` are read from the ``args``
        object in the enclosing scope.
        """
        def on_device(module):
            # Cast the module to double precision and move it to `device`.
            return module.double().to(device)

        rmsprop = torch.optim.RMSprop

        # Environment; seeding is currently disabled.
        self.env = gym.make('Walker2d-v2')
        #self.env.seed(args.seed)

        # Fixed sizes.
        self.envpoch = 2048
        self.inputsize = 17
        self.actionsize = 6

        # Actor: online network, target network, optimizer on the online one.
        # NOTE(review): the target networks are freshly constructed here and
        # are not synced with the online networks in this method — confirm
        # that a sync happens elsewhere.
        self.actor = on_device(Actor())
        self.actor_target = on_device(Actor())
        self.actor_optimizer = rmsprop(self.actor.parameters(), lr=1e-4)

        # Critic: same layout, with a larger learning rate.
        self.critic = on_device(Critic())
        self.critic_target = on_device(Critic())
        self.critic_optimizer = rmsprop(self.critic.parameters(), lr=1e-3)
        #self.random_noise = random_process.OrnsteinUhlenbeckActionNoise(self.actionsize)

        # Algorithm settings.
        self.clip_param = 0.2
        self.PPOepoch = args.ppoepoch
        self.gamma = args.gamma
        self.lam = args.gaelambda

        # Bookkeeping that starts out empty / at zero.
        self.out_record = None
        self.trainstep = 0
        self.trajectories = []

        # Fixed CSV log locations.
        self.path_lsa = './csvfiles/lossa_lr.csv'
        self.path_lsv = './csvfiles/lossv_lr.csv'
        self.path_ep = './csvfiles/episode_lr.csv'

        self.scaler = utils.Scaler(17)
Beispiel #3
0
    def __init__(self):
        """Create the Walker2d-v2 environment, separate value and policy
        networks (each with its own optim.Adam optimizer), the log and
        checkpoint paths, and restore a checkpoint if one exists.

        Raises:
            KeyError: if a checkpoint file exists at ``self.path_t7`` but
                does not contain the expected ``'valuenet'`` and
                ``'policynet'`` entries.
        """
        self.env = gym.make('Walker2d-v2')
        self.envpoch = 1000

        # Two independent networks, both in double precision on `device`.
        self.valuenet = valuenet().double().to(device)
        self.policynet = policynet().double().to(device)
        self.optimizer_value = optim.Adam(self.valuenet.parameters(),
                                          lr=7e-5,
                                          eps=1e-7)
        self.optimizer_policy = optim.Adam(self.policynet.parameters(),
                                           lr=7e-5,
                                           eps=1e-7)
        self.clip_param = 0.2

        self.memory = store()
        self.gamma = 0.99
        self.lam = 0.95
        self.out_record = None
        self.trajectories = []

        self.path_t7 = 'model_doublenet.t7'
        self.path_lsa = "loss_doublenet_a.csv"
        self.path_lsv = "loss_doublenet_v.csv"
        self.path_ep = "episode_doublenet.csv"
        self.scaler = utils.Scaler(18)

        if os.path.isfile(self.path_t7):
            # This agent has no single `net` attribute, so restoring through
            # `self.net` fails with AttributeError; restore both networks.
            checkpoint = torch.load(self.path_t7, map_location='cpu')
            # NOTE(review): assumes the checkpoint is a mapping holding the two
            # state dicts under 'valuenet' and 'policynet' — confirm against
            # the code that writes self.path_t7.
            expected = ('valuenet', 'policynet')
            absent = [key for key in expected if key not in checkpoint]
            if absent:
                raise KeyError(
                    f"checkpoint {self.path_t7!r} is missing entries: {absent}")
            self.valuenet.load_state_dict(checkpoint['valuenet'])
            self.policynet.load_state_dict(checkpoint['policynet'])
Beispiel #4
0
 def __init__(self):
     """Create the Walker2d-v2 environment, the PPO network with its
     optim.Adam optimizer, the rollout store, and the per-run CSV log paths.

     ``lr``, ``eps`` and ``ppoepoch`` are read from the ``args`` object in
     the enclosing scope; ``gamma`` and ``lam`` are fixed here.
     """
     self.env = gym.make('Walker2d-v2')
     self.envpoch = 1000

     # Network in double precision on the configured device, plus optimizer.
     network = PPOnet()
     self.net = network.double().to(device)
     self.optimizer = optim.Adam(
         self.net.parameters(), lr=args.lr, eps=args.eps)

     # Algorithm settings.
     self.clip_param = 0.2
     self.PPOepoch = args.ppoepoch
     self.memory = store()
     self.gamma = 0.99
     self.lam = 0.95

     # Bookkeeping that starts out empty.
     self.out_record = None
     self.trajectories = []

     # All three CSV files share one suffix that encodes lr and ppoepoch.
     run_tag = f'_lr{args.lr}_ppoepoch{args.ppoepoch}.csv'
     self.path_lsa = './csvfiles/lossa' + run_tag
     self.path_lsv = './csvfiles/lossv' + run_tag
     self.path_ep = './csvfiles/episode' + run_tag

     self.scaler = utils.Scaler(18)