Example #1
	def __init__(self):
		np.random.seed(seed = int(time.time()))
		self.env = Env(600)

		self.num_slaves = 16
		self.num_state = self.env.GetNumState()
		self.num_action = self.env.GetNumAction()
		self.num_dofs = self.env.GetNumDofs()
		self.num_muscles = self.env.GetNumMuscles()

		self.num_epochs = 10
		self.num_epochs_muscle = 3
		self.num_evaluation = 0
		self.num_tuple_so_far = 0
		self.num_episode = 0
		self.num_tuple = 0
		self.num_simulation_Hz = self.env.GetSimulationHz()
		self.num_control_Hz = self.env.GetControlHz()
		self.num_simulation_per_control = self.num_simulation_Hz // self.num_control_Hz

		self.gamma = 0.95	# discount factor
		self.lb = 0.95		# lambda for generalized advantage estimation (GAE)

		self.buffer_size = 2048
		self.batch_size = 128
		self.muscle_batch_size = 128
		self.replay_buffer = ReplayBuffer(30000)
		self.muscle_buffer = MuscleBuffer(self.buffer_size*4)

		self.model = SimulationNN(self.num_state,self.num_action)
		self.muscle_model = MuscleNN(self.env.GetNumTotalMuscleRelatedDofs(),self.num_dofs-6,self.num_muscles)	# num_dofs-6: all DOFs except the 6 root (floating-base) DOFs

		if use_cuda:
			self.model.cuda()
			self.muscle_model.cuda()

		self.default_learning_rate = 1E-4
		self.default_clip_ratio = 0.2
		self.learning_rate = self.default_learning_rate
		self.clip_ratio = self.default_clip_ratio
		self.optimizer = optim.Adam(self.model.parameters(),lr=self.learning_rate)
		self.optimizer_muscle = optim.Adam(self.muscle_model.parameters(),lr=self.learning_rate)
		self.max_iteration = 50000

		self.w_entropy = 0.0001	# entropy bonus weight in the actor loss

		self.loss_actor = 0.0
		self.loss_critic = 0.0
		self.loss_muscle = 0.0
		self.rewards = []
		self.sum_return = 0.0
		self.max_return = -1.0
		self.max_return_epoch = 1
		self.tic = time.time()

		self.episodes = [None]*self.num_slaves
		for j in range(self.num_slaves):
			self.episodes[j] = EpisodeBuffer()
		self.env.Resets(True)
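
For orientation, here is a generic sketch of how the hyperparameters stored above (gamma, lb as the GAE lambda, clip_ratio, w_entropy) typically enter a PPO update. This is not the project's Train() code; the function names and exact loss form are illustrative assumptions.

import torch

def gae(rewards, values, gamma=0.95, lb=0.95):
    # Generalized Advantage Estimation over one episode; values holds len(rewards)+1 entries.
    advantages, running = [0.0] * len(rewards), 0.0
    for t in reversed(range(len(rewards))):
        delta = rewards[t] + gamma * values[t + 1] - values[t]
        running = delta + gamma * lb * running
        advantages[t] = running
    return advantages

def ppo_actor_loss(log_prob_new, log_prob_old, advantage, entropy,
                   clip_ratio=0.2, w_entropy=0.0001):
    # Clipped surrogate objective with an entropy bonus.
    ratio = torch.exp(log_prob_new - log_prob_old)
    surr1 = ratio * advantage
    surr2 = torch.clamp(ratio, 1.0 - clip_ratio, 1.0 + clip_ratio) * advantage
    return -torch.min(surr1, surr2).mean() - w_entropy * entropy.mean()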
Example #2
def worker(meta_file, proc_num, state_sender, result_sender, action_receiver,
           reset_receiver, sample_receiver):
    """

    :type meta_file: str
    :type proc_num: int
    :type result_sender: Connection
    :type state_sender: Connection
    :type action_receiver: Connection
    :type reset_receiver: Connection
    :type sample_receiver: Connection
    :return:
    """

    # reset_flag values received from reset_receiver:
    # 0 : go on (no reset)
    # 1 : reset
    # 2 : reset with marginal samples

    env = Env(meta_file, proc_num)

    current_path = os.path.dirname(
        os.path.abspath(__file__)) + '/pushrecoverybvhgenerator'
    origmot = bvf.readBvhFile_JointMotion(
        current_path + '/data/walk_simple.bvh', 1.)
    jed.alignMotionToOrigin(origmot)

    state = None
    while True:
        reset_flag = reset_receiver.recv()

        if reset_flag == 2:
            marginal_sample = sample_receiver.recv()
            env.SetMarginalSampled(marginal_sample[0], marginal_sample[1])

        if reset_flag == 1 or reset_flag == 2:
            env.Reset1()
            if env.IsWalkingParamChange():
                walking_param = env.GetWalkingParams()
                bvh_str = bvh_generator_server.get_paramed_bvh_walk(
                    origmot,
                    walking_param[0],
                    walking_param[1],
                    walking_param[2],
                    scale=1.)
                env.SetBvhStr(bvh_str)
            env.Reset2(True)
            state = env.GetState()

        state_sender.send(state)
        action = action_receiver.recv()
        env.SetAction(action)
        env.StepsAtOnce()
        state = env.GetState()
        reward = env.GetReward()
        is_done = env.IsEndOfEpisode()
        result_sender.send((reward, is_done, proc_num))
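
For orientation, the trainer side of this pipe protocol looks roughly like the following per-step exchange. This is a sketch: the pipes are assumed to come from multiprocessing.Pipe, and policy.act() is a hypothetical stand-in for the policy network, not part of the project.

# One trainer-side step against worker i; the flag values follow the comment in
# worker(): 0 = go on, 1 = reset, 2 = reset with marginal samples.
flag = 1 if is_done else 0                  # ask for a reset if the last step ended the episode
reset_sender[i].send(flag)
state = state_receiver[i].recv()            # state produced by worker()
action_sender[i].send(policy.act(state))    # policy.act() is a hypothetical stand-in
reward, is_done, proc_num = result_receiver[i].recv()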
Example #3
File: PPO.py  Project: snumrl/MSS
	def __init__(self,num_slaves):
		np.random.seed(seed = int(time.time()))
		self.env = Env(num_slaves)

		self.num_slaves = num_slaves
		self.num_state = self.env.GetNumState()
		self.num_action = self.env.GetNumAction()
		self.num_dofs = self.env.GetNumDofs()
		self.num_muscles = self.env.GetNumMuscles()

		self.num_epochs = 20
		self.num_epochs_muscle = 10
		self.num_evaluation = 0
		self.num_tuple_so_far = 0
		self.num_episode = 0
		self.num_tuple = 0
		self.num_simulation_Hz = self.env.GetSimulationHz()
		self.num_control_Hz = self.env.GetControlHz()
		self.num_total_muscle_related_dofs = self.env.GetNumTotalMuscleRelatedDofs()
		self.num_simulation_per_control = self.num_simulation_Hz // self.num_control_Hz
		self.use_muscle_nn = True		

		self.gamma = 0.99
		self.lb = 0.95
		self.clip_ratio = 0.2
		
		self.buffer_size = 2048
		self.batch_size = 128
		self.muscle_batch_size = 128
		self.replay_buffer = ReplayBuffer(30000)
		self.muscle_buffer = MuscleBuffer(self.buffer_size*4)

		self.model = SimulationNN(self.num_state,self.num_action)
		self.muscle_model = MuscleNN(self.num_total_muscle_related_dofs,self.num_dofs-6,self.num_muscles)

		if use_cuda:
			self.model.cuda()
			self.muscle_model.cuda()

		self.optimizer = optim.Adam(self.model.parameters(),lr=1E-4)
		self.optimizer_muscle = optim.Adam(self.muscle_model.parameters(),lr=5E-5)

		self.w_entropy = 0.0

		self.alpha = 1.0
		self.alpha_decay = 200.0
		self.loss_actor = []
		self.loss_critic = []
		self.loss_muscle = []
		self.rewards = []
		self.sum_return = 0.0
		self.threshold = 6.0
		self.current_avg_reward = 0.0
		self.tic = time.time()

Example #4
    def __init__(self, meta_file, num_slaves=16):
        # plt.ion()
        np.random.seed(seed=int(time.time()))
        self.num_slaves = num_slaves
        self.meta_file = meta_file
        self.env = Env(meta_file, -1)
        self.use_muscle = self.env.UseMuscle()
        self.num_state = self.env.GetNumState()
        self.num_action = self.env.GetNumAction()
        self.num_muscles = self.env.GetNumMuscles()

        self.num_epochs = 10
        self.num_epochs_muscle = 3
        self.num_evaluation = 0
        self.num_tuple_so_far = 0
        self.num_episode = 0
        self.num_tuple = 0
        self.num_simulation_Hz = self.env.GetSimulationHz()
        self.num_control_Hz = self.env.GetControlHz()
        self.num_simulation_per_control = self.num_simulation_Hz // self.num_control_Hz

        self.gamma = 0.95
        self.lb = 0.99

        self.buffer_size = 8192
        self.batch_size = 256
        self.muscle_batch_size = 128
        self.replay_buffer = ReplayBuffer(30000)
        self.muscle_buffer = MuscleBuffer(30000)

        self.model = SimulationNN(self.num_state,self.num_action)

        self.muscle_model = MuscleNN(self.env.GetNumTotalMuscleRelatedDofs(),self.num_action,self.num_muscles)

        if use_cuda:
            self.model.cuda()
            self.muscle_model.cuda()

        self.default_learning_rate = 1E-4
        self.default_clip_ratio = 0.2
        self.learning_rate = self.default_learning_rate
        self.clip_ratio = self.default_clip_ratio
        self.optimizer = optim.Adam(self.model.parameters(),lr=self.learning_rate)
        self.optimizer_muscle = optim.Adam(self.muscle_model.parameters(),lr=self.learning_rate)
        self.max_iteration = 50000

        self.w_entropy = -0.001

        self.loss_actor = 0.0
        self.loss_critic = 0.0
        self.loss_muscle = 0.0
        self.rewards = []
        self.sum_return = 0.0
        self.max_return = -1.0
        self.max_return_epoch = 1
        self.tic = time.time()

        # for adaptive sampling, marginal value training
        self.use_adaptive_sampling = self.env.UseAdaptiveSampling()
        self.marginal_state_num = self.env.GetMarginalStateNum()
        self.marginal_buffer = MargianlBuffer(30000)
        self.marginal_model = MarginalNN(self.marginal_state_num)
        self.marginal_value_avg = 1.
        self.marginal_learning_rate = 1e-3
        if use_cuda:
            self.marginal_model.cuda()
        self.marginal_optimizer = optim.SGD(self.marginal_model.parameters(), lr=self.marginal_learning_rate)
        self.marginal_loss = 0.0
        self.marginal_samples = []
        self.marginal_sample_cumulative_prob = []
        self.marginal_sample_num = 2000
        self.marginal_k = self.env.GetMarginalParameter()
        self.mcmc_burn_in = 1000
        self.mcmc_period = 20

        self.total_episodes = []

        self.state_sender = []  # type: list[Connection]
        self.result_sender = []  # type: list[Connection]
        self.state_receiver = []  # type: list[Connection]
        self.result_receiver = []  # type: list[Connection]
        self.action_sender = []  # type: list[Connection]
        self.reset_sender = []  # type: list[Connection]
        self.marginal_sample_sender = []  # type: list[Connection]
        self.envs = []  # type: list[Process]

        self.init_envs()
        self.idx = 0
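
The Connection and Process lists above are filled by self.init_envs(); a possible wiring, inferred from those attribute names and the worker() signature in Example #2, is sketched below. This is an assumption for illustration, not the project's actual init_envs() implementation.

from multiprocessing import Pipe, Process

def init_envs(self):
    # Create one unidirectional pipe per channel and per slave, keep both ends,
    # and start a worker process wired to its receiving/sending ends.
    for i in range(self.num_slaves):
        state_receiver, state_sender = Pipe(duplex=False)      # worker -> trainer: states
        result_receiver, result_sender = Pipe(duplex=False)    # worker -> trainer: (reward, done, proc_num)
        action_receiver, action_sender = Pipe(duplex=False)    # trainer -> worker: actions
        reset_receiver, reset_sender = Pipe(duplex=False)      # trainer -> worker: reset flags
        sample_receiver, sample_sender = Pipe(duplex=False)    # trainer -> worker: marginal samples

        self.state_sender.append(state_sender)
        self.state_receiver.append(state_receiver)
        self.result_sender.append(result_sender)
        self.result_receiver.append(result_receiver)
        self.action_sender.append(action_sender)
        self.reset_sender.append(reset_sender)
        self.marginal_sample_sender.append(sample_sender)

        p = Process(target=worker,
                    args=(self.meta_file, i, state_sender, result_sender,
                          action_receiver, reset_receiver, sample_receiver))
        p.daemon = True
        p.start()
        self.envs.append(p)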