def Start(self):
    """Load a trained network and a fixed eval task, search for the best
    action sequence once, then replay it in the viewer in an endless loop.

    Reads ``self.Config.ViewerAgent`` for the agent configuration. Network
    weights and the task file are hard-coded paths relative to the working
    directory. Never returns (infinite render loop).
    """
    # NOTE(review): paths are hard-coded to one specific training run / task;
    # consider moving them into self.Config if this is used beyond debugging.
    cnf = "BestLog/BestLog180126082522.cnf"
    wgt = "BestLog/BestLog180126082522.wgt"
    timeLimit = 0.9

    net = Network()
    net.Load(cnf, wgt)
    net.TimeLimit = timeLimit

    model = Model()
    taskName = "TaskEval/EvalTask114.task"
    task = MujocoTask(model, taskName)
    env = MujocoEnv(model)

    agentConfig = self.Config.ViewerAgent
    agent = Agent(agentConfig, net, model, task)
    bestAction = agent.SearchBestAction()

    # Replay the found action sequence forever, resetting the simulator
    # to the task's start state before each pass.
    while True:
        env.SetSimState(task.StartState)
        for action in bestAction:
            env.Step(action)
            env.Render()
def Expand(self, network: NetworkModel, env: MujocoEnv, task, valueCalc):
    """Expand this leaf node of the search tree.

    Replays the parent's simulator state, applies this node's action,
    evaluates the resulting observation with the network, creates one child
    per action (seeded with the policy prior), and initializes this node's
    visit statistics with the predicted value.

    Args:
        network: NetworkModel whose ``Model.predict`` yields (policy, value).
        env: MujocoEnv used to step the simulation.
        task: task object passed through to the env accessors.
        valueCalc: provides ``CalcValue(score)`` for terminal states.

    Returns:
        float: the value used to initialize W/Q (network estimate, or the
        score-derived value when the state is terminal).
    """
    # Reproduce the parent's simulator state, then apply this node's action.
    env.SetSimState(self.Parent.State)
    env.Step(self.ActionNum)
    self.State = env.GetSimState()
    self.Observation = env.GetObservation(task, network.TimeLimit)
    self.Score = env.GetScore(task)
    self.IsTerminate = env.IsTerminate(task, self.Score, network.TimeLimit)

    # Single-position inference: batch of one observation.
    policy_arr, value_arr = network.Model.predict(np.array([self.Observation]))
    policy = policy_arr[0]
    # Average the value head outputs (equivalent to sum/len, clearer intent).
    value = float(np.mean(value_arr[0]))

    # Terminal states take their value from the achieved score instead of
    # the network estimate.
    if self.IsTerminate:
        value = valueCalc.CalcValue(self.Score)

    # One child per action, carrying its prior probability.
    for actionNum, prior in enumerate(policy):
        self.Children.append(Node(self, prior, actionNum))
    self.IsExpanded = True

    # Back up the initial visit: one visit worth `value`.
    self.N = 1
    self.W = value
    self.Q = value
    return value
def MakeHopperTask(self, modelNum, trainNum, taskTrainDir, evalNum, taskEvalDir):
    """Generate hopper start poses and write train/eval task files.

    Samples ``modelNum`` random joint configurations (the first sample keeps
    the default state), simulates each for up to 400 steps, and keeps only
    poses where the foot sensor registers ground contact. Each task file
    pairs one randomly chosen accepted pose with the first accepted pose.

    Args:
        modelNum: number of candidate poses to sample.
        trainNum: number of training task files to write into taskTrainDir.
        taskTrainDir: output directory for TrainTask*.task files.
        evalNum: number of evaluation task files to write into taskEvalDir.
        taskEvalDir: output directory for EvalTask*.task files.
    """
    env = MujocoEnv(self)
    state = env.GetSimState()

    jsons = []
    for j in range(modelNum):
        if j != 0:
            # Hoist the loop-invariant address lookup.
            addr = env.Model.MujocoModel.get_joint_qpos_addr
            state.qpos[addr("rootx")] = random.uniform(-0.8, 0.8)
            # NOTE(review): uniform(2, 2) always yields 2.0 — presumably a
            # fixed drop height; confirm the range was not meant to vary.
            state.qpos[addr("rootz")] = random.uniform(2, 2)
            state.qpos[addr("thigh_joint")] = pt = random.uniform(-0.1, -0.5)
            state.qpos[addr("leg_joint")] = pl = random.uniform(-0.1, -0.5)
            state.qpos[addr("rooty")] = random.uniform(-0.2, 0.2) + (pt + pl) / 2
            state.qpos[addr("foot_joint")] = random.uniform(-0.5, 0.5)
        env.SetSimState(state)

        # Let the pose settle; accept it only if the foot sensor reports a
        # vertical force >= 20 after the first 10 steps (i.e. it landed).
        ok = False
        for i in range(400):
            env.Step(env.GetActionNum() - 1)
            sensor = env.GetSensorValue(3, "a_foot_joint")
            if sensor[2] >= 20 and i >= 10:
                ok = True
                break
        if not ok:
            continue

        # Record the settled joint positions for the accepted pose.
        joints = [
            'thigh_joint', 'leg_joint', 'foot_joint', 'rooty', 'rootx', 'rootz'
        ]
        task = {joint: env.GetSensorValue(1, "jp_" + joint)[0] for joint in joints}
        jsons.append(task)

    def writeTaskFiles(count, directory, prefix):
        # Each file pairs a random accepted pose with the canonical first one.
        for i in range(count):
            filePath = directory + "/" + prefix + str(i) + ".task"
            print(filePath)
            task1 = random.choice(jsons)
            task2 = jsons[0]
            with open(filePath, "wt") as f:
                json.dump([task1, task2], f)

    writeTaskFiles(trainNum, taskTrainDir, "TrainTask")
    writeTaskFiles(evalNum, taskEvalDir, "EvalTask")