Example #1
0
    def Start(self):
        """Load a trained network and replay its best action sequence in the viewer.

        Loads a fixed config/weight snapshot, searches for the best action
        sequence on a fixed evaluation task, then renders that sequence in
        an endless loop for visual inspection (never returns).
        """

        # NOTE(review): hard-coded snapshot paths; self.Config.FilePath.BestModel
        # looks like the intended source — confirm and switch if so.
        cnf = "BestLog/BestLog180126082522.cnf"
        wgt = "BestLog/BestLog180126082522.wgt"
        timeLimit = 0.9

        net = Network()
        net.Load(cnf, wgt)
        net.TimeLimit = timeLimit

        model = Model()
        taskName = "TaskEval/EvalTask114.task"
        task = MujocoTask(model, taskName)
        env = MujocoEnv(model)

        agentConfig = self.Config.ViewerAgent
        agent = Agent(agentConfig, net, model, task)

        bestAction = agent.SearchBestAction()

        # Replay the found sequence forever so the result can be watched.
        while True:

            env.SetSimState(task.StartState)

            for action in bestAction:

                env.Step(action)
                env.Render()
Example #2
0
    def Expand(self, network: NetworkModel, env: MujocoEnv, task, valueCalc):
        """Expand this MCTS node: simulate its action, evaluate the resulting
        state with the network, create one child per policy entry, and
        initialize the visit statistics.

        Returns the value estimate to back up: the network's value head mean,
        or the score-derived value when the state is terminal.
        """

        # Reproduce the parent's simulator state, then apply this node's action.
        env.SetSimState(self.Parent.State)
        env.Step(self.ActionNum)

        self.State = env.GetSimState()
        self.Observation = env.GetObservation(task, network.TimeLimit)
        self.Score = env.GetScore(task)
        self.IsTerminate = env.IsTerminate(task, self.Score, network.TimeLimit)

        # Single-sample batch prediction: policy priors + value head outputs.
        policy_arr, value_arr = network.Model.predict(
            np.array([self.Observation]))

        policy = policy_arr[0]

        # Mean of the value head outputs (was sum/len — same result, clearer).
        value = np.mean(value_arr[0])

        # A terminal state's value comes from its actual score, not the net.
        if self.IsTerminate:
            value = valueCalc.CalcValue(self.Score)

        # One child per action, seeded with that action's prior probability.
        for actionNum, prior in enumerate(policy):
            self.Children.append(Node(self, prior, actionNum))

        self.IsExpanded = True
        self.N = 1
        self.W = value
        self.Q = value

        return value
    def MakeHopperTask(self, modelNum, trainNum, taskTrainDir, evalNum,
                       taskEvalDir):
        """Generate hopper start poses by simulation and write task files.

        For each of ``modelNum`` candidates, a start pose (default for the
        first, randomized for the rest) is simulated; a pose is kept only if
        the foot sensor reports a solid landing. ``trainNum`` train and
        ``evalNum`` eval task files are then written, each pairing one random
        kept pose with the baseline (first) pose.
        """

        env = MujocoEnv(self)

        state = env.GetSimState()

        jsons = []

        for j in range(modelNum):

            # First candidate keeps the default pose; the rest are randomized.
            if j != 0:
                addr = env.Model.MujocoModel.get_joint_qpos_addr
                state.qpos[addr("rootx")] = random.uniform(-0.8, 0.8)
                # NOTE(review): uniform(2, 2) is always exactly 2 — looks like
                # a placeholder range; confirm whether it should vary.
                state.qpos[addr("rootz")] = random.uniform(2, 2)
                state.qpos[addr("thigh_joint")] = pt = random.uniform(-0.1, -0.5)
                state.qpos[addr("leg_joint")] = pl = random.uniform(-0.1, -0.5)
                # Tilt the torso to roughly match the leg joints' average angle.
                state.qpos[addr("rooty")] = random.uniform(-0.2, 0.2) + (pt + pl) / 2
                state.qpos[addr("foot_joint")] = random.uniform(-0.5, 0.5)

            env.SetSimState(state)

            # Simulate and keep the pose only if the foot registers a strong
            # enough contact (sensor z >= 20) after a short settling period.
            ok = False

            for i in range(400):
                env.Step(env.GetActionNum() - 1)

                sensor = env.GetSensorValue(3, "a_foot_joint")
                if sensor[2] >= 20 and i >= 10:
                    ok = True
                    break

            if not ok:
                continue

            joints = [
                'thigh_joint', 'leg_joint', 'foot_joint', 'rooty', 'rootx',
                'rootz'
            ]

            # Record the settled joint positions for this pose.
            task = {}
            for name in joints:
                task[name] = env.GetSensorValue(1, "jp_" + name)[0]

            jsons.append(task)

        def writeTasks(taskDir, prefix, count):
            # Write `count` task files, each pairing a random kept pose with
            # the baseline (first) pose. (Deduplicates the former twin loops.)
            for i in range(count):
                filePath = taskDir + "/" + prefix + str(i) + ".task"
                print(filePath)

                task1 = random.choice(jsons)
                task2 = jsons[0]

                with open(filePath, "wt") as f:
                    json.dump([task1, task2], f)

        writeTasks(taskTrainDir, "TrainTask", trainNum)
        writeTasks(taskEvalDir, "EvalTask", evalNum)