Example #1
    def Start(self):

        # Note: filePath is assigned but unused; the config and weight
        # paths below are hardcoded to a specific best-model snapshot.
        filePath = self.Config.FilePath.BestModel

        cnf = "BestLog/BestLog180126082522.cnf"
        wgt = "BestLog/BestLog180126082522.wgt"
        timeLimit = 0.9

        net = Network()
        net.Load(cnf, wgt)
        net.TimeLimit = timeLimit

        model = Model()
        taskName = "TaskEval/EvalTask114.task"
        task = MujocoTask(model, taskName)
        #task = MujocoTask.LoadRandom(model, self.Config.Task.EvalDir)
        env = MujocoEnv(model)

        agentConfig = self.Config.ViewerAgent
        agent = Agent(agentConfig, net, model, task)

        bestAction = agent.SearchBestAction()

        # Replay the searched action sequence from the task's start
        # state in an endless viewer loop.
        while True:

            env.SetSimState(task.StartState)

            for action in bestAction:

                env.Step(action)

                #print(env.GetObservation(task))
                env.Render()
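
The cnf/wgt pair suggests the network is serialized as an architecture file plus a weights file. A minimal sketch of what Network.Load and Network.Save might do, assuming a Keras backend; the real Network class is not shown, so every name below is hypothetical:

    from tensorflow.keras.models import model_from_json

    class Network:
        def Load(self, configPath, weightPath):
            # Rebuild the architecture from the JSON config file,
            # then restore the trained weights.
            with open(configPath, "rt") as f:
                self.Model = model_from_json(f.read())
            self.Model.load_weights(weightPath)

        def Save(self, configPath, weightPath):
            with open(configPath, "wt") as f:
                f.write(self.Model.to_json())
            self.Model.save_weights(weightPath)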
Example #2
    def MakeGenerationModel(self):
        
        filePath = self.Config.FilePath

        hasBest = os.path.exists(filePath.BestModel.Config)
        hasBest |= os.path.exists(filePath.BestModel.Weight)
        
        hasNext = os.path.exists(filePath.NextGeneration.Config)
        hasNext |= os.path.exists(filePath.NextGeneration.Weight)

        if not hasBest:

            model = MujocoModelHumanoid()
            env = MujocoEnv(model)
            
            dataDir = self.Config.Task.TrainDir
            dataList = os.listdir(dataDir)
            task = MujocoTask(model, os.path.join(dataDir, dataList[0]))


            net = NetworkModel()
            net.Build(self.Config.Build, env.GetObservationShape(task), env.GetActionNum(), self.Config.Worker.InitialTimeLimit)

            print("Make best model")

            net.Save(filePath.BestModel.Config, filePath.BestModel.Weight)


        if not hasNext:
                
            print("Make next generation model from copy of best model")

            shutil.copyfile(filePath.BestModel.Config, filePath.NextGeneration.Config)
            shutil.copyfile(filePath.BestModel.Weight, filePath.NextGeneration.Weight)
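
This method relies on a FilePath configuration holding Config/Weight path pairs for BestModel and NextGeneration. A hypothetical sketch of that structure, since the real Config class is not shown:

    import os
    from types import SimpleNamespace

    def MakeFilePathConfig(logDir):
        # Each generation is stored as a .cnf (architecture) plus
        # .wgt (weights) file pair, as in Example #1.
        def pair(name):
            return SimpleNamespace(
                Config=os.path.join(logDir, name + ".cnf"),
                Weight=os.path.join(logDir, name + ".wgt"))
        return SimpleNamespace(BestModel=pair("BestModel"),
                               NextGeneration=pair("NextGeneration"))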
Example #3
    def __init__(self, config, network, model, task):

        assert isinstance(network, NetworkModel)
        assert isinstance(model, MujocoModel)

        self.Config = config
        self.Network = network
        self.Env = MujocoEnv(model)
        self.Task = task
        self.StepTarget = []
        self.TrainData = []
        self.ValueCalclater = ValueCaluculator(config.ValueCalc)
Example #4
    def Expand(self, network: NetworkModel, env: MujocoEnv, task, valueCalc):

        env.SetSimState(self.Parent.State)
        env.Step(self.ActionNum)

        self.State = env.GetSimState()
        self.Observation = env.GetObservation(task, network.TimeLimit)
        self.Score = env.GetScore(task)
        self.IsTerminate = env.IsTerminate(task, self.Score, network.TimeLimit)

        # Two-head network: a policy distribution over actions plus a
        # (possibly multi-output) value estimate.
        policy_arr, value_arr = network.Model.predict(
            np.array([self.Observation]))

        policy = policy_arr[0]

        # Average across the value outputs.
        value = np.mean(value_arr[0])

        # At a terminal state, override the network estimate with the
        # value derived from the actual score.
        if self.IsTerminate:
            value = valueCalc.CalcValue(self.Score)

        for i in range(len(policy)):
            self.Children.append(Node(self, policy[i], i))

        self.IsExpanded = True
        self.N = 1
        self.W = value
        self.Q = value

        return value
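
Expand() unpacks predict() into a policy array and a value array and averages the value outputs, which suggests a two-head model with a multi-output value head. A minimal sketch of how such a network might be built with the Keras functional API; the real NetworkModel.Build and its layer sizes are not shown, so everything below is an assumption:

    from tensorflow.keras import layers, Model as KerasModel

    def BuildPolicyValueNet(obsShape, actionNum, valueHeads=4):
        # valueHeads is a guess inferred from the averaging in Expand().
        inp = layers.Input(shape=obsShape)
        h = layers.Dense(256, activation="relu")(inp)
        h = layers.Dense(256, activation="relu")(h)
        policy = layers.Dense(actionNum, activation="softmax",
                              name="policy")(h)
        value = layers.Dense(valueHeads, activation="tanh",
                             name="value")(h)
        # predict() on a two-output model returns [policy_arr, value_arr].
        return KerasModel(inp, [policy, value])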
Example #5
    def CalcScore(self, net, filePath):

        bestModel = MujocoModelHumanoid()
        bestTask = MujocoTask(bestModel, filePath)
        bestEnv = MujocoEnv(bestModel)

        bestAgent = Agent(self.Config.CheckerAgent, net, bestModel, bestTask)

        bestAction = bestAgent.SearchBestAction()

        bestScore = self.GetScore(bestEnv, bestTask, bestAction)

        return bestScore
Example #6
    def CalcScores(self, best, next, filePath):

        # Note: the parameter name "next" shadows the Python builtin;
        # it is kept here to match the project's call sites.
        bestModel = MujocoModelHumanoid()
        bestTask = MujocoTask(bestModel, filePath)
        bestEnv = MujocoEnv(bestModel)


        nextModel = MujocoModelHumanoid()
        nextTask = MujocoTask(nextModel, filePath)
        nextEnv = MujocoEnv(nextModel)

        bestAgent = Agent(self.Config.EvaluateAgent, best, bestModel, bestTask)
        nextAgent = Agent(self.Config.EvaluateAgent, next, nextModel, nextTask)

        bestAction = bestAgent.SearchBestAction()
        nextAction = nextAgent.SearchBestAction()

        bestScore = self.GetScore(bestEnv, bestTask, bestAction)
        nextScore = self.GetScore(nextEnv, nextTask, nextAction)

        #nextAgent.SaveTrainData(self.Config.GetTrainPath("next"))

        return bestScore, nextScore
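
The two scores are presumably compared by the caller to decide whether the next-generation model replaces the current best. A hedged sketch of that glue code, mirroring the file-copy pattern from Example #2; the actual caller of CalcScores is not shown, so this is hypothetical:

    import shutil

    def PromoteIfBetter(self, bestScore, nextScore):
        # Promote the challenger by overwriting the best model's
        # config/weight files with the next generation's.
        filePath = self.Config.FilePath
        if nextScore > bestScore:
            shutil.copyfile(filePath.NextGeneration.Config,
                            filePath.BestModel.Config)
            shutil.copyfile(filePath.NextGeneration.Weight,
                            filePath.BestModel.Weight)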
Example #7
    def Start(self):

        filePath = self.Config.FilePath.NextGeneration

        net = Network()
        net.Load(filePath.Config, filePath.Weight)

        model = Model()
        task = MujocoTask(model, self.GetRandomFile())
        env = MujocoEnv(model)

        agentConfig = self.Config.SelfPlayAgent
        agent = Agent(agentConfig, net, model, task)

        bestAction = agent.SearchBestAction()
        print(bestAction)

        agent.SaveTrainData(self.Config.GetTrainPath())
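
SaveTrainData (see Example #8) serializes self.TrainData as a JSON list of [observation, policy, value] triples. A minimal sketch of reading such a file back for training, under that assumption; the project's actual training loop is not shown:

    import json
    import numpy as np

    def LoadTrainData(path):
        with open(path, "rt") as f:
            data = json.load(f)
        # Split the triples into arrays suitable for model.fit().
        observations = np.array([row[0] for row in data])
        policies = np.array([row[1] for row in data])
        values = np.array([row[2] for row in data])
        return observations, policies, values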
Example #8
class Agent:
    def __init__(self, config, network, model, task):

        assert isinstance(network, NetworkModel)
        assert isinstance(model, MujocoModel)

        self.Config = config
        self.Network = network
        self.Env = MujocoEnv(model)
        self.Task = task
        self.StepTarget = []
        self.TrainData = []
        self.ValueCalclater = ValueCaluculator(config.ValueCalc)

    def SearchBestAction(self):

        bestAction = []

        initialNode = RootNode([], self.Task.StartState)

        firstNode = Node(initialNode, 1, self.Env.Model.NoneAction)
        firstNode.Expand(self.Network, self.Env, self.Task,
                         self.ValueCalclater)

        self.StepTarget.append([firstNode])

        print("Simuration Step ", end=" ")

        for i in range(self.Config.SearchDepthMax):

            if i % 20 == 0:
                print(str(i), end=" ", flush=True)

            isEnd = False

            for node in self.StepTarget[i]:
                isEnd |= node.IsTerminate

            if isEnd:
                break

            searchRoot = RootNode(self.StepTarget[i], None)
            self.StepTarget.append([])

            while True:
                # 6 = number of plies below the root on which Dirichlet
                # exploration noise is applied (see SearchMoves).
                value = self.SearchMoves(searchRoot, 6)

                if value is None:
                    break

                for child in searchRoot.Children:
                    if child.N >= self.Config.SearchAmount:
                        tau = self.Config.GetTau(self.Env.GetTime(),
                                                 self.Network.TimeLimit)
                        self.StepTarget[i + 1].append(child.PickTopChild(tau))

                if len(self.StepTarget[i + 1]) >= self.Config.BeamWidth:
                    break
        print("End")

        resultNodes = self.StepTarget[-1]
        resultNodes.sort(key=lambda x: x.Score, reverse=True)

        resultCount = len(resultNodes)

        for i in range(resultCount):

            win = i < resultCount / 2  # Note: win is currently unused.

            trainData = self.MakeTrainData(resultNodes[i],
                                           resultNodes[i].Score)

            self.TrainData.extend(trainData)

            if i == 0:
                bestAction = self.GetActionList(resultNodes[i])
                value = self.ValueCalclater.CalcValue(resultNodes[i].Score)
                self.ValueCalclater.AppendScore(resultNodes[i].Score)
                print("result " + str(i) + " Score=" +
                      str(resultNodes[i].Score) + "  Value=" + str(value))

        return bestAction

    def SearchMoves(self, node, noiseEnable):

        assert isinstance(node, Node)

        if not node.IsExpanded:
            value = node.Expand(self.Network, self.Env, self.Task,
                                self.ValueCalclater)
            return value

        action = node.GetBestAction_PUCT(self.Config.CPuct,
                                         self.Config.DiriclhetAlpha,
                                         self.Config.DiriclhetEpsilon,
                                         noiseEnable > 0)

        if action is None:
            return None

        value = self.SearchMoves(node.Children[action], noiseEnable - 1)

        if value is None:
            return None

        # Back up the value along the search path: bump the visit count
        # and keep Q as the running mean of backed-up values.
        node.N += 1
        node.W += value
        node.Q = node.W / node.N

        return value

    def MakeTrainData(self, node, value):

        assert isinstance(node, Node)

        trainData = []

        while True:

            node = node.Parent

            if node.Parent is None:
                break

            if node.PickedPolicy is None:
                continue

            policy = node.PickedPolicy

            trainData.append([node.Observation.tolist(), policy, value])

        trainData.reverse()

        return trainData

    def GetActionList(self, node):

        assert isinstance(node, Node)

        actionList = []

        while True:

            if node.Parent == None:
                break

            actionList.append(node.ActionNum)
            node = node.Parent

        actionList.reverse()

        return actionList

    def SaveTrainData(self, path):

        with open(path, "wt") as f:
            json.dump(self.TrainData, f)
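
GetBestAction_PUCT is called in SearchMoves but not shown. A minimal sketch of the standard PUCT selection rule it presumably implements, with Dirichlet noise mixed into the priors while noiseEnable is set; the child attribute P (the prior passed to Node in Expand) is an assumption, and the real function can also return None, which this sketch omits:

    import numpy as np

    def GetBestAction_PUCT(children, cPuct, alpha, epsilon, noiseEnable):
        priors = np.array([c.P for c in children])
        if noiseEnable:
            # Mix exploration noise into the priors, AlphaZero-style.
            noise = np.random.dirichlet([alpha] * len(children))
            priors = (1 - epsilon) * priors + epsilon * noise
        totalN = sum(c.N for c in children)
        # PUCT score: exploitation term Q plus an exploration bonus
        # that decays as a child is visited more often.
        scores = [c.Q + cPuct * p * np.sqrt(totalN) / (1 + c.N)
                  for c, p in zip(children, priors)]
        return int(np.argmax(scores))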
Example #9
    def MakeHopperTask(self, modelNum, trainNum, taskTrainDir, evalNum,
                       taskEvalDir):

        env = MujocoEnv(self)

        state = env.GetSimState()

        jsons = []

        for j in range(modelNum):

            if j != 0:
                # Randomize the start pose for every candidate after the
                # first; jsons[0] keeps the unperturbed default pose.
                state.qpos[env.Model.MujocoModel.get_joint_qpos_addr(
                    "rootx")] = random.uniform(-0.8, 0.8)
                # Fixed initial height of 2 (uniform(2, 2) is constant).
                state.qpos[env.Model.MujocoModel.get_joint_qpos_addr(
                    "rootz")] = 2
                state.qpos[env.Model.MujocoModel.get_joint_qpos_addr(
                    "thigh_joint")] = pt = random.uniform(-0.1, -0.5)
                state.qpos[env.Model.MujocoModel.get_joint_qpos_addr(
                    "leg_joint")] = pl = random.uniform(-0.1, -0.5)
                # Tilt the torso to roughly balance the sampled leg pose.
                state.qpos[env.Model.MujocoModel.get_joint_qpos_addr(
                    "rooty")] = random.uniform(-0.2, 0.2) + (pt + pl) / 2
                state.qpos[env.Model.MujocoModel.get_joint_qpos_addr(
                    "foot_joint")] = random.uniform(-0.5, 0.5)

            env.SetSimState(state)

            ok = False

            # Let the pose settle: step the simulation until the foot
            # sensor reports sufficient contact force.
            for i in range(400):
                env.Step(env.GetActionNum() - 1)

                sensor = env.GetSensorValue(3, "a_foot_joint")
                if sensor[2] >= 20 and i >= 10:
                    ok = True
                    break

            # Discard candidates that never settle within 400 steps.
            if not ok:
                continue

            joints = [
                'thigh_joint', 'leg_joint', 'foot_joint', 'rooty', 'rootx',
                'rootz'
            ]

            # Record the settled position of each joint.
            task = {}
            for joint in joints:
                task[joint] = env.GetSensorValue(1, "jp_" + joint)[0]

            jsons.append(task)

        for i in range(trainNum):

            filePath = taskTrainDir + "/TrainTask" + str(i) + ".task"

            print(filePath)

            task1 = random.choice(jsons)
            task2 = jsons[0]

            with open(filePath, "wt") as f:
                json.dump(list([task1, task2]), f)

        for i in range(evalNum):

            filePath = taskEvalDir + "/EvalTask" + str(i) + ".task"

            print(filePath)

            task1 = random.choice(jsons)
            task2 = jsons[0]

            with open(filePath, "wt") as f:
                json.dump(list([task1, task2]), f)
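
Each generated .task file is a JSON list of two joint-position dictionaries: one chosen at random from the sampled poses and the unperturbed default pose (jsons[0]). An illustrative example with made-up values:

    [{"thigh_joint": -0.31, "leg_joint": -0.22, "foot_joint": 0.12,
      "rooty": -0.05, "rootx": 0.4, "rootz": 2.0},
     {"thigh_joint": -0.28, "leg_joint": -0.3, "foot_joint": 0.0,
      "rooty": -0.29, "rootx": 0.0, "rootz": 2.0}]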