Exemple #1
0
    def MakeGenerationModel(self):
        """Ensure the best-model and next-generation model files exist on disk.

        A model is stored as two files (a config and a weight file) whose
        paths come from ``self.Config.FilePath``.

        * If the best model is missing or incomplete, a fresh ``NetworkModel``
          is built and saved to the best-model paths. Its input/output sizes
          are taken from a ``MujocoEnv`` wrapping a ``MujocoModelHumanoid``
          and a ``MujocoTask`` loaded from the first entry that
          ``os.listdir`` returns for ``self.Config.Task.TrainDir``.
        * If the next-generation model is missing or incomplete, both of its
          files are (re)created as copies of the best-model files.

        Both existence checks are evaluated up front, before anything is
        written.

        Raises:
            IndexError: if the best model has to be built and the training
                task directory is empty.
        """
        filePath = self.Config.FilePath

        # A model is only usable when BOTH of its files are present. Treating
        # a lone config (or a lone weight file) as "exists" would skip the
        # repair below and leave a half-written model behind.
        hasBest = (os.path.exists(filePath.BestModel.Config)
                   and os.path.exists(filePath.BestModel.Weight))
        hasNext = (os.path.exists(filePath.NextGeneration.Config)
                   and os.path.exists(filePath.NextGeneration.Weight))

        if not hasBest:

            model = MujocoModelHumanoid()
            env = MujocoEnv(model)

            # Any training task will do here: it is only used to query the
            # observation shape for the network. Note that os.listdir returns
            # entries in arbitrary order.
            dataDir = self.Config.Task.TrainDir
            dataList = os.listdir(dataDir)
            task = MujocoTask(model, dataDir+"/"+dataList[0])

            net = NetworkModel()
            net.Build(self.Config.Build, env.GetObservationShape(task),
                      env.GetActionNum(), self.Config.Worker.InitialTimeLimit)

            print("Make best model")

            net.Save(filePath.BestModel.Config, filePath.BestModel.Weight)

        if not hasNext:

            print("Make next generation model from copy of best model")

            # Copy both files together so config and weights always match.
            shutil.copyfile(filePath.BestModel.Config, filePath.NextGeneration.Config)
            shutil.copyfile(filePath.BestModel.Weight, filePath.NextGeneration.Weight)
    def MakeHopperTask(self, modelNum, trainNum, taskTrainDir, evalNum,
                       taskEvalDir):
        """Generate hopper task files from simulated poses.

        Runs ``modelNum`` candidate rollouts in a ``MujocoEnv`` built around
        ``self``. The first candidate starts from the state returned by
        ``env.GetSimState()``; every later one first overwrites the root and
        leg joint positions with random values. Each candidate is stepped up
        to 400 times with action index ``env.GetActionNum() - 1`` and is
        accepted once the third component of the ``"a_foot_joint"`` sensor
        reading is at least 20 on step index 10 or later (presumably a
        foot-contact criterion -- confirm against the model definition).
        Candidates that never meet the criterion are dropped.

        For every accepted candidate the ``jp_<joint>`` sensor values are
        recorded. Then ``trainNum`` files ``TrainTask<i>.task`` are written to
        ``taskTrainDir`` and ``evalNum`` files ``EvalTask<i>.task`` to
        ``taskEvalDir``; each holds a JSON list of two poses: a randomly
        chosen accepted pose followed by the first accepted pose.

        Raises:
            IndexError: if files are requested but no candidate was accepted.
        """
        env = MujocoEnv(self)
        state = env.GetSimState()
        poses = []

        def set_joint(name, value):
            # Look the joint's qpos slot up by name and overwrite it in place.
            state.qpos[env.Model.MujocoModel.get_joint_qpos_addr(name)] = value

        def write_tasks(count, directory, stem):
            # Emit `count` task files, each pairing a random accepted pose
            # with the first accepted pose.
            for index in range(count):
                path = directory + stem + str(index) + ".task"
                print(path)
                pair = [random.choice(poses), poses[0]]
                with open(path, "wt") as handle:
                    json.dump(pair, handle)

        for candidate in range(modelNum):

            # Candidate 0 keeps the untouched initial state; all others get a
            # randomized starting pose.
            if candidate != 0:
                set_joint("rootx", random.uniform(-0.8, 0.8))
                set_joint("rootz", random.uniform(2, 2))
                thigh = random.uniform(-0.1, -0.5)
                set_joint("thigh_joint", thigh)
                leg = random.uniform(-0.1, -0.5)
                set_joint("leg_joint", leg)
                # Root pitch is centred on the mean of thigh and leg angles.
                set_joint("rooty",
                          random.uniform(-0.2, 0.2) + (thigh + leg) / 2)
                set_joint("foot_joint", random.uniform(-0.5, 0.5))

            env.SetSimState(state)

            for step in range(400):
                env.Step(env.GetActionNum() - 1)
                reading = env.GetSensorValue(3, "a_foot_joint")
                if reading[2] >= 20 and step >= 10:
                    break
            else:
                # The acceptance criterion was never met: discard candidate.
                continue

            poses.append({
                joint: env.GetSensorValue(1, "jp_" + joint)[0]
                for joint in ('thigh_joint', 'leg_joint', 'foot_joint',
                              'rooty', 'rootx', 'rootz')
            })

        write_tasks(trainNum, taskTrainDir, "/TrainTask")
        write_tasks(evalNum, taskEvalDir, "/EvalTask")