def Start(self,
          cnf="BestLog/BestLog180126082522.cnf",
          wgt="BestLog/BestLog180126082522.wgt",
          taskName="TaskEval/EvalTask114.task",
          timeLimit=0.9):
    """Search the best action sequence for one task and replay it forever.

    A network is loaded from ``cnf``/``wgt``, an agent built from
    ``self.Config.ViewerAgent`` searches the best action sequence for the
    task stored in ``taskName``, and that sequence is then replayed in an
    endless loop with rendering. This method never returns.

    Args:
        cnf: Path of the network configuration file to load.
        wgt: Path of the network weight file to load.
        taskName: Path of the task file to visualise.
        timeLimit: Value assigned to ``net.TimeLimit`` after loading.

    The defaults reproduce the values that used to be hard-coded here, so
    calling ``Start()`` without arguments behaves as before.
    """
    net = Network()
    net.Load(cnf, wgt)
    net.TimeLimit = timeLimit

    model = Model()
    task = MujocoTask(model, taskName)
    # Alternative: task = MujocoTask.LoadRandom(model, self.Config.Task.EvalDir)
    env = MujocoEnv(model)

    agent = Agent(self.Config.ViewerAgent, net, model, task)
    bestAction = agent.SearchBestAction()

    while True:
        # Rewind to the task's start state before every replay.
        env.SetSimState(task.StartState)
        for action in bestAction:
            env.Step(action)
            # NOTE(review): the original layout was flattened; rendering once
            # per step (rather than once per replay) is assumed - confirm.
            env.Render()
def MakeGenerationModel(self):
    """Make sure both the best model and the next-generation model exist.

    A model consists of two files, a config and a weight file. It counts
    as present only when BOTH files exist. The previous check OR-ed the two
    existence tests, so a half-written model (one file missing) was treated
    as present: a broken next-generation model was never repaired and a
    broken best model made the copy below fail on the missing file.

    * If the best model is missing or incomplete, a fresh network is built
      from ``self.Config.Build`` and saved as the best model.
    * If the next-generation model was missing or incomplete on entry, it
      is (re)created as a copy of the best model.
    """
    filePath = self.Config.FilePath
    bestFiles = filePath.BestModel
    nextFiles = filePath.NextGeneration

    hasBest = (os.path.exists(bestFiles.Config)
               and os.path.exists(bestFiles.Weight))
    hasNext = (os.path.exists(nextFiles.Config)
               and os.path.exists(nextFiles.Weight))

    if not hasBest:
        model = MujocoModelHumanoid()
        env = MujocoEnv(model)

        # One training task is needed to query the observation shape.
        dataDir = self.Config.Task.TrainDir
        dataList = os.listdir(dataDir)
        task = MujocoTask(model, os.path.join(dataDir, dataList[0]))

        net = NetworkModel()
        net.Build(self.Config.Build,
                  env.GetObservationShape(task),
                  env.GetActionNum(),
                  self.Config.Worker.InitialTimeLimit)

        print("Make best model")
        net.Save(bestFiles.Config, bestFiles.Weight)

    if not hasNext:
        print("Make next generation model from copy of best model")
        shutil.copyfile(bestFiles.Config, nextFiles.Config)
        shutil.copyfile(bestFiles.Weight, nextFiles.Weight)
def __init__(self, config, network, model, task):
    """Store the agent's collaborators and create its working state.

    Args:
        config: Agent configuration; ``config.ValueCalc`` is passed to the
            value calculator.
        network: Must be a ``NetworkModel`` instance (asserted).
        model: Must be a ``MujocoModel`` instance (asserted); a new
            ``MujocoEnv`` is created from it.
        task: Task object, stored as given.
    """
    assert isinstance(network, NetworkModel)
    assert isinstance(model, MujocoModel)

    self.Config, self.Network = config, network
    self.Env = MujocoEnv(model)
    self.Task = task

    # Both containers start empty.
    self.StepTarget, self.TrainData = [], []

    # Attribute and class names keep their existing spelling because code
    # outside this block may refer to them.
    self.ValueCalclater = ValueCaluculator(config.ValueCalc)
def CalcScore(self, net, filePath):
    """Score a single network on the task stored at ``filePath``.

    A fresh humanoid model, task and environment are created, an agent
    configured with ``self.Config.CheckerAgent`` searches for its best
    action sequence, and that sequence is handed to ``self.GetScore``.

    Args:
        net: Network the agent uses for its search.
        filePath: Path of the task file to evaluate on.

    Returns:
        Whatever ``self.GetScore`` returns for the found action sequence.
    """
    model = MujocoModelHumanoid()
    task = MujocoTask(model, filePath)
    env = MujocoEnv(model)

    agent = Agent(self.Config.CheckerAgent, net, model, task)
    actions = agent.SearchBestAction()

    return self.GetScore(env, task, actions)
def CalcScores(self, best, next, filePath):
    """Score the best and the next-generation network on the same task.

    Each network gets its own model, task and environment, all built from
    ``filePath``. Both agents use ``self.Config.EvaluateAgent``.

    Args:
        best: Network of the current best model.
        next: Network of the candidate model. The name shadows the
            builtin but is kept because it is part of the signature.
        filePath: Path of the task file both networks are evaluated on.

    Returns:
        Tuple ``(bestScore, nextScore)`` as returned by ``self.GetScore``.
    """
    agentConfig = self.Config.EvaluateAgent

    # Set up the two independent evaluation worlds: best first, then next.
    modelOfBest = MujocoModelHumanoid()
    taskOfBest = MujocoTask(modelOfBest, filePath)
    envOfBest = MujocoEnv(modelOfBest)

    modelOfNext = MujocoModelHumanoid()
    taskOfNext = MujocoTask(modelOfNext, filePath)
    envOfNext = MujocoEnv(modelOfNext)

    agentOfBest = Agent(agentConfig, best, modelOfBest, taskOfBest)
    agentOfNext = Agent(agentConfig, next, modelOfNext, taskOfNext)

    # Both searches run before either result is scored.
    actionsOfBest = agentOfBest.SearchBestAction()
    actionsOfNext = agentOfNext.SearchBestAction()

    scoreOfBest = self.GetScore(envOfBest, taskOfBest, actionsOfBest)
    scoreOfNext = self.GetScore(envOfNext, taskOfNext, actionsOfNext)

    # Saving the candidate's training data is currently disabled:
    # agentOfNext.SaveTrainData(self.Config.GetTrainPath("next"))
    return scoreOfBest, scoreOfNext
def Start(self):
    """Run one search with the next-generation network and save its data.

    The network is loaded from ``self.Config.FilePath.NextGeneration``,
    a task is created from the file returned by ``self.GetRandomFile()``,
    and an agent configured with ``self.Config.SelfPlayAgent`` searches for
    the best action sequence. The sequence is printed and the agent's
    training data is saved to ``self.Config.GetTrainPath()``.
    """
    modelFiles = self.Config.FilePath.NextGeneration

    net = Network()
    net.Load(modelFiles.Config, modelFiles.Weight)

    model = Model()
    task = MujocoTask(model, self.GetRandomFile())
    # The environment is not used below; it is still constructed so that
    # any side effects of creating it are preserved.
    env = MujocoEnv(model)

    agent = Agent(self.Config.SelfPlayAgent, net, model, task)
    actions = agent.SearchBestAction()
    print(actions)

    agent.SaveTrainData(self.Config.GetTrainPath())
def MakeHopperTask(self, modelNum, trainNum, taskTrainDir, evalNum, taskEvalDir):
    """Generate hopper poses by simulation and write them out as task files.

    Up to ``modelNum`` candidate poses are produced. The first candidate
    uses the simulator's initial state unchanged; every later one
    randomises six joint positions first. Each candidate is simulated for
    at most 400 steps and kept only if the third component of the
    ``a_foot_joint`` sensor reaches 20 on or after step index 10
    (presumably a ground-contact criterion - confirm).

    Afterwards ``trainNum`` files ``TrainTask<i>.task`` are written to
    ``taskTrainDir`` and ``evalNum`` files ``EvalTask<i>.task`` to
    ``taskEvalDir``. Every file is a JSON list of two poses: a randomly
    chosen accepted pose followed by the first accepted pose.

    Args:
        modelNum: Number of candidate poses to simulate.
        trainNum: Number of training task files to write.
        taskTrainDir: Directory for the training task files.
        evalNum: Number of evaluation task files to write.
        taskEvalDir: Directory for the evaluation task files.
    """
    env = MujocoEnv(self)
    state = env.GetSimState()
    poses = []

    def writeTasks(directory, prefix, count):
        # Write `count` task files named <prefix><index>.task to `directory`.
        for index in range(count):
            filePath = directory + "/" + prefix + str(index) + ".task"
            print(filePath)
            sampled = random.choice(poses)
            reference = poses[0]
            with open(filePath, "wt") as f:
                json.dump([sampled, reference], f)

    for candidate in range(modelNum):
        if candidate != 0:
            addrOf = env.Model.MujocoModel.get_joint_qpos_addr
            qpos = state.qpos

            # Draw order matters for reproducibility under a fixed seed.
            qpos[addrOf("rootx")] = random.uniform(-0.8, 0.8)
            qpos[addrOf("rootz")] = random.uniform(2, 2)
            thighAngle = random.uniform(-0.1, -0.5)
            qpos[addrOf("thigh_joint")] = thighAngle
            legAngle = random.uniform(-0.1, -0.5)
            qpos[addrOf("leg_joint")] = legAngle
            # The root pitch is centred on the mean of thigh and leg angle.
            qpos[addrOf("rooty")] = (random.uniform(-0.2, 0.2)
                                     + (thighAngle + legAngle) / 2)
            qpos[addrOf("foot_joint")] = random.uniform(-0.5, 0.5)

        # NOTE(review): the original layout was flattened; it is assumed the
        # state is re-applied for every candidate, including the first.
        env.SetSimState(state)

        for step in range(400):
            env.Step(env.GetActionNum() - 1)
            footSensor = env.GetSensorValue(3, "a_foot_joint")
            if footSensor[2] >= 20 and step >= 10:
                break
        else:
            # Threshold never reached within 400 steps: discard candidate.
            continue

        jointNames = (
            'thigh_joint', 'leg_joint', 'foot_joint', 'rooty', 'rootx',
            'rootz'
        )
        poses.append({
            name: env.GetSensorValue(1, "jp_" + name)[0]
            for name in jointNames
        })

    writeTasks(taskTrainDir, "TrainTask", trainNum)
    writeTasks(taskEvalDir, "EvalTask", evalNum)