def main():
    # NOTE: `env` is assumed to be a gym MountainCar-v0 environment created at module level
    # (e.g. env = gym.make('MountainCar-v0')); it is not defined inside this function.
    stateDim = env.observation_space.shape[0]
    actionDim = env.action_space.n
    buildModel = BuildModel(stateDim, actionDim)
    layersWidths = [30]
    writer, model = buildModel(layersWidths)

    dirName = os.path.dirname(__file__)
    modelPath = os.path.join(
        dirName, '..', 'trainedDQNModels',
        'epsilonIncrease=0.0002_epsilonMin=0_gamma=0.9_learningRate=0.001_maxEpisode=1000_maxTimeStep=2000_minibatchSize=128.ckpt')
    restoreVariables(model, modelPath)
    policy = lambda state: [ActGreedyByModel(getTrainQValue, model)(state)]

    isTerminal = IsTerminalMountCarDiscrete()
    transit = TransitMountCarDiscrete()
    getReward = rewardMountCarDiscrete
    reset = ResetMountCarDiscrete(seed=None, low=-1, high=0.4)

    for i in range(20):
        maxRunningSteps = 2000
        sampleTrajectory = SampleTrajectory(maxRunningSteps, transit, isTerminal, getReward, reset)
        trajectory = sampleTrajectory(policy)

        # plot / demo
        showDemo = True
        if showDemo:
            visualize = VisualizeMountCarDiscrete()
            visualize(trajectory)
def main():
    # NOTE: `env` is assumed to be a gym MountainCarContinuous-v0 environment created at module level.
    stateDim = env.observation_space.shape[0]
    actionDim = env.action_space.shape[0]
    actionHigh = env.action_space.high
    actionLow = env.action_space.low
    actionBound = (actionHigh - actionLow) / 2

    buildActorModel = BuildActorModel(stateDim, actionDim, actionBound)
    actorLayerWidths = [30]
    actorWriter, actorModel = buildActorModel(actorLayerWidths)

    dirName = os.path.dirname(__file__)
    actorModelPath = os.path.join(
        dirName, '..', 'trainedDDPGModels',
        'Eps=300_High=0.4_actorModel=0_batch=128_env=MountainCarContinuous-v0_gam=0.9_lrActor=0.001_lrCritic=0.001_noiseVar=1_resetLow=-1_timeStep=2000_varDiscout=0.99995.ckpt')
    restoreVariables(actorModel, actorModelPath)
    policy = ActDDPGOneStep(actionLow, actionHigh, actByPolicyTrain, actorModel, getNoise=None)

    isTerminal = IsTerminalMountCarContin()
    reset = ResetMountCarContin(seed=None, low=-1, high=0.4)
    transit = TransitGymMountCarContinuous()
    rewardFunc = RewardMountCarContin(isTerminal)

    for i in range(20):
        maxRunningSteps = 2000
        sampleTrajectory = SampleTrajectory(maxRunningSteps, transit, isTerminal, rewardFunc, reset)
        trajectory = sampleTrajectory(policy)

        # plot / demo
        showDemo = True
        if showDemo:
            visualize = VisualizeMountCarContin()
            visualize(trajectory)
def main():
    # NOTE: `env` is assumed to be a gym Pendulum-v0 environment created at module level;
    # `seed`, `observe`, `isTerminalGymPendulum` and `angle_normalize` are also assumed to be
    # defined or imported at module level.
    stateDim = env.observation_space.shape[0]
    actionDim = env.action_space.shape[0]
    actionHigh = env.action_space.high
    actionLow = env.action_space.low
    actionBound = (actionHigh - actionLow) / 2

    buildActorModel = BuildActorModel(stateDim, actionDim, actionBound)
    actorLayerWidths = [30]
    actorWriter, actorModel = buildActorModel(actorLayerWidths)

    dirName = os.path.dirname(__file__)
    actorModelPath = os.path.join(
        dirName, '..', 'trainedDDPGModels',
        'Eps=200_actorModel=0_batch=128_env=Pendulum-v0_gam=0.9_lrActor=0.001_lrCritic=0.001_noiseVar=3_timeStep=200_varDiscout=0.9995.ckpt')
    restoreVariables(actorModel, actorModelPath)
    actOneStep = ActDDPGOneStep(actionLow, actionHigh, actByPolicyTrain, actorModel, getNoise=None)
    policy = lambda state: actOneStep(observe(state))

    isTerminal = isTerminalGymPendulum
    reset = ResetGymPendulum(seed)
    transit = TransitGymPendulum()
    rewardFunc = RewardGymPendulum(angle_normalize)

    for i in range(10):
        maxRunningSteps = 200
        sampleTrajectory = SampleTrajectory(maxRunningSteps, transit, isTerminal, rewardFunc, reset)
        trajectory = sampleTrajectory(policy)

        # plot / demo
        showDemo = True
        if showDemo:
            visualize = VisualizeGymPendulum()
            visualize(trajectory)
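# The demos above all roll out episodes through a `SampleTrajectory` callable whose definition
# is not shown in this file. The class below is only a minimal sketch of the assumed interface
# (reset, transit, rewardFunc, isTerminal, a step cap, and a list of
# (state, action, nextState, reward) tuples as output); it is not the project's actual
# implementation.
class SampleTrajectorySketch:
    def __init__(self, maxRunningSteps, transit, isTerminal, rewardFunc, reset):
        self.maxRunningSteps = maxRunningSteps
        self.transit = transit
        self.isTerminal = isTerminal
        self.rewardFunc = rewardFunc
        self.reset = reset

    def __call__(self, policy):
        trajectory = []
        state = self.reset()
        for _ in range(self.maxRunningSteps):
            action = policy(state)                       # query the (possibly multi-agent) policy
            nextState = self.transit(state, action)      # environment step
            reward = self.rewardFunc(state, action, nextState)
            trajectory.append((state, action, nextState, reward))
            if self.isTerminal(nextState):
                break
            state = nextState
        return trajectory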
def generateSingleCondition(condition):
    debug = 0
    if debug:
        damping = 2.0
        frictionloss = 0.0
        masterForce = 1.0
        numWolves = 1
        numSheeps = 1
        numMasters = 1
        maxTimeStep = 25
        saveTraj = False
        saveImage = True
        visualizeMujoco = False
        visualizeTraj = True
        makeVideo = True
    else:
        # condition = json.loads(sys.argv[1])
        damping = float(condition['damping'])
        frictionloss = float(condition['frictionloss'])
        masterForce = float(condition['masterForce'])
        numWolves = 1
        numSheeps = 1
        numMasters = 1
        maxTimeStep = 25
        saveTraj = False
        saveImage = True
        visualizeMujoco = False
        visualizeTraj = True
        makeVideo = False

    print("maddpg: saveTraj: {}, visualize: {}, damping: {}, frictionloss: {}".format(
        str(saveTraj), str(visualizeMujoco), damping, frictionloss))

    numAgents = numWolves + numSheeps + numMasters
    numEntities = numAgents + numMasters
    wolvesID = [0]
    sheepsID = [1]
    masterID = [2]

    wolfSize = 0.075
    sheepSize = 0.05
    masterSize = 0.075
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [masterSize] * numMasters

    wolfMaxSpeed = 1.0
    blockMaxSpeed = None
    entitiesMovableList = [True] * numAgents + [False] * numMasters
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound)
    rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    rewardMaster = lambda state, action, nextState: [-reward for reward in rewardWolf(state, action, nextState)]
    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState)) + list(rewardMaster(state, action, nextState))

    dirName = os.path.dirname(__file__)
    makePropertyList = MakePropertyList(transferNumberListToStr)

    # joint damping
    geomIds = [1, 2, 3]
    keyNameList = [0, 1]
    valueList = [[damping, damping]] * len(geomIds)
    dampingParameter = makePropertyList(geomIds, keyNameList, valueList)
    changeJointDampingProperty = lambda envDict, geomPropertyDict: changeJointProperty(envDict, geomPropertyDict, '@damping')

    # joint friction loss
    geomIds = [1, 2, 3]
    keyNameList = [0, 1]
    valueList = [[frictionloss, frictionloss]] * len(geomIds)
    frictionlossParameter = makePropertyList(geomIds, keyNameList, valueList)
    changeJointFrictionlossProperty = lambda envDict, geomPropertyDict: changeJointProperty(envDict, geomPropertyDict, '@frictionloss')

    physicsDynamicsPath = os.path.join(dirName, '..', '..', 'environment', 'mujocoEnv', 'rope', 'leasedNew.xml')
    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())

    envXmlPropertyDictList = [dampingParameter, frictionlossParameter]
    changeEnvXmlPropertFuntionyList = [changeJointDampingProperty, changeJointFrictionlossProperty]
    for propertyDict, changeXmlProperty in zip(envXmlPropertyDictList, changeEnvXmlPropertFuntionyList):
        envXmlDict = changeXmlProperty(envXmlDict, propertyDict)

    envXml = xmltodict.unparse(envXmlDict)
    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)
    qPosInit = (0, ) * 24
    qVelInit = (0, ) * 24
    qPosInitNoise = 0.4
    qVelInitNoise = 0
    numAgent = 2
    tiedAgentId = [0, 2]
    ropePartIndex = list(range(3, 12))
    maxRopePartLength = 0.06
    reset = ResetUniformWithoutXPosForLeashed(physicsSimulation, qPosInit, qVelInit, numAgent, tiedAgentId,
                                              ropePartIndex, maxRopePartLength, qPosInitNoise, qVelInitNoise)

    numSimulationFrames = 10
    isTerminal = lambda state: False
    reshapeActionList = [ReshapeAction(5), ReshapeAction(5), ReshapeAction(masterForce)]
    transit = TransitionFunctionWithoutXPos(physicsSimulation, numSimulationFrames, visualizeMujoco, isTerminal, reshapeActionList)

    maxRunningStepsToSample = 100
    sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit, isTerminal, rewardFunc, reset)

    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, masterID, getPosFromAgentState, getVelFromAgentState)
    observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]
    initObsForParams = observe(reset())
    obsShape = [initObsForParams[obsID].shape[0] for obsID in range(len(initObsForParams))]

    worldDim = 2
    actionDim = worldDim * 2 + 1
    layerWidth = [128, 128]

    # ------------ model ------------------------
    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)]

    modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPGLeasedFixedEnv2',
                               'damping={}_frictionloss={}_masterForce={}'.format(damping, frictionloss, masterForce))
    # NOTE: maxEpisode is not defined in this function; it is assumed to be a module-level constant
    # (the '60000eps' checkpoint suffix below suggests 60000 training episodes).
    fileName = "maddpg{}episodes{}step_agent".format(maxEpisode, maxTimeStep)
    modelPaths = [os.path.join(modelFolder, fileName + str(i) + '60000eps') for i in range(numAgents)]
    [restoreVariables(model, path) for model, path in zip(modelsList, modelPaths)]

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    policy = lambda allAgentsStates: [actOneStepOneModel(model, observe(allAgentsStates)) for model in modelsList]

    trajList = []
    numTrajToSample = 5
    for i in range(numTrajToSample):
        np.random.seed(i)
        traj = sampleTrajectory(policy)
        trajList.append(list(traj))
    # saveTraj
    if saveTraj:
        # NOTE: sheepSpeedMultiplier and individStr are not defined in this function; they are
        # assumed to be module-level constants used only to name the trajectory file.
        trajFileName = "maddpg{}wolves{}sheep{}blocks{}eps{}stepSheepSpeed{}{}Traj".format(
            numWolves, numSheeps, numMasters, maxEpisode, maxTimeStep, sheepSpeedMultiplier, individStr)
        trajSavePath = os.path.join(dirName, '..', 'trajectory', trajFileName)
        saveToPickle(trajList, trajSavePath)

    # visualize
    if visualizeTraj:
        demoFolder = os.path.join(dirName, '..', 'demos', 'mujocoMADDPGLeasedFixedEnv2',
                                  'damping={}_frictionloss={}_masterForce={}'.format(damping, frictionloss, masterForce))
        if not os.path.exists(demoFolder):
            os.makedirs(demoFolder)
        entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheeps + [masterColor] * numMasters
        render = Render(entitiesSizeList, entitiesColorList, numAgents, demoFolder, saveImage, getPosFromAgentState)
        trajToRender = np.concatenate(trajList)
        render(trajToRender)
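# generateSingleCondition expects a condition dict with 'damping', 'frictionloss' and
# 'masterForce'. A minimal driver sketch, assuming the script is called with a JSON-encoded
# condition as its first argument (as the commented-out json.loads(sys.argv[1]) line suggests);
# this is not necessarily how the original repository invokes it.
if __name__ == '__main__':
    import json
    import sys
    condition = json.loads(sys.argv[1]) if len(sys.argv) > 1 else \
        {'damping': 2.0, 'frictionloss': 0.0, 'masterForce': 1.0}
    generateSingleCondition(condition)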
def simuliateOneParameter(parameterDict):
    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    wolfColor = np.array([0.85, 0.35, 0.35])
    sheepColor = np.array([0.35, 0.85, 0.35])
    blockColor = np.array([0.25, 0.25, 0.25])

    wolvesID = [0, 1]
    sheepsID = [2]
    blocksID = [3]
    numWolves = len(wolvesID)
    numSheeps = len(sheepsID)
    numBlocks = len(blocksID)

    sheepMaxSpeed = 1.3
    wolfMaxSpeed = 1.0
    blockMaxSpeed = None
    agentsMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheeps

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [blockSize] * numBlocks
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheeps + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound)
    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    makePropertyList = MakePropertyList(transferNumberListToStr)

    # collision-relevant solver parameters
    dmin = parameterDict['dmin']
    dmax = parameterDict['dmax']
    width = parameterDict['width']
    midpoint = parameterDict['midpoint']
    power = parameterDict['power']
    timeconst = parameterDict['timeconst']
    dampratio = parameterDict['dampratio']

    geomIds = [1, 2]
    keyNameList = ['@solimp', '@solref']
    valueList = [[[dmin, dmax, width, midpoint, power], [timeconst, dampratio]]] * len(geomIds)
    collisionParameter = makePropertyList(geomIds, keyNameList, valueList)

    # load the env xml and change the selected geom properties
    # NOTE: dirName is assumed to be defined at module level (os.path.dirname(__file__)).
    physicsDynamicsPath = os.path.join(dirName, '..', '..', 'environment', 'mujocoEnv', 'multiAgentcollision.xml')
    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())
    print(envXmlDict)

    envXmlPropertyDictList = [collisionParameter]
    changeEnvXmlPropertFuntionyList = [changeGeomProperty]
    for propertyDict, changeXmlProperty in zip(envXmlPropertyDictList, changeEnvXmlPropertFuntionyList):
        envXmlDict = changeXmlProperty(envXmlDict, propertyDict)
    envXml = xmltodict.unparse(envXmlDict)

    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)

    # MDP functions
    qPosInit = [0, 0] * numAgents
    qVelInit = [0, 0] * numAgents
    qVelInitNoise = 0
    qPosInitNoise = 1

    fixReset = ResetFixWithoutXPos(physicsSimulation, qPosInit, qVelInit, numAgents, numBlocks)
    blocksState = [[0, 0, 0, 0]]
    reset = lambda: fixReset([-0.5, 0.8, -0.5, 0, 0.5, 0.8], [0, 0, 0, 0, 0, 0], blocksState)

    isTerminal = lambda state: False
    numSimulationFrames = 1
    visualize = False
    dt = 0.1
    damping = 2.5
    transit = TransitionFunctionWithoutXPos(physicsSimulation, numAgents, numSimulationFrames,
                                            damping * dt / numSimulationFrames, agentsMaxSpeedList,
                                            visualize, isTerminal)

    maxRunningSteps = 100
    sampleTrajectory = SampleTrajectory(maxRunningSteps, transit, isTerminal, rewardFunc, reset)
    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, blocksID,
                                              getPosFromAgentState, getVelFromAgentState)
    observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]
    initObsForParams = observe(reset())
    obsShape = [initObsForParams[obsID].shape for obsID in range(len(initObsForParams))]
    worldDim = 2

    evalNum = 1
    trajectorySaveExtension = '.pickle'
    fixedParameters = {'isMujoco': 1, 'isCylinder': 1, 'evalNum': evalNum}
    trajectoriesSaveDirectory = os.path.join(dirName, '..', 'trajectory')
    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory, trajectorySaveExtension, fixedParameters)
    trajectorySavePath = generateTrajectorySavePath(parameterDict)

    if not os.path.isfile(trajectorySavePath):
        trajList = list()
        for i in range(evalNum):
            np.random.seed(i)
            policy = lambda state: [[1, 0]]
            traj = sampleTrajectory(policy)
            trajList.append(traj)

        # saveTraj
        saveTraj = True
        if saveTraj:
            saveToPickle(trajList, trajectorySavePath)

        # visualize
        visualize = True
        if visualize:
            entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheeps + [blockColor] * numBlocks
            render = Render(entitiesSizeList, entitiesColorList, numAgents, getPosFromAgentState)
            render(trajList)
def main():
    numAgents = 2
    stateDim = numAgents * 2
    actionLow = -1
    actionHigh = 1
    actionBound = (actionHigh - actionLow) / 2
    actionDim = 2

    buildActorModel = BuildActorModel(stateDim, actionDim, actionBound)
    actorLayerWidths = [64]
    actorWriter, actorModel = buildActorModel(actorLayerWidths)

    dirName = os.path.dirname(__file__)
    actorModelPath = os.path.join(
        dirName, '..', 'trainedDDPGModels',
        'actorModel=0_dimension=2_gamma=0.95_learningRateActor=0.001_learningRateCritic=0.001_maxEpisode=2000_maxTimeStep=20_minibatchSize=32_wolfSpeed=0.5.ckpt')
    restoreVariables(actorModel, actorModelPath)
    sheepPolicy = ActDDPGOneStep(actionLow, actionHigh, actByPolicyTrain, actorModel, getNoise=None)

    sheepId = 0
    wolfId = 1
    getSheepPos = GetAgentPosFromState(sheepId)
    getWolfPos = GetAgentPosFromState(wolfId)
    wolfSpeed = 0.5
    wolfPolicy = HeatSeekingContinuousDeterministicPolicy(getWolfPos, getSheepPos, wolfSpeed)

    xBoundary = (0, 20)
    yBoundary = (0, 20)
    stayWithinBoundary = StayWithinBoundary(xBoundary, yBoundary)
    transit = TransitForNoPhysics(getIntendedNextState, stayWithinBoundary)

    maxTimeStep = 20
    sheepAliveBonus = 1 / maxTimeStep
    sheepTerminalPenalty = 20
    killzoneRadius = 0
    isTerminal = IsTerminal(getWolfPos, getSheepPos, killzoneRadius)
    getBoundaryPunishment = GetBoundaryPunishment(xBoundary, yBoundary, sheepIndex=0, punishmentVal=10)
    rewardSheep = RewardFunctionCompete(sheepAliveBonus, sheepTerminalPenalty, isTerminal)
    rewardSheepWithBoundaryHeuristics = RewardSheepWithBoundaryHeuristics(
        rewardSheep, getIntendedNextState, getBoundaryPunishment, getSheepPos)
    getSheepAction = lambda actions: [actions[sheepId * actionDim], actions[sheepId * actionDim + 1]]
    getReward = lambda state, action, nextState: rewardSheepWithBoundaryHeuristics(state, getSheepAction(action), nextState)

    policy = lambda state: list(sheepPolicy(state)) + list(wolfPolicy(state))
    reset = lambda: np.array([10, 10, 15, 5])

    for i in range(10):
        maxRunningSteps = 50
        sampleTrajectory = SampleTrajectory(maxRunningSteps, transit, isTerminal, getReward, reset)
        trajectory = sampleTrajectory(policy)

        # plot / demo
        showDemo = True
        if showDemo:
            observe = Observe(trajectory, numAgents)

            fullScreen = False
            screenWidth = 800
            screenHeight = 800
            screen = initializeScreen(fullScreen, screenWidth, screenHeight)

            leaveEdgeSpace = 200
            lineWidth = 3
            xBoundary = [leaveEdgeSpace, screenWidth - leaveEdgeSpace * 2]
            yBoundary = [leaveEdgeSpace, screenHeight - leaveEdgeSpace * 2]
            screenColor = THECOLORS['black']
            lineColor = THECOLORS['white']
            drawBackground = DrawBackground(screen, screenColor, xBoundary, yBoundary, lineColor, lineWidth)
            circleSize = 10
            positionIndex = [0, 1]
            drawState = DrawState(screen, circleSize, positionIndex, drawBackground)

            numberOfAgents = 2
            chasingColors = [THECOLORS['green'], THECOLORS['red']]
            colorSpace = chasingColors[:numberOfAgents]
            FPS = 60
            chaseTrial = ChaseTrialWithTraj(FPS, colorSpace, drawState, saveImage=True)

            rawXRange = [0, 20]
            rawYRange = [0, 20]
            scaledXRange = [210, 590]
            scaledYRange = [210, 590]
            scaleTrajectory = ScaleTrajectory(positionIndex, rawXRange, rawYRange, scaledXRange, scaledYRange)

            oldFPS = 5
            adjustFPS = AdjustDfFPStoTraj(oldFPS, FPS)
            getTrajectory = lambda rawTrajectory: scaleTrajectory(adjustFPS(rawTrajectory))
            positionList = [observe(index) for index in range(len(trajectory))]
            positionListToDraw = getTrajectory(positionList)

            currentDir = os.getcwd()
            parentDir = os.path.abspath(os.path.join(currentDir, os.pardir))
            imageFolderName = 'Demo'
            saveImageDir = os.path.join(os.path.join(parentDir, 'chasingDemo'), imageFolderName)
            if not os.path.exists(saveImageDir):
                os.makedirs(saveImageDir)

            chaseTrial(numberOfAgents, positionListToDraw, saveImageDir)
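# Typical entry point for these demo scripts; a minimal sketch, assuming main() takes no
# arguments as defined above.
if __name__ == '__main__':
    main()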
def main():
    debug = 1
    if debug:
        numWolves = 1
        numSheeps = 1
        numBlocks = 1
        saveTraj = False
        visualizeTraj = True
        maxTimeStep = 25
        sheepSpeedMultiplier = 1
        individualRewardWolf = 0
        timeconst = 0.5
        dampratio = 0.2
    else:
        print(sys.argv)
        condition = json.loads(sys.argv[1])
        numWolves = int(condition['numWolves'])
        numSheeps = int(condition['numSheeps'])
        numBlocks = int(condition['numBlocks'])
        saveTraj = True
        visualizeTraj = False
        maxTimeStep = 50
        sheepSpeedMultiplier = 1.25
        individualRewardWolf = 1
        # NOTE: timeconst and dampratio are only set in the debug branch above, but they are also
        # needed on this path to build modelFolder below.

    print("maddpg: {} wolves, {} sheep, {} blocks, saveTraj: {}, visualize: {}".format(
        numWolves, numSheeps, numBlocks, str(saveTraj), str(visualizeTraj)))

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numAgents))
    blocksID = list(range(numAgents, numEntities))

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [blockSize] * numBlocks

    wolfMaxSpeed = 1.0
    blockMaxSpeed = None
    sheepMaxSpeedOriginal = 1.3
    sheepMaxSpeed = sheepMaxSpeedOriginal * sheepSpeedMultiplier
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheeps + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound)
    if individualRewardWolf:
        rewardWolf = RewardWolfIndividual(wolvesID, sheepsID, entitiesSizeList, isCollision)
    else:
        rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    reset = ResetMultiAgentChasing(numAgents, numBlocks)
    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState, getVelFromAgentState)
    observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]

    reshapeAction = ReshapeAction()
    getCollisionForce = GetCollisionForce()
    applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
    applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList, getCollisionForce, getPosFromAgentState)
    integrateState = IntegrateState(numEntities, entitiesMovableList, massList, entityMaxSpeedList, getVelFromAgentState, getPosFromAgentState)
    transit = TransitMultiAgentChasing(numEntities, reshapeAction, applyActionForce, applyEnvironForce, integrateState)

    isTerminal = lambda state: False
    maxRunningStepsToSample = 100
    sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit, isTerminal, rewardFunc, reset)

    initObsForParams = observe(reset())
    obsShape = [initObsForParams[obsID].shape[0] for obsID in range(len(initObsForParams))]
    worldDim = 2
    actionDim = worldDim * 2 + 1
    layerWidth = [128, 128]

    # ------------ model ------------------------
    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)]

    dirName = os.path.dirname(__file__)
"maddpg{}wolves{}sheep{}blocks60000episodes25stepSheepSpeed1shared_agent".format(numWolves, numSheeps, numBlocks) # wolvesModelPaths = [os.path.join(dirName, '..', 'trainedModels', '3wolvesMaddpg', fileName + str(i) + '60000eps') for i in wolvesID] # [restoreVariables(model, path) for model, path in zip(wolvesModel, wolvesModelPaths)] # # actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy) # policy = lambda allAgentsStates: [actOneStepOneModel(model, observe(allAgentsStates)) for model in modelsList] # modelPaths = [os.path.join(dirName, '..', 'trainedModels', '3wolvesMaddpg_ExpEpsLengthAndSheepSpeed', fileName + str(i)) for i in # range(numAgents)] modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG','timeconst='+str(timeconst)+'dampratio='+str(dampratio)) modelPaths = [os.path.join(modelFolder, fileName + str(i) + '60000eps') for i in range(numAgents)] [restoreVariables(model, path) for model, path in zip(modelsList, modelPaths)] actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy) policy = lambda allAgentsStates: [actOneStepOneModel(model, observe(allAgentsStates)) for model in modelsList] trajList = [] numTrajToSample = 50 for i in range(numTrajToSample): traj = sampleTrajectory(policy) trajList.append(list(traj)) # saveTraj if saveTraj: trajFileName = "maddpg{}wolves{}sheep{}blocks{}eps{}stepSheepSpeed{}{}Traj".format(numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier, individStr) trajSavePath = os.path.join(dirName, '..', 'trajectory', trajFileName) saveToPickle(trajList, trajSavePath) # visualize if visualizeTraj: entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheeps + [blockColor] * numBlocks render = Render(entitiesSizeList, entitiesColorList, numAgents, getPosFromAgentState) trajToRender = np.concatenate(trajList) render(trajToRender)
def main():
    visualize = False

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    sheepMaxSpeed = 1.3
    wolfMaxSpeed = 1.0
    blockMaxSpeed = None
    wolfColor = np.array([0.85, 0.35, 0.35])
    sheepColor = np.array([0.35, 0.85, 0.35])
    blockColor = np.array([0.25, 0.25, 0.25])

    wolvesID = [0]
    sheepsID = [1]
    blocksID = [2]
    numWolves = len(wolvesID)
    numSheeps = len(sheepsID)
    numBlocks = len(blocksID)
    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks

    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [blockSize] * numBlocks
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheeps + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound)
    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    # fixed initial state: wolf, sheep, block, each as (x, y, vx, vy)
    reset = lambda: np.array([[-0.5, 0, 0, 0], [8, 8, 0, 0], [0, -8, 0, 0]])

    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState, getVelFromAgentState)
    observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]

    reshapeAction = lambda action: action
    getCollisionForce = GetCollisionForce()
    applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
    applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList, getCollisionForce, getPosFromAgentState)
    integrateState = FixedIntegrateState(numEntities, entitiesMovableList, massList, entityMaxSpeedList, getVelFromAgentState, getPosFromAgentState)
    transit = TransitMultiAgentChasing(numEntities, reshapeAction, applyActionForce, applyEnvironForce, integrateState)

    isTerminal = lambda state: False
    maxRunningSteps = 100
    sampleTrajectory = SampleTrajectory(maxRunningSteps, transit, isTerminal, rewardFunc, reset)

    initObsForParams = observe(reset())
    obsShape = [initObsForParams[obsID].shape for obsID in range(len(initObsForParams))]
    worldDim = 2

    trajList = []
    evalNum = 1
    for i in range(evalNum):
        np.random.seed(i)
        policy = lambda state: [[1, 0], [0, 0]]
        traj = sampleTrajectory(policy)
        print('traj', traj[0])
        trajList.append(traj)

    # saveTraj
    saveTraj = True
    if saveTraj:
        # NOTE: dirName is assumed to be defined at module level (os.path.dirname(__file__)).
        trajSavePath = os.path.join(dirName, '..', 'trajectory', 'LineMoveFixedBaseLine.pickle')
        saveToPickle(trajList, trajSavePath)

    # visualize
    if visualize:
        entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheeps + [blockColor] * numBlocks
        render = Render(entitiesSizeList, entitiesColorList, numAgents, getPosFromAgentState)
        trajToRender = np.concatenate(trajList)
        render(trajToRender)
def main():
    numTrajToSample = 500
    maxRunningStepsToSample = 50

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    sheepMaxSpeed = 1.3
    wolfMaxSpeed = 1.0
    blockMaxSpeed = None

    numWolves = 3
    numBlocks = 2
    maxEpisode = 60000
    numSheepsList = [1, 2, 4, 8]

    # ---- wolves with a shared reward ------
    meanRewardList = []
    seList = []
    for numSheeps in numSheepsList:
        numAgents = numWolves + numSheeps
        numEntities = numAgents + numBlocks
        wolvesID = list(range(numWolves))
        sheepsID = list(range(numWolves, numAgents))
        blocksID = list(range(numAgents, numEntities))

        entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [blockSize] * numBlocks
        entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheeps + [blockMaxSpeed] * numBlocks
        entitiesMovableList = [True] * numAgents + [False] * numBlocks
        massList = [1.0] * numEntities

        isCollision = IsCollision(getPosFromAgentState)
        rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
        punishForOutOfBound = PunishForOutOfBound()
        rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound)
        rewardFunc = lambda state, action, nextState: \
            list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

        reset = ResetMultiAgentChasing(numAgents, numBlocks)
        observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState, getVelFromAgentState)
        observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]

        reshapeAction = ReshapeAction()
        getCollisionForce = GetCollisionForce()
        applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
        applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList, getCollisionForce, getPosFromAgentState)
        integrateState = IntegrateState(numEntities, entitiesMovableList, massList, entityMaxSpeedList, getVelFromAgentState, getPosFromAgentState)
        transit = TransitMultiAgentChasing(numEntities, reshapeAction, applyActionForce, applyEnvironForce, integrateState)

        isTerminal = lambda state: False
        sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit, isTerminal, rewardFunc, reset)

        initObsForParams = observe(reset())
        obsShape = [initObsForParams[obsID].shape[0] for obsID in range(len(initObsForParams))]
        worldDim = 2
        actionDim = worldDim * 2 + 1
        layerWidth = [128, 128]

        buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
        modelsList = [buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)]

        dirName = os.path.dirname(__file__)
        fileName = "maddpg{}wolves{}sheep{}blocks{}eps_agent".format(numWolves, numSheeps, numBlocks, maxEpisode)
        modelPaths = [os.path.join(dirName, '..', 'trainedModels', '3wolvesMaddpg', fileName + str(i) + '60000eps')
                      for i in range(numAgents)]
        [restoreVariables(model, path) for model, path in zip(modelsList, modelPaths)]

        actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
        policy = lambda allAgentsStates: [actOneStepOneModel(model, observe(allAgentsStates)) for model in modelsList]

        trajRewardList = []
        for i in range(numTrajToSample):
            traj = sampleTrajectory(policy)
            trajReward = calcTrajRewardWithSharedWolfReward(traj)
            trajRewardList.append(trajReward)

        meanTrajReward = np.mean(trajRewardList)
        seTrajReward = np.std(trajRewardList) / np.sqrt(len(trajRewardList) - 1)
        print('meanTrajReward: ', meanTrajReward)
        meanRewardList.append(meanTrajReward)
        seList.append(seTrajReward)

    # ---- individually rewarded wolves ------
    meanRewardListIndividWolf = []
    seListIndividWolf = []
    for numSheeps in numSheepsList:
        numAgents = numWolves + numSheeps
        numEntities = numAgents + numBlocks
        wolvesID = list(range(numWolves))
        sheepsID = list(range(numWolves, numAgents))
        blocksID = list(range(numAgents, numEntities))

        entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [blockSize] * numBlocks
        entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheeps + [blockMaxSpeed] * numBlocks
        entitiesMovableList = [True] * numAgents + [False] * numBlocks
        massList = [1.0] * numEntities

        isCollision = IsCollision(getPosFromAgentState)
        rewardWolf = RewardWolfIndividual(wolvesID, sheepsID, entitiesSizeList, isCollision)
        punishForOutOfBound = PunishForOutOfBound()
        rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound)
        rewardFunc = lambda state, action, nextState: \
            list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

        reset = ResetMultiAgentChasing(numAgents, numBlocks)
        observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState, getVelFromAgentState)
        observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]

        reshapeAction = ReshapeAction()
        getCollisionForce = GetCollisionForce()
        applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
        applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList, getCollisionForce, getPosFromAgentState)
        integrateState = IntegrateState(numEntities, entitiesMovableList, massList, entityMaxSpeedList, getVelFromAgentState, getPosFromAgentState)
        transit = TransitMultiAgentChasing(numEntities, reshapeAction, applyActionForce, applyEnvironForce, integrateState)

        isTerminal = lambda state: False
        sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit, isTerminal, rewardFunc, reset)

        initObsForParams = observe(reset())
        obsShape = [initObsForParams[obsID].shape[0] for obsID in range(len(initObsForParams))]
        worldDim = 2
        actionDim = worldDim * 2 + 1
        layerWidth = [128, 128]

        buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
        modelsList = [buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)]

        dirName = os.path.dirname(__file__)
        fileName = "maddpgIndividWolf{}wolves{}sheep{}blocks{}eps_agent".format(numWolves, numSheeps, numBlocks, maxEpisode)
        modelPaths = [os.path.join(dirName, '..', 'trainedModels', '3wolvesMaddpg', fileName + str(i) + '60000eps')
                      for i in range(numAgents)]
        [restoreVariables(model, path) for model, path in zip(modelsList, modelPaths)]

        actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
        policy = lambda allAgentsStates: [actOneStepOneModel(model, observe(allAgentsStates)) for model in modelsList]

        trajRewardList = []
        for i in range(numTrajToSample):
            traj = sampleTrajectory(policy)
            trajReward = calcTrajRewardWithIndividualWolfReward(traj, wolvesID)
            trajRewardList.append(trajReward)

        meanTrajReward = np.mean(trajRewardList)
        seTrajReward = np.std(trajRewardList) / np.sqrt(len(trajRewardList) - 1)
        print('meanTrajReward: ', meanTrajReward)
        meanRewardListIndividWolf.append(meanTrajReward)
        seListIndividWolf.append(seTrajReward)

    print('meanRewardList: ', meanRewardList)
    print('std error: ', seList)
    print('meanRewardListIndividWolf: ', meanRewardListIndividWolf)
    print('std error: ', seListIndividWolf)

    plotResult = True
    if plotResult:
        fig = plt.figure()
        plt.errorbar(numSheepsList, meanRewardList, seList)
        plt.errorbar(numSheepsList, meanRewardListIndividWolf, seListIndividWolf)
        fig.suptitle('MADDPG Performance With 3 Wolves')
        plt.xlabel('Number of Sheep')
        plt.ylabel('Mean Episode Reward')
        plt.xticks(numSheepsList)
        plt.legend(['Shared reward', 'Individual reward'])
        plt.show()
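# calcTrajRewardWithSharedWolfReward and calcTrajRewardWithIndividualWolfReward are used above
# but not defined in this file. Below is only a minimal sketch of what they presumably compute,
# assuming each trajectory step is a tuple whose last element is the per-agent reward list
# (an assumption about the trajectory format, not the project's actual code).
def calcTrajRewardWithSharedWolfRewardSketch(traj):
    # all wolves share the same reward, so summing the first agent's reward over time suffices
    return np.sum([step[-1][0] for step in traj])

def calcTrajRewardWithIndividualWolfRewardSketch(traj, wolvesID):
    # sum each wolf's own reward over time, then total across wolves
    return np.sum([np.sum([step[-1][wolfID] for wolfID in wolvesID]) for step in traj])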
def main():
    debug = 1
    if debug:
        numWolves = 3
        numSheeps = 1
        numBlocks = 2
        timeconst = 0.5
        dampratio = 0.2
        saveTraj = False
        visualizeTraj = True
        maxTimeStep = 25
        sheepSpeedMultiplier = 1.0
        individualRewardWolf = 0
        hasWalls = 1.5
        dt = 0.05
        visualizeMujoco = True
    else:
        print(sys.argv)
        condition = json.loads(sys.argv[1])
        numWolves = int(condition['numWolves'])
        numSheeps = int(condition['numSheeps'])
        numBlocks = int(condition['numBlocks'])
        saveTraj = True
        visualizeTraj = False
        maxTimeStep = 50
        sheepSpeedMultiplier = 1.25
        individualRewardWolf = 1
        # NOTE: timeconst, dampratio, hasWalls, dt and visualizeMujoco are only set in the debug
        # branch above, but they are also needed on this path.

    print("maddpg: {} wolves, {} sheep, {} blocks, saveTraj: {}, visualize: {}".format(
        numWolves, numSheeps, numBlocks, str(saveTraj), str(visualizeTraj)))

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numAgents))
    blocksID = list(range(numAgents, numEntities))

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [blockSize] * numBlocks

    wolfMaxSpeed = 1.0
    blockMaxSpeed = None
    sheepMaxSpeedOriginal = 1.3
    sheepMaxSpeed = sheepMaxSpeedOriginal * sheepSpeedMultiplier
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheeps + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound)
    if individualRewardWolf:
        rewardWolf = RewardWolfIndividual(wolvesID, sheepsID, entitiesSizeList, isCollision)
    else:
        rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    dirName = os.path.dirname(__file__)
    if hasWalls:
        physicsDynamicsPath = os.path.join(
            dirName, '..', '..', 'environment', 'mujocoEnv', 'dt={}'.format(dt),
            'hasWalls={}_numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'.format(
                hasWalls, numBlocks, numSheeps, numWolves, timeconst, dampratio))
    else:
        physicsDynamicsPath = os.path.join(
            dirName, '..', '..', 'environment', 'mujocoEnv',
            'numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'.format(
                numBlocks, numSheeps, numWolves, timeconst, dampratio))

    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())
    envXml = xmltodict.unparse(envXmlDict)
    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)
    print(physicsSimulation.model.body_pos)

    qPosInit = [0, 0] * numAgents
    qVelInit = [0, 0] * numAgents
    qVelInitNoise = 0 * hasWalls
    qPosInitNoise = 0.8 * hasWalls
    getBlockRandomPos = lambda: np.random.uniform(-0.7 * hasWalls, +0.7 * hasWalls, 2)
    getBlockSpeed = lambda: np.zeros(2)
    numQPos = len(physicsSimulation.data.qpos)
    numQVel = len(physicsSimulation.data.qvel)
    sampleAgentsQPos = lambda: np.asarray(qPosInit) + np.random.uniform(low=-qPosInitNoise, high=qPosInitNoise, size=numQPos)
    sampleAgentsQVel = lambda: np.asarray(qVelInit) + np.random.uniform(low=-qVelInitNoise, high=qVelInitNoise, size=numQVel)

    minDistance = 0.2 + 2 * blockSize
    isOverlap = IsOverlap(minDistance)
    sampleBlockState = SampleBlockState(numBlocks, getBlockRandomPos, getBlockSpeed, isOverlap)
    reset = ResetUniformWithoutXPos(physicsSimulation, numAgents, numBlocks, sampleAgentsQPos, sampleAgentsQVel, sampleBlockState)

    damping = 2.5
    numSimulationFrames = int(0.1 / dt)
    agentsMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheeps
    reshapeAction = ReshapeAction()
    isTerminal = lambda state: False
    transit = TransitionFunctionWithoutXPos(physicsSimulation, numAgents, numSimulationFrames,
                                            damping * dt * numSimulationFrames, agentsMaxSpeedList,
                                            visualizeMujoco, isTerminal, reshapeAction)

    maxRunningStepsToSample = 100
    sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit, isTerminal, rewardFunc, reset)

    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState, getVelFromAgentState)
    observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]
    initObsForParams = observe(reset())
    obsShape = [initObsForParams[obsID].shape[0] for obsID in range(len(initObsForParams))]

    worldDim = 2
    actionDim = worldDim * 2 + 1
    layerWidth = [128, 128]

    # ------------ model ------------------------
    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)]

    if hasWalls:
        modelFolder = os.path.join(
            dirName, '..', 'trainedModels', 'mujocoMADDPGeavluateWall', 'dt={}'.format(dt),
            'hasWalls=' + str(hasWalls) + 'numBlocks=' + str(numBlocks) + 'numSheeps=' + str(numSheeps) +
            'numWolves=' + str(numWolves) + 'timeconst=' + str(timeconst) + 'dampratio=' + str(dampratio) +
            'individualRewardWolf=' + str(individualRewardWolf) + 'sheepSpeedMultiplier=' + str(sheepSpeedMultiplier))
        individStr = 'individ' if individualRewardWolf else 'shared'
        # NOTE: maxEpisode is not defined in this function; it is assumed to be a module-level
        # constant (the '60000eps' checkpoint suffix below suggests 60000).
        fileName = "maddpghasWalls={}{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}{}_agent".format(
            hasWalls, numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier, individStr)
    else:
        modelFolder = os.path.join(
            dirName, '..', 'trainedModels', 'mujocoMADDPG',
            'numBlocks=' + str(numBlocks) + 'numSheeps=' + str(numSheeps) + 'numWolves=' + str(numWolves) +
            'timeconst=' + str(timeconst) + 'dampratio=' + str(dampratio))
        fileName = "maddpg{}wolves{}sheep{}blocks60000episodes25stepSheepSpeed1.0shared_agent".format(
            numWolves, numSheeps, numBlocks)

    modelPaths = [os.path.join(modelFolder, fileName + str(i) + '60000eps') for i in range(numAgents)]
    [restoreVariables(model, path) for model, path in zip(modelsList, modelPaths)]

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    policy = lambda allAgentsStates: [actOneStepOneModel(model, observe(allAgentsStates)) for model in modelsList]

    trajList = []
    numTrajToSample = 50
    for i in range(numTrajToSample):
        np.random.seed(i)
        traj = sampleTrajectory(policy)
        trajList.append(list(traj))

    # saveTraj
    if saveTraj:
        trajFileName = "maddpg{}wolves{}sheep{}blocks{}eps{}stepSheepSpeed{}{}Traj".format(
            numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier, individStr)
        trajSavePath = os.path.join(dirName, '..', 'trajectory', trajFileName)
        saveToPickle(trajList, trajSavePath)

    # visualize
    if visualizeTraj:
        # NOTE: wolfColor, sheepColor and blockColor are assumed to be module-level constants.
        entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheeps + [blockColor] * numBlocks
        render = Render(entitiesSizeList, entitiesColorList, numAgents, getPosFromAgentState)
        print(trajList[0][0], '!!!3', trajList[0][1])
        trajToRender = np.concatenate(trajList)
        render(trajToRender)
def simuliateOneParameter(parameterOneCondiiton, evalNum, randomSeed):
    saveTraj = True
    visualizeTraj = False
    visualizeMujoco = False

    numWolves = parameterOneCondiiton['numWolves']
    numSheeps = parameterOneCondiiton['numSheeps']
    numBlocks = parameterOneCondiiton['numBlocks']
    timeconst = parameterOneCondiiton['timeconst']
    dampratio = parameterOneCondiiton['dampratio']
    maxTimeStep = parameterOneCondiiton['maxTimeStep']
    sheepSpeedMultiplier = parameterOneCondiiton['sheepSpeedMultiplier']
    individualRewardWolf = parameterOneCondiiton['individualRewardWolf']
    hasWalls = parameterOneCondiiton['hasWalls']
    dt = parameterOneCondiiton['dt']

    print("maddpg: {} wolves, {} sheep, {} blocks, saveTraj: {}, visualize: {}".format(
        numWolves, numSheeps, numBlocks, str(saveTraj), str(visualizeTraj)))

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numAgents))
    blocksID = list(range(numAgents, numEntities))

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [blockSize] * numBlocks

    wolfMaxSpeed = 1.0
    blockMaxSpeed = None
    sheepMaxSpeedOriginal = 1.3
    sheepMaxSpeed = sheepMaxSpeedOriginal * sheepSpeedMultiplier
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheeps + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound)
    if individualRewardWolf:
        rewardWolf = RewardWolfIndividual(wolvesID, sheepsID, entitiesSizeList, isCollision)
    else:
        rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    dirName = os.path.dirname(__file__)
    if hasWalls:
        physicsDynamicsPath = os.path.join(
            dirName, '..', '..', 'environment', 'mujocoEnv', 'dt={}'.format(dt),
            'hasWalls={}_numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'.format(
                hasWalls, numBlocks, numSheeps, numWolves, timeconst, dampratio))
    else:
        physicsDynamicsPath = os.path.join(
            dirName, '..', '..', 'environment', 'mujocoEnv',
            'numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'.format(
                numBlocks, numSheeps, numWolves, timeconst, dampratio))

    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())
    envXml = xmltodict.unparse(envXmlDict)
    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)

    qPosInit = [0, 0] * numAgents
    qVelInit = [0, 0] * numAgents
    qVelInitNoise = 0 * hasWalls
    qPosInitNoise = 0.8 * hasWalls

    getBlockRandomPos = lambda: np.random.uniform(-0.7 * hasWalls, +0.7 * hasWalls, 2)
    getBlockSpeed = lambda: np.zeros(2)
    numQPos = len(physicsSimulation.data.qpos)
    numQVel = len(physicsSimulation.data.qvel)
    sampleAgentsQPos = lambda: np.asarray(qPosInit) + np.random.uniform(low=-qPosInitNoise, high=qPosInitNoise, size=numQPos)
    sampleAgentsQVel = lambda: np.asarray(qVelInit) + np.random.uniform(low=-qVelInitNoise, high=qVelInitNoise, size=numQVel)

    minDistance = 0.2 + 2 * blockSize
    isOverlap = IsOverlap(minDistance)
    sampleBlockState = SampleBlockState(numBlocks, getBlockRandomPos, getBlockSpeed, isOverlap)
    reset = ResetUniformWithoutXPos(physicsSimulation, numAgents, numBlocks, sampleAgentsQPos, sampleAgentsQVel, sampleBlockState)

    damping = 2.5
    numSimulationFrames = int(0.1 / dt)
    agentsMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheeps
    reshapeAction = ReshapeAction()
    isTerminal = lambda state: False
    transit = TransitionFunctionWithoutXPos(physicsSimulation, numAgents, numSimulationFrames,
                                            damping * dt * numSimulationFrames, agentsMaxSpeedList,
                                            visualizeMujoco, isTerminal, reshapeAction)

    maxRunningStepsToSample = 100
    sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit, isTerminal, rewardFunc, reset)

    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState, getVelFromAgentState)
    observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]
    initObsForParams = observe(reset())
    obsShape = [initObsForParams[obsID].shape[0] for obsID in range(len(initObsForParams))]

    worldDim = 2
    actionDim = worldDim * 2 + 1
    layerWidth = [128, 128]

    # ------------ model ------------------------
    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)]

    if hasWalls:
        modelFolder = os.path.join(
            dirName, '..', 'trainedModels', 'mujocoMADDPGeavluateWall', 'dt={}'.format(dt),
            'hasWalls=' + str(hasWalls) + 'numBlocks=' + str(numBlocks) + 'numSheeps=' + str(numSheeps) +
            'numWolves=' + str(numWolves) + 'timeconst=' + str(timeconst) + 'dampratio=' + str(dampratio) +
            'individualRewardWolf=' + str(individualRewardWolf) + 'sheepSpeedMultiplier=' + str(sheepSpeedMultiplier))
        individStr = 'individ' if individualRewardWolf else 'shared'
        # NOTE: maxEpisode is not defined in this function; it is assumed to be a module-level constant.
        fileName = "maddpghasWalls={}{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}{}_agent".format(
            hasWalls, numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier, individStr)
    else:
        modelFolder = os.path.join(
            dirName, '..', 'trainedModels', 'mujocoMADDPG',
            'numBlocks=' + str(numBlocks) + 'numSheeps=' + str(numSheeps) + 'numWolves=' + str(numWolves) +
            'timeconst=' + str(timeconst) + 'dampratio=' + str(dampratio))
        fileName = "maddpg{}wolves{}sheep{}blocks60000episodes25stepSheepSpeed1.0shared_agent".format(
            numWolves, numSheeps, numBlocks)

    modelPaths = [os.path.join(modelFolder, fileName + str(i) + '60000eps') for i in range(numAgents)]
    [restoreVariables(model, path) for model, path in zip(modelsList, modelPaths)]

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    policy = lambda allAgentsStates: [actOneStepOneModel(model, observe(allAgentsStates)) for model in modelsList]

    startTime = time.time()
    trajList = []
    for i in range(evalNum):
        np.random.seed(i)
        traj = sampleTrajectory(policy)
        trajList.append(list(traj))
    endTime = time.time()
    print("Time taken {} seconds to generate {} trajectories".format((endTime - startTime), evalNum))

    # saveTraj
    if saveTraj:
        trajectoryFolder = os.path.join(dirName, '..', 'trajectory', 'evluateWall')
        if not os.path.exists(trajectoryFolder):
            os.makedirs(trajectoryFolder)
        trajectorySaveExtension = '.pickle'
        fixedParameters = {'randomSeed': randomSeed, 'evalNum': evalNum}
        generateTrajectorySavePath = GetSavePath(trajectoryFolder, trajectorySaveExtension, fixedParameters)
        trajectorySavePath = generateTrajectorySavePath(parameterOneCondiiton)
        saveToPickle(trajList, trajectorySavePath)

    # visualize
    if visualizeTraj:
        # NOTE: wolfColor, sheepColor and blockColor are assumed to be module-level constants.
        entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheeps + [blockColor] * numBlocks
        render = Render(entitiesSizeList, entitiesColorList, numAgents, getPosFromAgentState)
        print(trajList[0][0], '!!!3', trajList[0][1])
        trajToRender = np.concatenate(trajList)
        render(trajToRender)

    return endTime - startTime
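# simuliateOneParameter returns the wall-clock time spent generating evalNum trajectories for
# one parameter combination. A minimal driver sketch, assuming the parameter keys read by the
# function above; the exact parameter grid used in the original experiments is not shown here,
# and the values below simply mirror the debug settings of the walled-environment demo.
if __name__ == '__main__':
    parameterOneCondiiton = {
        'numWolves': 3, 'numSheeps': 1, 'numBlocks': 2,
        'timeconst': 0.5, 'dampratio': 0.2,
        'maxTimeStep': 25, 'sheepSpeedMultiplier': 1.0,
        'individualRewardWolf': 0, 'hasWalls': 1.5, 'dt': 0.05,
    }
    evalNum = 5
    randomSeed = 0
    elapsed = simuliateOneParameter(parameterOneCondiiton, evalNum, randomSeed)
    print('elapsed seconds:', elapsed)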