Example No. 1
def main():
    stateDim = env.observation_space.shape[0]
    actionDim = env.action_space.n
    buildModel = BuildModel(stateDim, actionDim)
    layersWidths = [30]
    writer, model = buildModel(layersWidths)

    dirName = os.path.dirname(__file__)
    modelPath = os.path.join(
        dirName, '..', 'trainedDQNModels',
        'epsilonIncrease=0.0002_epsilonMin=0_gamma=0.9_learningRate=0.001_maxEpisode=1000_maxTimeStep=2000_minibatchSize=128.ckpt'
    )
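    # Restore the trained DQN weights and act greedily on the network's Q-values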
    restoreVariables(model, modelPath)
    policy = lambda state: [ActGreedyByModel(getTrainQValue, model)(state)]

    isTerminal = IsTerminalMountCarDiscrete()
    transit = TransitMountCarDiscrete()
    getReward = rewardMountCarDiscrete
    reset = ResetMountCarDiscrete(seed=None, low=-1, high=0.4)

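    # Roll out 20 episodes with the greedy policy and optionally visualize each trajectory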
    for i in range(20):
        maxRunningSteps = 2000
        sampleTrajectory = SampleTrajectory(maxRunningSteps, transit,
                                            isTerminal, getReward, reset)
        trajectory = sampleTrajectory(policy)

        # visualize the trajectory
        showDemo = True
        if showDemo:
            visualize = VisualizeMountCarDiscrete()
            visualize(trajectory)
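Example No. 2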
def main():
    stateDim = env.observation_space.shape[0]
    actionDim = env.action_space.shape[0]
    actionHigh = env.action_space.high
    actionLow = env.action_space.low
    actionBound = (actionHigh - actionLow) / 2

    buildActorModel = BuildActorModel(stateDim, actionDim, actionBound)
    actorLayerWidths = [30]
    actorWriter, actorModel = buildActorModel(actorLayerWidths)

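    # Restore the trained DDPG actor and act deterministically (no exploration noise)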
    dirName = os.path.dirname(__file__)
    actorModelPath = os.path.join(
        dirName, '..', 'trainedDDPGModels',
        'Eps=300_High=0.4_actorModel=0_batch=128_env=MountainCarContinuous-v0_gam=0.9_lrActor=0.001_lrCritic=0.001_noiseVar=1_resetLow=-1_timeStep=2000_varDiscout=0.99995.ckpt'
    )

    restoreVariables(actorModel, actorModelPath)
    policy = ActDDPGOneStep(actionLow,
                            actionHigh,
                            actByPolicyTrain,
                            actorModel,
                            getNoise=None)

    isTerminal = IsTerminalMountCarContin()
    reset = ResetMountCarContin(seed=None, low=-1, high=0.4)

    transit = TransitGymMountCarContinuous()
    rewardFunc = RewardMountCarContin(isTerminal)

    for i in range(20):
        maxRunningSteps = 2000
        sampleTrajectory = SampleTrajectory(maxRunningSteps, transit,
                                            isTerminal, rewardFunc, reset)
        trajectory = sampleTrajectory(policy)

        # visualize the trajectory
        showDemo = True
        if showDemo:
            visualize = VisualizeMountCarContin()
            visualize(trajectory)
Example No. 3
def main():
    stateDim = env.observation_space.shape[0]
    actionDim = env.action_space.shape[0]
    actionHigh = env.action_space.high
    actionLow = env.action_space.low
    actionBound = (actionHigh - actionLow) / 2

    buildActorModel = BuildActorModel(stateDim, actionDim, actionBound)
    actorLayerWidths = [30]
    actorWriter, actorModel = buildActorModel(actorLayerWidths)

    dirName = os.path.dirname(__file__)  # added: dirName was not defined in this snippet
    actorModelPath = os.path.join(
        dirName, '..', 'trainedDDPGModels',
        'Eps=200_actorModel=0_batch=128_env=Pendulum-v0_gam=0.9_lrActor=0.001_lrCritic=0.001_noiseVar=3_timeStep=200_varDiscout=0.9995.ckpt'
    )
    restoreVariables(actorModel, actorModelPath)
    actOneStep = ActDDPGOneStep(actionLow,
                                actionHigh,
                                actByPolicyTrain,
                                actorModel,
                                getNoise=None)
    policy = lambda state: actOneStep(observe(state))

    isTerminal = isTerminalGymPendulum
    seed = None  # assumed: `seed` is not defined in this snippet
    reset = ResetGymPendulum(seed)
    transit = TransitGymPendulum()
    rewardFunc = RewardGymPendulum(angle_normalize)

    for i in range(10):
        maxRunningSteps = 200
        sampleTrajectory = SampleTrajectory(maxRunningSteps, transit,
                                            isTerminal, rewardFunc, reset)
        trajectory = sampleTrajectory(policy)

        # visualize the trajectory
        showDemo = True
        if showDemo:
            visualize = VisualizeGymPendulum()
            visualize(trajectory)
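Example No. 4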
def generateSingleCondition(condition):
    debug = 0
    if debug:


        damping=2.0
        frictionloss=0.0
        masterForce=1.0

        numWolves = 1
        numSheeps = 1
        numMasters = 1
        maxTimeStep = 25

        saveTraj=False
        saveImage=True
        visualizeMujoco=False
        visualizeTraj = True
        makeVideo=True
    else:

        # print(sys.argv)
        # condition = json.loads(sys.argv[1])
        damping = float(condition['damping'])
        frictionloss = float(condition['frictionloss'])
        masterForce = float(condition['masterForce'])

        numWolves = 1
        numSheeps = 1
        numMasters = 1
        maxTimeStep = 25

        saveTraj=False
        saveImage=True
        visualizeMujoco=False
        visualizeTraj = True
        makeVideo=False

    print("maddpg: saveTraj: {}, visualize: {}, damping: {}, frictionloss: {}".format(saveTraj, visualizeMujoco, damping, frictionloss))


    numAgents = numWolves + numSheeps+numMasters
    numEntities = numAgents + numMasters
    wolvesID = [0]
    sheepsID = [1]
    masterID = [2]

    wolfSize = 0.075
    sheepSize = 0.05
    masterSize = 0.075
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [masterSize] * numMasters

    wolfMaxSpeed = 1.0
    blockMaxSpeed = None


    entitiesMovableList = [True] * numAgents + [False] * numMasters
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision,punishForOutOfBound)


    rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)
    rewardMaster= lambda state, action, nextState: [-reward  for reward in rewardWolf(state, action, nextState)]
    rewardFunc = lambda state, action, nextState: \
        list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))+list(rewardMaster(state, action, nextState))

    dirName = os.path.dirname(__file__)
    # physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','rope','leased.xml')

    makePropertyList=MakePropertyList(transferNumberListToStr)

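    # Build property dictionaries that override the joint '@damping' and '@frictionloss' attributes in the MuJoCo XML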
    geomIds=[1,2,3]
    keyNameList=[0,1]
    valueList=[[damping,damping]]*len(geomIds)
    dampingParameter = makePropertyList(geomIds, keyNameList, valueList)

    changeJointDampingProperty=lambda envDict,geomPropertyDict:changeJointProperty(envDict,geomPropertyDict,'@damping')

    geomIds=[1,2,3]
    keyNameList=[0,1]
    valueList=[[frictionloss,frictionloss]]*len(geomIds)
    frictionlossParameter=makePropertyList(geomIds,keyNameList,valueList)
    changeJointFrictionlossProperty=lambda envDict,geomPropertyDict:changeJointProperty(envDict,geomPropertyDict,'@frictionloss')


    physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','rope','leasedNew.xml')
    # physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','rope','leased.xml')


    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())

    envXmlPropertyDictList = [dampingParameter, frictionlossParameter]
    changeEnvXmlPropertyFunctionList = [changeJointDampingProperty, changeJointFrictionlossProperty]
    for propertyDict, changeXmlProperty in zip(envXmlPropertyDictList, changeEnvXmlPropertyFunctionList):
        envXmlDict = changeXmlProperty(envXmlDict, propertyDict)

    envXml = xmltodict.unparse(envXmlDict)

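    # Load the modified XML into MuJoCo and build the simulation used by reset and transit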
    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)
    # print(physicsSimulation.model.body_pos)

    # print(dir(physicsSimulation.model))
    # print(dir(physicsSimulation.data),physicsSimulation.dataphysicsSimulation.data)

    # print(physicsSimulation.data.qpos,dir(physicsSimulation.data.qpos))
    # print(physicsSimulation.data.qpos,dir(physicsSimulation.data.qpos))
    qPosInit = (0, ) * 24
    qVelInit = (0, ) * 24
    qPosInitNoise = 0.4
    qVelInitNoise = 0
    numAgent = 2
    tiedAgentId = [0, 2]
    ropePartIndex = list(range(3, 12))
    maxRopePartLength = 0.06
    reset = ResetUniformWithoutXPosForLeashed(physicsSimulation, qPosInit, qVelInit, numAgent, tiedAgentId,ropePartIndex, maxRopePartLength, qPosInitNoise, qVelInitNoise)
    numSimulationFrames=10
    isTerminal= lambda state: False
    reshapeActionList = [ReshapeAction(5),ReshapeAction(5),ReshapeAction(masterForce)]
    transit=TransitionFunctionWithoutXPos(physicsSimulation, numSimulationFrames, visualizeMujoco,isTerminal, reshapeActionList)

    # damping=2.5
    # numSimulationFrames =int(0.1/dt)
    # agentsMaxSpeedList = [wolfMaxSpeed]* numWolves + [sheepMaxSpeed] * numSheeps
    # reshapeAction = ReshapeAction()
    # isTerminal = lambda state: False
    # transit = TransitionFunctionWithoutXPos(physicsSimulation,numAgents , numSimulationFrames,damping*dt*numSimulationFrames,agentsMaxSpeedList,visualizeMujoco,isTerminal,reshapeAction)


    maxRunningStepsToSample = 100
    sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit, isTerminal, rewardFunc, reset)

    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, masterID, getPosFromAgentState, getVelFromAgentState)
    observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]

    initObsForParams = observe(reset())
    obsShape = [initObsForParams[obsID].shape[0] for obsID in range(len(initObsForParams))]

    worldDim = 2
    actionDim = worldDim * 2 + 1

    layerWidth = [128, 128]

    # ------------ model ------------------------
    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)]


    # individStr = 'individ' if individualRewardWolf else 'shared'
    # fileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}{}_agent".format(
    #     numWolves, numSheeps, numMasters, maxEpisode, maxTimeStep, sheepSpeedMultiplier, individStr)


    # wolvesModelPaths = [os.path.join(dirName, '..', 'trainedModels', '3wolvesMaddpg', fileName + str(i) + '60000eps') for i in wolvesID]
    # [restoreVariables(model, path) for model, path in zip(wolvesModel, wolvesModelPaths)]
    #
    # actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    # policy = lambda allAgentsStates: [actOneStepOneModel(model, observe(allAgentsStates)) for model in modelsList]

    # modelPaths = [os.path.join(dirName, '..', 'trainedModels', '3wolvesMaddpg_ExpEpsLengthAndSheepSpeed', fileName + str(i)) for i in
    #               range(numAgents)]
    # modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG')
    # modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG','timeconst='+str(timeconst)+'dampratio='+str(dampratio))

    maxEpisode = 60000  # assumed from the '60000eps' checkpoint suffix; not set elsewhere in this snippet
    modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPGLeasedFixedEnv2','damping={}_frictionloss={}_masterForce={}'.format(damping,frictionloss,masterForce))
    fileName = "maddpg{}episodes{}step_agent".format(maxEpisode, maxTimeStep)

    modelPaths = [os.path.join(modelFolder,  fileName + str(i) + '60000eps') for i in range(numAgents)]

    [restoreVariables(model, path) for model, path in zip(modelsList, modelPaths)]

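    # One policy step queries every restored MADDPG actor on the current observations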
    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    policy = lambda allAgentsStates: [actOneStepOneModel(model, observe(allAgentsStates)) for model in modelsList]


    trajList = []
    numTrajToSample = 5
    for i in range(numTrajToSample):
        np.random.seed(i)
        traj = sampleTrajectory(policy)
        trajList.append(list(traj))

    # saveTraj
    if saveTraj:
        trajFileName = "maddpg{}wolves{}sheep{}blocks{}eps{}stepSheepSpeed{}{}Traj".format(numWolves, numSheeps, numMasters, maxEpisode, maxTimeStep, sheepSpeedMultiplier, individStr)
        trajSavePath = os.path.join(dirName, '..', 'trajectory', trajFileName)
        saveToPickle(trajList, trajSavePath)


    # visualize
    if visualizeTraj:
        demoFolder = os.path.join(dirName, '..', 'demos', 'mujocoMADDPGLeasedFixedEnv2','damping={}_frictionloss={}_masterForce={}'.format(damping,frictionloss,masterForce))

        if not os.path.exists(demoFolder):
            os.makedirs(demoFolder)
        entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheeps + [masterColor] * numMasters
        render = Render(entitiesSizeList, entitiesColorList, numAgents,demoFolder,saveImage, getPosFromAgentState)
        # print(trajList[0][0],'!!!3',trajList[0][1])
        trajToRender = np.concatenate(trajList)
        render(trajToRender)
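Example No. 5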
def main():
    numAgents = 2
    stateDim = numAgents * 2
    actionLow = -1
    actionHigh = 1
    actionBound = (actionHigh - actionLow) / 2
    actionDim = 2

    buildActorModel = BuildActorModel(stateDim, actionDim, actionBound)
    actorLayerWidths = [64]
    actorWriter, actorModel = buildActorModel(actorLayerWidths)

    dirName = os.path.dirname(__file__)
    actorModelPath = os.path.join(
        dirName, '..', 'trainedDDPGModels',
        'actorModel=0_dimension=2_gamma=0.95_learningRateActor=0.001_learningRateCritic=0.001_maxEpisode=2000_maxTimeStep=20_minibatchSize=32_wolfSpeed=0.5.ckpt'
    )
    # 'actorModel=0_dimension=2_gamma=0.95_learningRateActor=0.01_learningRateCritic=0.01_maxEpisode=400_maxTimeStep=100_minibatchSize=32_wolfSpeed=1.ckpt')
    # 'actorModel=0_dimension=2_gamma=0.95_learningRateActor=0.01_learningRateCritic=0.01_maxEpisode=1000_maxTimeStep=100_minibatchSize=32_wolfSpeed=0.5.ckpt')
    # 'actorModel=0_dimension=2_gamma=0.95_learningRateActor=0.01_learningRateCritic=0.01_maxEpisode=5000_maxTimeStep=100_minibatchSize=32_wolfSpeed=0.5.ckpt')

    restoreVariables(actorModel, actorModelPath)
    sheepPolicy = ActDDPGOneStep(actionLow,
                                 actionHigh,
                                 actByPolicyTrain,
                                 actorModel,
                                 getNoise=None)

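    # The wolf follows a hard-coded heat-seeking policy toward the DDPG-controlled sheep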
    sheepId = 0
    wolfId = 1
    getSheepPos = GetAgentPosFromState(sheepId)
    getWolfPos = GetAgentPosFromState(wolfId)

    wolfSpeed = 0.5
    wolfPolicy = HeatSeekingContinuousDeterministicPolicy(
        getWolfPos, getSheepPos, wolfSpeed)
    xBoundary = (0, 20)
    yBoundary = (0, 20)
    stayWithinBoundary = StayWithinBoundary(xBoundary, yBoundary)
    transit = TransitForNoPhysics(getIntendedNextState, stayWithinBoundary)
    # transit = TransitWithSingleWolf(physicalTransition, wolfPolicy)

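    # Sheep reward: a small per-step alive bonus, a terminal penalty when caught, plus a boundary-punishment heuristic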
    maxTimeStep = 20
    sheepAliveBonus = 1 / maxTimeStep
    sheepTerminalPenalty = 20
    killzoneRadius = 0
    isTerminal = IsTerminal(getWolfPos, getSheepPos, killzoneRadius)
    getBoundaryPunishment = GetBoundaryPunishment(xBoundary,
                                                  yBoundary,
                                                  sheepIndex=0,
                                                  punishmentVal=10)
    rewardSheep = RewardFunctionCompete(sheepAliveBonus, sheepTerminalPenalty,
                                        isTerminal)
    rewardSheepWithBoundaryHeuristics = RewardSheepWithBoundaryHeuristics(
        rewardSheep, getIntendedNextState, getBoundaryPunishment, getSheepPos)

    getSheepAction = lambda actions: [
        actions[sheepId * actionDim], actions[sheepId * actionDim + 1]
    ]
    getReward = lambda state, action, nextState: rewardSheepWithBoundaryHeuristics(
        state, getSheepAction(action), nextState)

    policy = lambda state: list(sheepPolicy(state)) + list(wolfPolicy(state))
    # reset = Reset(xBoundary, yBoundary, numAgents)
    reset = lambda: np.array([10, 10, 15, 5])

    for i in range(10):
        maxRunningSteps = 50
        sampleTrajectory = SampleTrajectory(maxRunningSteps, transit,
                                            isTerminal, getReward, reset)
        trajectory = sampleTrajectory(policy)

        # visualize the trajectory
        showDemo = True
        if showDemo:
            observe = Observe(trajectory, numAgents)

            fullScreen = False
            screenWidth = 800
            screenHeight = 800
            screen = initializeScreen(fullScreen, screenWidth, screenHeight)

            leaveEdgeSpace = 200
            lineWidth = 3
            xBoundary = [leaveEdgeSpace, screenWidth - leaveEdgeSpace * 2]
            yBoundary = [leaveEdgeSpace, screenHeight - leaveEdgeSpace * 2]
            screenColor = THECOLORS['black']
            lineColor = THECOLORS['white']

            drawBackground = DrawBackground(screen, screenColor, xBoundary,
                                            yBoundary, lineColor, lineWidth)
            circleSize = 10
            positionIndex = [0, 1]
            drawState = DrawState(screen, circleSize, positionIndex,
                                  drawBackground)

            numberOfAgents = 2
            chasingColors = [THECOLORS['green'], THECOLORS['red']]
            colorSpace = chasingColors[:numberOfAgents]

            FPS = 60
            chaseTrial = ChaseTrialWithTraj(FPS,
                                            colorSpace,
                                            drawState,
                                            saveImage=True)

            rawXRange = [0, 20]
            rawYRange = [0, 20]
            scaledXRange = [210, 590]
            scaledYRange = [210, 590]
            scaleTrajectory = ScaleTrajectory(positionIndex, rawXRange,
                                              rawYRange, scaledXRange,
                                              scaledYRange)

            oldFPS = 5
            adjustFPS = AdjustDfFPStoTraj(oldFPS, FPS)

            getTrajectory = lambda rawTrajectory: scaleTrajectory(
                adjustFPS(rawTrajectory))
            positionList = [observe(index) for index in range(len(trajectory))]
            positionListToDraw = getTrajectory(positionList)

            currentDir = os.getcwd()
            parentDir = os.path.abspath(os.path.join(currentDir, os.pardir))
            imageFolderName = 'Demo'
            saveImageDir = os.path.join(os.path.join(parentDir, 'chasingDemo'),
                                        imageFolderName)
            if not os.path.exists(saveImageDir):
                os.makedirs(saveImageDir)

            chaseTrial(numberOfAgents, positionListToDraw, saveImageDir)
Example No. 6
def main():
    debug = 1
    if debug:
        numWolves = 1
        numSheeps = 1
        numBlocks = 1
        saveTraj = False
        visualizeTraj = True
        maxTimeStep = 25
        sheepSpeedMultiplier = 1
        individualRewardWolf = 0

        timeconst=0.5
        dampratio=0.2

    else:
        print(sys.argv)
        condition = json.loads(sys.argv[1])
        numWolves = int(condition['numWolves'])
        numSheeps = int(condition['numSheeps'])
        numBlocks = int(condition['numBlocks'])
        saveTraj = True
        visualizeTraj = False
        maxTimeStep = 50
        sheepSpeedMultiplier = 1.25
        individualRewardWolf = 1

    print("maddpg: {} wolves, {} sheep, {} blocks, saveTraj: {}, visualize: {}".format(numWolves, numSheeps, numBlocks, str(saveTraj), str(visualizeTraj)))


    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numAgents))
    blocksID = list(range(numAgents, numEntities))

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [blockSize] * numBlocks

    wolfMaxSpeed = 1.0
    blockMaxSpeed = None
    sheepMaxSpeedOriginal = 1.3
    sheepMaxSpeed = sheepMaxSpeedOriginal * sheepSpeedMultiplier
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheeps + [blockMaxSpeed] * numBlocks

    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision,
                              punishForOutOfBound)

    if individualRewardWolf:
        rewardWolf = RewardWolfIndividual(wolvesID, sheepsID, entitiesSizeList, isCollision)
    else:
        rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision)

    rewardFunc = lambda state, action, nextState: list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

    reset = ResetMultiAgentChasing(numAgents, numBlocks)
    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState, getVelFromAgentState)
    observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]

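    # Particle-world dynamics: apply action and collision forces, then integrate states under per-entity speed limits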
    reshapeAction = ReshapeAction()
    getCollisionForce = GetCollisionForce()
    applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
    applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList,
                                          getCollisionForce, getPosFromAgentState)
    integrateState = IntegrateState(numEntities, entitiesMovableList, massList,
                                    entityMaxSpeedList, getVelFromAgentState, getPosFromAgentState)
    transit = TransitMultiAgentChasing(numEntities, reshapeAction, applyActionForce, applyEnvironForce, integrateState)

    isTerminal = lambda state: False
    maxRunningStepsToSample = 100
    sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit, isTerminal, rewardFunc, reset)

    initObsForParams = observe(reset())
    obsShape = [initObsForParams[obsID].shape[0] for obsID in range(len(initObsForParams))]

    worldDim = 2
    actionDim = worldDim * 2 + 1

    layerWidth = [128, 128]

    # ------------ model ------------------------
    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)]

    dirName = os.path.dirname(__file__)
    # individStr = 'individ' if individualRewardWolf else 'shared'
    # fileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}{}_agent".format(
    #     numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier, individStr)
    fileName = "maddpg{}wolves{}sheep{}blocks60000episodes25stepSheepSpeed1shared_agent".format(numWolves, numSheeps, numBlocks)
    # wolvesModelPaths = [os.path.join(dirName, '..', 'trainedModels', '3wolvesMaddpg', fileName + str(i) + '60000eps') for i in wolvesID]
    # [restoreVariables(model, path) for model, path in zip(wolvesModel, wolvesModelPaths)]
    #
    # actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    # policy = lambda allAgentsStates: [actOneStepOneModel(model, observe(allAgentsStates)) for model in modelsList]

    # modelPaths = [os.path.join(dirName, '..', 'trainedModels', '3wolvesMaddpg_ExpEpsLengthAndSheepSpeed', fileName + str(i)) for i in
    #               range(numAgents)]
    modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG','timeconst='+str(timeconst)+'dampratio='+str(dampratio))
    modelPaths = [os.path.join(modelFolder, fileName + str(i) + '60000eps') for i in range(numAgents)]

    [restoreVariables(model, path) for model, path in zip(modelsList, modelPaths)]

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    policy = lambda allAgentsStates: [actOneStepOneModel(model, observe(allAgentsStates)) for model in modelsList]


    trajList = []
    numTrajToSample = 50
    for i in range(numTrajToSample):
        traj = sampleTrajectory(policy)
        trajList.append(list(traj))

    # saveTraj
    if saveTraj:
        trajFileName = "maddpg{}wolves{}sheep{}blocks{}eps{}stepSheepSpeed{}{}Traj".format(numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier, individStr)
        trajSavePath = os.path.join(dirName, '..', 'trajectory', trajFileName)
        saveToPickle(trajList, trajSavePath)


    # visualize
    if visualizeTraj:
        entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheeps + [blockColor] * numBlocks
        render = Render(entitiesSizeList, entitiesColorList, numAgents, getPosFromAgentState)
        trajToRender = np.concatenate(trajList)
        render(trajToRender)
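Example No. 7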
def main():
    numTrajToSample = 500
    maxRunningStepsToSample = 50

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2

    sheepMaxSpeed = 1.3
    wolfMaxSpeed = 1.0
    blockMaxSpeed = None

    numWolves = 3
    numBlocks = 2
    maxEpisode = 60000

    numSheepsList = [1, 2, 4, 8]
    meanRewardList = []
    seList = []

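# ---- wolves with shared reward ------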
    for numSheeps in numSheepsList:
        numAgents = numWolves + numSheeps
        numEntities = numAgents + numBlocks
        wolvesID = list(range(numWolves))
        sheepsID = list(range(numWolves, numAgents))
        blocksID = list(range(numAgents, numEntities))

        entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [
            blockSize
        ] * numBlocks
        entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [
            sheepMaxSpeed
        ] * numSheeps + [blockMaxSpeed] * numBlocks
        entitiesMovableList = [True] * numAgents + [False] * numBlocks
        massList = [1.0] * numEntities

        isCollision = IsCollision(getPosFromAgentState)
        rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList,
                                isCollision)
        punishForOutOfBound = PunishForOutOfBound()
        rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList,
                                  getPosFromAgentState, isCollision,
                                  punishForOutOfBound)

        rewardFunc = lambda state, action, nextState: \
            list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

        reset = ResetMultiAgentChasing(numAgents, numBlocks)
        observeOneAgent = lambda agentID: Observe(
            agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState,
            getVelFromAgentState)
        observe = lambda state: [
            observeOneAgent(agentID)(state) for agentID in range(numAgents)
        ]

        reshapeAction = ReshapeAction()
        getCollisionForce = GetCollisionForce()
        applyActionForce = ApplyActionForce(wolvesID, sheepsID,
                                            entitiesMovableList)
        applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList,
                                              entitiesSizeList,
                                              getCollisionForce,
                                              getPosFromAgentState)
        integrateState = IntegrateState(numEntities, entitiesMovableList,
                                        massList, entityMaxSpeedList,
                                        getVelFromAgentState,
                                        getPosFromAgentState)
        transit = TransitMultiAgentChasing(numEntities, reshapeAction,
                                           applyActionForce, applyEnvironForce,
                                           integrateState)

        isTerminal = lambda state: False
        sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit,
                                            isTerminal, rewardFunc, reset)

        initObsForParams = observe(reset())
        obsShape = [
            initObsForParams[obsID].shape[0]
            for obsID in range(len(initObsForParams))
        ]

        worldDim = 2
        actionDim = worldDim * 2 + 1

        layerWidth = [128, 128]

        buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
        modelsList = [
            buildMADDPGModels(layerWidth, agentID)
            for agentID in range(numAgents)
        ]

        dirName = os.path.dirname(__file__)
        fileName = "maddpg{}wolves{}sheep{}blocks{}eps_agent".format(
            numWolves, numSheeps, numBlocks, maxEpisode)
        modelPaths = [
            os.path.join(dirName, '..', 'trainedModels', '3wolvesMaddpg',
                         fileName + str(i) + '60000eps')
            for i in range(numAgents)
        ]

        [
            restoreVariables(model, path)
            for model, path in zip(modelsList, modelPaths)
        ]

        actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
        policy = lambda allAgentsStates: [
            actOneStepOneModel(model, observe(allAgentsStates))
            for model in modelsList
        ]

        trajRewardList = []
        for i in range(numTrajToSample):
            traj = sampleTrajectory(policy)
            trajReward = calcTrajRewardWithSharedWolfReward(traj)
            trajRewardList.append(trajReward)

        meanTrajReward = np.mean(trajRewardList)
        seTrajReward = np.std(trajRewardList) / np.sqrt(
            len(trajRewardList) - 1)
        print('meanTrajReward: ', meanTrajReward)
        meanRewardList.append(meanTrajReward)
        seList.append(seTrajReward)


# ---- individually rewarded wolves ------

    meanRewardListIndividWolf = []
    seListIndividWolf = []
    for numSheeps in numSheepsList:
        numAgents = numWolves + numSheeps
        numEntities = numAgents + numBlocks
        wolvesID = list(range(numWolves))
        sheepsID = list(range(numWolves, numAgents))
        blocksID = list(range(numAgents, numEntities))

        entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [
            blockSize
        ] * numBlocks
        entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [
            sheepMaxSpeed
        ] * numSheeps + [blockMaxSpeed] * numBlocks
        entitiesMovableList = [True] * numAgents + [False] * numBlocks
        massList = [1.0] * numEntities

        isCollision = IsCollision(getPosFromAgentState)
        rewardWolf = RewardWolfIndividual(wolvesID, sheepsID, entitiesSizeList,
                                          isCollision)
        punishForOutOfBound = PunishForOutOfBound()
        rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList,
                                  getPosFromAgentState, isCollision,
                                  punishForOutOfBound)

        rewardFunc = lambda state, action, nextState: \
            list(rewardWolf(state, action, nextState)) + list(rewardSheep(state, action, nextState))

        reset = ResetMultiAgentChasing(numAgents, numBlocks)
        observeOneAgent = lambda agentID: Observe(
            agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState,
            getVelFromAgentState)
        observe = lambda state: [
            observeOneAgent(agentID)(state) for agentID in range(numAgents)
        ]

        reshapeAction = ReshapeAction()
        getCollisionForce = GetCollisionForce()
        applyActionForce = ApplyActionForce(wolvesID, sheepsID,
                                            entitiesMovableList)
        applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList,
                                              entitiesSizeList,
                                              getCollisionForce,
                                              getPosFromAgentState)
        integrateState = IntegrateState(numEntities, entitiesMovableList,
                                        massList, entityMaxSpeedList,
                                        getVelFromAgentState,
                                        getPosFromAgentState)
        transit = TransitMultiAgentChasing(numEntities, reshapeAction,
                                           applyActionForce, applyEnvironForce,
                                           integrateState)

        isTerminal = lambda state: False
        sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit,
                                            isTerminal, rewardFunc, reset)

        initObsForParams = observe(reset())
        obsShape = [
            initObsForParams[obsID].shape[0]
            for obsID in range(len(initObsForParams))
        ]

        worldDim = 2
        actionDim = worldDim * 2 + 1

        layerWidth = [128, 128]

        buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
        modelsList = [
            buildMADDPGModels(layerWidth, agentID)
            for agentID in range(numAgents)
        ]

        dirName = os.path.dirname(__file__)
        fileName = "maddpgIndividWolf{}wolves{}sheep{}blocks{}eps_agent".format(
            numWolves, numSheeps, numBlocks, maxEpisode)
        modelPaths = [
            os.path.join(dirName, '..', 'trainedModels', '3wolvesMaddpg',
                         fileName + str(i) + '60000eps')
            for i in range(numAgents)
        ]

        [
            restoreVariables(model, path)
            for model, path in zip(modelsList, modelPaths)
        ]

        actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
        policy = lambda allAgentsStates: [
            actOneStepOneModel(model, observe(allAgentsStates))
            for model in modelsList
        ]

        trajRewardList = []
        for i in range(numTrajToSample):
            traj = sampleTrajectory(policy)
            trajReward = calcTrajRewardWithIndividualWolfReward(traj, wolvesID)
            trajRewardList.append(trajReward)

        meanTrajReward = np.mean(trajRewardList)
        seTrajReward = np.std(trajRewardList) / np.sqrt(
            len(trajRewardList) - 1)

        print('meanTrajReward: ', meanTrajReward)

        meanRewardListIndividWolf.append(meanTrajReward)
        seListIndividWolf.append(seTrajReward)

    print('meanRewardList: ', meanRewardList)
    print('std error: ', seList)
    print('meanRewardListIndividWolf: ', meanRewardListIndividWolf)
    print('std error: ', seListIndividWolf)

    plotResult = True
    if plotResult:
        fig = plt.figure()

        plt.errorbar(numSheepsList, meanRewardList, seList)
        plt.errorbar(numSheepsList, meanRewardListIndividWolf,
                     seListIndividWolf)

        fig.suptitle('MADDPG Performance With 3 Wolves')
        plt.xlabel('Number of Sheep')
        plt.ylabel('Mean Episode Reward')
        plt.xticks(numSheepsList)

        plt.legend(['Shared reward', 'Individual reward'])
        plt.show()
Example No. 8
def main():
    debug = 1
    if debug:
        numWolves = 3
        numSheeps = 1
        numBlocks = 2
        timeconst = 0.5
        dampratio = 0.2
        saveTraj = False
        visualizeTraj = True
        maxTimeStep = 25
        maxEpisode = 60000  # assumed from the '60000eps' checkpoint suffix; not defined in the original snippet
        sheepSpeedMultiplier = 1.0
        individualRewardWolf = 0
        hasWalls = 1.5
        dt = 0.05

        visualizeMujoco = True

    else:
        print(sys.argv)
        condition = json.loads(sys.argv[1])
        numWolves = int(condition['numWolves'])
        numSheeps = int(condition['numSheeps'])
        numBlocks = int(condition['numBlocks'])
        saveTraj = True
        visualizeTraj = False
        maxTimeStep = 50
        sheepSpeedMultiplier = 1.25
        individualRewardWolf = 1

    print(
        "maddpg: {} wolves, {} sheep, {} blocks, saveTraj: {}, visualize: {}".
        format(numWolves, numSheeps, numBlocks, str(saveTraj),
               str(visualizeTraj)))

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numAgents))
    blocksID = list(range(numAgents, numEntities))

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [
        blockSize
    ] * numBlocks

    wolfMaxSpeed = 1.0
    blockMaxSpeed = None
    sheepMaxSpeedOriginal = 1.3
    sheepMaxSpeed = sheepMaxSpeedOriginal * sheepSpeedMultiplier
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [
        sheepMaxSpeed
    ] * numSheeps + [blockMaxSpeed] * numBlocks

    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList,
                              getPosFromAgentState, isCollision,
                              punishForOutOfBound)

    if individualRewardWolf:
        rewardWolf = RewardWolfIndividual(wolvesID, sheepsID, entitiesSizeList,
                                          isCollision)
    else:
        rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList,
                                isCollision)

    rewardFunc = lambda state, action, nextState: list(
        rewardWolf(state, action, nextState)) + list(
            rewardSheep(state, action, nextState))
    dirName = os.path.dirname(__file__)
    # physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','numBlocks=1_numSheeps=1_numWolves=1.xml')
    if hasWalls:
        # physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','hasWalls=1_numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'.format(numBlocks,numSheeps,numWolves,timeconst,dampratio))
        physicsDynamicsPath = os.path.join(
            dirName, '..', '..', 'environment', 'mujocoEnv',
            'dt={}'.format(dt),
            'hasWalls={}_numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'
            .format(hasWalls, numBlocks, numSheeps, numWolves, timeconst,
                    dampratio))

        # physicsDynamicsPath=os.path.join(dirName,'..','..','environment','mujocoEnv','crossTest3w1s2b.xml')

    else:
        physicsDynamicsPath = os.path.join(
            dirName, '..', '..', 'environment', 'mujocoEnv',
            'numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'
            .format(numBlocks, numSheeps, numWolves, timeconst, dampratio))

    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())
    envXml = xmltodict.unparse(envXmlDict)

    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)
    print(physicsSimulation.model.body_pos)

    # print(dir(physicsSimulation.model))
    # print(dir(physicsSimulation.data),physicsSimulation.dataphysicsSimulation.data)

    # print(physicsSimulation.data.qpos,dir(physicsSimulation.data.qpos))
    # print(physicsSimulation.data.qpos,dir(physicsSimulation.data.qpos))
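    # Sample agent positions and velocities uniformly around zero; blocks are placed randomly without overlapping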
    qPosInit = [0, 0] * numAgents
    qVelInit = [0, 0] * numAgents
    qVelInitNoise = 0 * hasWalls
    qPosInitNoise = 0.8 * hasWalls
    getBlockRandomPos = lambda: np.random.uniform(-0.7 * hasWalls, +0.7 *
                                                  hasWalls, 2)
    getBlockSpeed = lambda: np.zeros(2)

    numQPos = len(physicsSimulation.data.qpos)
    numQVel = len(physicsSimulation.data.qvel)

    sampleAgentsQPos = lambda: np.asarray(qPosInit) + np.random.uniform(
        low=-qPosInitNoise, high=qPosInitNoise, size=numQPos)
    sampleAgentsQVel = lambda: np.asarray(qVelInit) + np.random.uniform(
        low=-qVelInitNoise, high=qVelInitNoise, size=numQVel)

    minDistance = 0.2 + 2 * blockSize
    isOverlap = IsOverlap(minDistance)

    sampleBlockState = SampleBlockState(numBlocks, getBlockRandomPos,
                                        getBlockSpeed, isOverlap)

    reset = ResetUniformWithoutXPos(physicsSimulation, numAgents, numBlocks,
                                    sampleAgentsQPos, sampleAgentsQVel,
                                    sampleBlockState)

    damping = 2.5
    numSimulationFrames = int(0.1 / dt)
    agentsMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed
                                                       ] * numSheeps
    reshapeAction = ReshapeAction()
    isTerminal = lambda state: False
    transit = TransitionFunctionWithoutXPos(physicsSimulation, numAgents,
                                            numSimulationFrames,
                                            damping * dt * numSimulationFrames,
                                            agentsMaxSpeedList,
                                            visualizeMujoco, isTerminal,
                                            reshapeAction)

    maxRunningStepsToSample = 100
    sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit,
                                        isTerminal, rewardFunc, reset)

    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID,
                                              blocksID, getPosFromAgentState,
                                              getVelFromAgentState)
    observe = lambda state: [
        observeOneAgent(agentID)(state) for agentID in range(numAgents)
    ]

    initObsForParams = observe(reset())
    obsShape = [
        initObsForParams[obsID].shape[0]
        for obsID in range(len(initObsForParams))
    ]

    worldDim = 2
    actionDim = worldDim * 2 + 1

    layerWidth = [128, 128]

    # ------------ model ------------------------
    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [
        buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)
    ]

    # individStr = 'individ' if individualRewardWolf else 'shared'
    # fileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}{}_agent".format(
    #     numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier, individStr)

    # wolvesModelPaths = [os.path.join(dirName, '..', 'trainedModels', '3wolvesMaddpg', fileName + str(i) + '60000eps') for i in wolvesID]
    # [restoreVariables(model, path) for model, path in zip(wolvesModel, wolvesModelPaths)]
    #
    # actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    # policy = lambda allAgentsStates: [actOneStepOneModel(model, observe(allAgentsStates)) for model in modelsList]

    # modelPaths = [os.path.join(dirName, '..', 'trainedModels', '3wolvesMaddpg_ExpEpsLengthAndSheepSpeed', fileName + str(i)) for i in
    #               range(numAgents)]
    # modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG')
    # modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG','timeconst='+str(timeconst)+'dampratio='+str(dampratio))
    if hasWalls:
        # modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG','hasWalls=1'+'numBlocks='+str(numBlocks)+'numSheeps='+str(numSheeps)+'numWolves='+str(numWolves)+'timeconst='+str(timeconst)+'dampratio='+str(dampratio))
        # modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG','dt={}'.format(dt),'hasWalls='+str(hasWalls)+'numBlocks='+str(numBlocks)+'numSheeps='+str(numSheeps)+'numWolves='+str(numWolves)+'timeconst='+str(timeconst)+'dampratio='+str(dampratio)+'individualRewardWolf='+str(individualRewardWolf)+'sheepSpeedMultiplier='+str(sheepSpeedMultiplier))
        # modelFolder = os.path.join(dirName, '..', 'trainedModels', 'mujocoMADDPG','dt={}'.format(dt),'hasWalls='+str(hasWalls)+'numBlocks='+str(numBlocks)+'numSheeps='+str(numSheeps)+'numWolves='+str(numWolves)+'timeconst='+str(timeconst)+'dampratio='+str(dampratio)+'individualRewardWolf='+str(individualRewardWolf)+'sheepSpeedMultiplier='+str(sheepSpeedMultiplier))

        modelFolder = os.path.join(
            dirName, '..', 'trainedModels', 'mujocoMADDPGeavluateWall',
            'dt={}'.format(dt),
            'hasWalls=' + str(hasWalls) + 'numBlocks=' + str(numBlocks) +
            'numSheeps=' + str(numSheeps) + 'numWolves=' + str(numWolves) +
            'timeconst=' + str(timeconst) + 'dampratio=' + str(dampratio) +
            'individualRewardWolf=' + str(individualRewardWolf) +
            'sheepSpeedMultiplier=' + str(sheepSpeedMultiplier))
        individStr = 'individ' if individualRewardWolf else 'shared'
        fileName = "maddpghasWalls={}{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}{}_agent".format(
            hasWalls, numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep,
            sheepSpeedMultiplier, individStr)

    else:
        modelFolder = os.path.join(
            dirName, '..', 'trainedModels', 'mujocoMADDPG',
            'numBlocks=' + str(numBlocks) + 'numSheeps=' + str(numSheeps) +
            'numWolves=' + str(numWolves) + 'timeconst=' + str(timeconst) +
            'dampratio=' + str(dampratio))
        fileName = "maddpg{}wolves{}sheep{}blocks60000episodes25stepSheepSpeed1.0shared_agent".format(
            numWolves, numSheeps, numBlocks)
    modelPaths = [
        os.path.join(modelFolder, fileName + str(i) + '60000eps')
        for i in range(numAgents)
    ]

    [
        restoreVariables(model, path)
        for model, path in zip(modelsList, modelPaths)
    ]

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    policy = lambda allAgentsStates: [
        actOneStepOneModel(model, observe(allAgentsStates))
        for model in modelsList
    ]

    trajList = []
    numTrajToSample = 50
    for i in range(numTrajToSample):
        np.random.seed(i)
        traj = sampleTrajectory(policy)
        trajList.append(list(traj))

    # saveTraj
    if saveTraj:
        trajFileName = "maddpg{}wolves{}sheep{}blocks{}eps{}stepSheepSpeed{}{}Traj".format(
            numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep,
            sheepSpeedMultiplier, individStr)
        trajSavePath = os.path.join(dirName, '..', 'trajectory', trajFileName)
        saveToPickle(trajList, trajSavePath)

    # visualize
    if visualizeTraj:
        entitiesColorList = [wolfColor] * numWolves + [
            sheepColor
        ] * numSheeps + [blockColor] * numBlocks
        render = Render(entitiesSizeList, entitiesColorList, numAgents,
                        getPosFromAgentState)
        print(trajList[0][0], '!!!3', trajList[0][1])
        trajToRender = np.concatenate(trajList)
        render(trajToRender)
Example No. 9
def simuliateOneParameter(parameterOneCondiiton, evalNum, randomSeed):
    saveTraj = True
    visualizeTraj = False
    visualizeMujoco = False

    numWolves = parameterOneCondiiton['numWolves']
    numSheeps = parameterOneCondiiton['numSheeps']
    numBlocks = parameterOneCondiiton['numBlocks']
    timeconst = parameterOneCondiiton['timeconst']
    dampratio = parameterOneCondiiton['dampratio']
    maxTimeStep = parameterOneCondiiton['maxTimeStep']
    sheepSpeedMultiplier = parameterOneCondiiton['sheepSpeedMultiplier']
    individualRewardWolf = parameterOneCondiiton['individualRewardWolf']
    hasWalls = parameterOneCondiiton['hasWalls']
    dt = parameterOneCondiiton['dt']
    maxEpisode = 60000  # assumed from the '60000eps' checkpoint suffix; not provided in parameterOneCondiiton

    print(
        "maddpg: {} wolves, {} sheep, {} blocks, saveTraj: {}, visualize: {}".
        format(numWolves, numSheeps, numBlocks, str(saveTraj),
               str(visualizeTraj)))

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks
    wolvesID = list(range(numWolves))
    sheepsID = list(range(numWolves, numAgents))
    blocksID = list(range(numAgents, numEntities))

    wolfSize = 0.075
    sheepSize = 0.05
    blockSize = 0.2
    entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheeps + [
        blockSize
    ] * numBlocks

    wolfMaxSpeed = 1.0
    blockMaxSpeed = None
    sheepMaxSpeedOriginal = 1.3
    sheepMaxSpeed = sheepMaxSpeedOriginal * sheepSpeedMultiplier
    entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [
        sheepMaxSpeed
    ] * numSheeps + [blockMaxSpeed] * numBlocks

    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    isCollision = IsCollision(getPosFromAgentState)
    punishForOutOfBound = PunishForOutOfBound()
    rewardSheep = RewardSheep(wolvesID, sheepsID, entitiesSizeList,
                              getPosFromAgentState, isCollision,
                              punishForOutOfBound)

    if individualRewardWolf:
        rewardWolf = RewardWolfIndividual(wolvesID, sheepsID, entitiesSizeList,
                                          isCollision)
    else:
        rewardWolf = RewardWolf(wolvesID, sheepsID, entitiesSizeList,
                                isCollision)

    rewardFunc = lambda state, action, nextState: list(
        rewardWolf(state, action, nextState)) + list(
            rewardSheep(state, action, nextState))
    dirName = os.path.dirname(__file__)

    if hasWalls:

        physicsDynamicsPath = os.path.join(
            dirName, '..', '..', 'environment', 'mujocoEnv',
            'dt={}'.format(dt),
            'hasWalls={}_numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'
            .format(hasWalls, numBlocks, numSheeps, numWolves, timeconst,
                    dampratio))

    else:
        physicsDynamicsPath = os.path.join(
            dirName, '..', '..', 'environment', 'mujocoEnv',
            'numBlocks={}_numSheeps={}_numWolves={}timeconst={}dampratio={}.xml'
            .format(numBlocks, numSheeps, numWolves, timeconst, dampratio))

    with open(physicsDynamicsPath) as f:
        xml_string = f.read()
    envXmlDict = xmltodict.parse(xml_string.strip())
    envXml = xmltodict.unparse(envXmlDict)

    physicsModel = mujoco.load_model_from_xml(envXml)
    physicsSimulation = mujoco.MjSim(physicsModel)
    # print(physicsSimulation.model.body_pos)

    qPosInit = [0, 0] * numAgents
    qVelInit = [0, 0] * numAgents
    qVelInitNoise = 0 * hasWalls
    qPosInitNoise = 0.8 * hasWalls
    getBlockRandomPos = lambda: np.random.uniform(-0.7 * hasWalls, +0.7 *
                                                  hasWalls, 2)
    getBlockSpeed = lambda: np.zeros(2)

    numQPos = len(physicsSimulation.data.qpos)
    numQVel = len(physicsSimulation.data.qvel)

    sampleAgentsQPos = lambda: np.asarray(qPosInit) + np.random.uniform(
        low=-qPosInitNoise, high=qPosInitNoise, size=numQPos)
    sampleAgentsQVel = lambda: np.asarray(qVelInit) + np.random.uniform(
        low=-qVelInitNoise, high=qVelInitNoise, size=numQVel)

    minDistance = 0.2 + 2 * blockSize
    isOverlap = IsOverlap(minDistance)

    sampleBlockState = SampleBlockState(numBlocks, getBlockRandomPos,
                                        getBlockSpeed, isOverlap)

    reset = ResetUniformWithoutXPos(physicsSimulation, numAgents, numBlocks,
                                    sampleAgentsQPos, sampleAgentsQVel,
                                    sampleBlockState)

    damping = 2.5
    numSimulationFrames = int(0.1 / dt)
    agentsMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed
                                                       ] * numSheeps
    reshapeAction = ReshapeAction()
    isTerminal = lambda state: False
    transit = TransitionFunctionWithoutXPos(physicsSimulation, numAgents,
                                            numSimulationFrames,
                                            damping * dt * numSimulationFrames,
                                            agentsMaxSpeedList,
                                            visualizeMujoco, isTerminal,
                                            reshapeAction)

    maxRunningStepsToSample = 100
    sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, transit,
                                        isTerminal, rewardFunc, reset)

    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID,
                                              blocksID, getPosFromAgentState,
                                              getVelFromAgentState)
    observe = lambda state: [
        observeOneAgent(agentID)(state) for agentID in range(numAgents)
    ]

    initObsForParams = observe(reset())
    obsShape = [
        initObsForParams[obsID].shape[0]
        for obsID in range(len(initObsForParams))
    ]

    worldDim = 2
    actionDim = worldDim * 2 + 1

    layerWidth = [128, 128]

    # ------------ model ------------------------
    buildMADDPGModels = BuildMADDPGModels(actionDim, numAgents, obsShape)
    modelsList = [
        buildMADDPGModels(layerWidth, agentID) for agentID in range(numAgents)
    ]

    if hasWalls:

        modelFolder = os.path.join(
            dirName, '..', 'trainedModels', 'mujocoMADDPGeavluateWall',
            'dt={}'.format(dt),
            'hasWalls=' + str(hasWalls) + 'numBlocks=' + str(numBlocks) +
            'numSheeps=' + str(numSheeps) + 'numWolves=' + str(numWolves) +
            'timeconst=' + str(timeconst) + 'dampratio=' + str(dampratio) +
            'individualRewardWolf=' + str(individualRewardWolf) +
            'sheepSpeedMultiplier=' + str(sheepSpeedMultiplier))
        individStr = 'individ' if individualRewardWolf else 'shared'
        fileName = "maddpghasWalls={}{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}{}_agent".format(
            hasWalls, numWolves, numSheeps, numBlocks, maxEpisode, maxTimeStep,
            sheepSpeedMultiplier, individStr)

    else:
        modelFolder = os.path.join(
            dirName, '..', 'trainedModels', 'mujocoMADDPG',
            'numBlocks=' + str(numBlocks) + 'numSheeps=' + str(numSheeps) +
            'numWolves=' + str(numWolves) + 'timeconst=' + str(timeconst) +
            'dampratio=' + str(dampratio))
        fileName = "maddpg{}wolves{}sheep{}blocks60000episodes25stepSheepSpeed1.0shared_agent".format(
            numWolves, numSheeps, numBlocks)
    modelPaths = [
        os.path.join(modelFolder, fileName + str(i) + '60000eps')
        for i in range(numAgents)
    ]

    [
        restoreVariables(model, path)
        for model, path in zip(modelsList, modelPaths)
    ]

    actOneStepOneModel = ActOneStep(actByPolicyTrainNoisy)
    policy = lambda allAgentsStates: [
        actOneStepOneModel(model, observe(allAgentsStates))
        for model in modelsList
    ]

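    # Sample evalNum trajectories with per-trajectory seeds and time the whole rollout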
    startTime = time.time()
    trajList = []
    for i in range(evalNum):
        np.random.seed(i)
        traj = sampleTrajectory(policy)
        trajList.append(list(traj))
    endTime = time.time()
    print("Time taken {} seconds to generate {} trajectories".format(
        (endTime - startTime), evalNum))

    # saveTraj
    if saveTraj:

        trajectoryFolder = os.path.join(dirName, '..', 'trajectory',
                                        'evluateWall')
        if not os.path.exists(trajectoryFolder):
            os.makedirs(trajectoryFolder)

        trajectorySaveExtension = '.pickle'
        fixedParameters = {'randomSeed': randomSeed, 'evalNum': evalNum}
        generateTrajectorySavePath = GetSavePath(trajectoryFolder,
                                                 trajectorySaveExtension,
                                                 fixedParameters)
        trajectorySavePath = generateTrajectorySavePath(parameterOneCondiiton)

        saveToPickle(trajList, trajectorySavePath)

    # visualize
    if visualizeTraj:
        entitiesColorList = [wolfColor] * numWolves + [
            sheepColor
        ] * numSheeps + [blockColor] * numBlocks
        render = Render(entitiesSizeList, entitiesColorList, numAgents,
                        getPosFromAgentState)
        print(trajList[0][0], '!!!3', trajList[0][1])
        trajToRender = np.concatenate(trajList)
        render(trajToRender)

    return endTime - startTime