Beispiel #1
0
 def wolfChooseActionMethod(individualContinuousDistributions):
     """Draw one sample per distribution and pack the samples into a tuple.

     Every element of ``individualContinuousDistributions`` is passed to
     ``sampleFromContinuousSpace``; each sample is converted to a tuple and
     the per-distribution tuples are returned together in input order.
     """
     sampledActions = []
     for dist in individualContinuousDistributions:
         sampledActions.append(tuple(sampleFromContinuousSpace(dist)))
     return tuple(sampledActions)
Beispiel #2
0
    def __call__(self, parameters):
        """Sample chasing trajectories for one condition and save them.

        Builds the multi-agent chasing environment, restores sheep and wolf
        MADDPG models from ``data/preTrainModel`` (two directories above this
        file), samples ``self.numTrajectories`` trajectories and passes them to
        ``self.saveTrajectoryByParameters``.

        Args:
            parameters: mapping with the keys ``'numWolves'``, ``'numSheep'``,
                ``'wolfType'`` (``'sharedReward'`` or ``'individualReward'``)
                and ``'sheepConcern'`` (``'selfSheep'`` or ``'allSheep'``).

        Raises:
            ValueError: if ``'wolfType'`` or ``'sheepConcern'`` is not one of
                the supported values listed above.
        """
        print(parameters)
        visualizeTraj = False

        numWolves = parameters['numWolves']
        numSheep = parameters['numSheep']
        wolfType = parameters['wolfType']
        sheepConcern = parameters['sheepConcern']

        # Resolve the categorical options before anything is built. Without
        # this check an unsupported value only surfaced much later as an
        # unbound local variable.
        sheepConcernSelfOnlyOptions = {'selfSheep': 1, 'allSheep': 0}
        if sheepConcern not in sheepConcernSelfOnlyOptions:
            raise ValueError(
                "sheepConcern must be 'selfSheep' or 'allSheep', got {!r}".format(sheepConcern))
        sheepConcernSelfOnly = sheepConcernSelfOnlyOptions[sheepConcern]

        wolfPrefixOptions = {'sharedReward': 'maddpg', 'individualReward': 'maddpgIndividWolf'}
        if wolfType not in wolfPrefixOptions:
            raise ValueError(
                "wolfType must be 'sharedReward' or 'individualReward', got {!r}".format(wolfType))
        prefix = wolfPrefixOptions[wolfType]

        ## MDP Env
        # state is all multi agent state # action is all multi agent action
        wolvesID = list(range(numWolves))
        sheepsID = list(range(numWolves, numWolves + numSheep))
        possibleWolvesIds = wolvesID
        possibleSheepIds = sheepsID

        numAgents = numWolves + numSheep
        # Wolves and blocks always add up to five entities.
        numBlocks = 5 - numWolves
        blocksID = list(range(numAgents, numAgents + numBlocks))
        numEntities = numAgents + numBlocks

        sheepSize = 0.05
        wolfSize = 0.075
        blockSize = 0.2

        sheepMaxSpeed = 1.3 * 1
        wolfMaxSpeed = 1.0 * 1
        blockMaxSpeed = None

        # Per-entity lists are ordered wolves, then sheep, then blocks,
        # matching the ID ranges above.
        entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheep + [blockSize] * numBlocks
        entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheep + [blockMaxSpeed] * numBlocks
        entitiesMovableList = [True] * numAgents + [False] * numBlocks
        massList = [1.0] * numEntities

        reshapeActionInTransit = lambda action: action
        getCollisionForce = GetCollisionForce()
        applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
        applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList,
                                              getCollisionForce, getPosFromAgentState)
        integrateState = IntegrateState(numEntities, entitiesMovableList, massList,
                                        entityMaxSpeedList, getVelFromAgentState, getPosFromAgentState)
        transit = TransitMultiAgentChasing(numEntities, reshapeActionInTransit, applyActionForce,
                                           applyEnvironForce, integrateState)

        isCollision = IsCollision(getPosFromAgentState)
        collisonRewardWolf = 1
        punishForOutOfBoundForWolf = lambda state: 0
        rewardWolf = RewardCentralControlPunishBond(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState,
                                                    isCollision, punishForOutOfBoundForWolf, collisonRewardWolf)
        # NOTE(review): rewardSheep is constructed but never used below; only
        # rewardWolf is passed to ForwardOneStep. Kept in case the constructors
        # have side effects - confirm and remove if they do not.
        collisonRewardSheep = -1
        punishForOutOfBoundForSheep = PunishForOutOfBound()
        rewardSheep = RewardCentralControlPunishBond(sheepsID, wolvesID, entitiesSizeList, getPosFromAgentState,
                                                     isCollision, punishForOutOfBoundForSheep, collisonRewardSheep)

        forwardOneStep = ForwardOneStep(transit, rewardWolf)

        reset = ResetMultiAgentChasing(numAgents, numBlocks)
        isTerminal = lambda state: False
        maxRunningSteps = 101
        sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, reset, forwardOneStep)

        ## MDP Policy
        worldDim = 2
        actionDim = worldDim * 2 + 1

        layerWidth = [64 * (numWolves - 1), 64 * (numWolves - 1)]

        # Checkpoint location and episode count are shared by the sheep and
        # the wolf models, so they are defined once before either is restored.
        dirName = os.path.dirname(__file__)
        maxEpisode = 60000
        modelDir = os.path.join(dirName, '..', '..', 'data', 'preTrainModel')

        # Sheep Part
        # ------------ model ------------------------
        # A self-concerned sheep observes a single sheep (itself); otherwise
        # every sheep is part of the observation.
        numSheepToObserve = 1 if sheepConcernSelfOnly else numSheep

        print(numSheepToObserve)
        sheepModelListOfDiffWolfReward = []
        sheepType = 'mixed'
        if sheepType == 'mixed':
            # Pool sheep models stored under both checkpoint prefixes.
            sheepPrefixList = ['maddpgIndividWolf', 'maddpg']
        else:
            sheepPrefixList = [sheepType]
        for sheepPrefix in sheepPrefixList:
            wolvesIDForSheepObserve = list(range(numWolves))
            sheepsIDForSheepObserve = list(range(numWolves, numSheepToObserve + numWolves))
            blocksIDForSheepObserve = list(range(numSheepToObserve + numWolves,
                                                 numSheepToObserve + numWolves + numBlocks))
            observeOneAgentForSheep = lambda agentID: Observe(
                agentID, wolvesIDForSheepObserve, sheepsIDForSheepObserve,
                blocksIDForSheepObserve, getPosFromAgentState, getVelFromAgentState)
            observeSheep = lambda state: [observeOneAgentForSheep(agentID)(state)
                                          for agentID in range(numWolves + numSheepToObserve)]

            # An initial observation is only needed for the per-agent
            # observation sizes used to build the networks.
            obsIDsForSheep = wolvesIDForSheepObserve + sheepsIDForSheepObserve + blocksIDForSheepObserve
            initObsForSheepParams = observeSheep(reset()[obsIDsForSheep])
            obsShapeSheep = [obs.shape[0] for obs in initObsForSheepParams]

            buildSheepModels = BuildMADDPGModels(actionDim, numWolves + numSheepToObserve, obsShapeSheep)
            sheepModelsList = [buildSheepModels(layerWidth, agentID)
                               for agentID in range(numWolves, numWolves + numSheepToObserve)]

            print(sheepPrefix)
            sheepFileName = "{}wolves{}sheep{}blocks{}eps_agent".format(
                numWolves, numSheepToObserve, numBlocks, maxEpisode)
            sheepModelPaths = [os.path.join(modelDir, sheepPrefix + sheepFileName + str(i) + '60000eps')
                               for i in range(numWolves, numWolves + numSheepToObserve)]

            for model, path in zip(sheepModelsList, sheepModelPaths):
                restoreVariables(model, path)
            sheepModelListOfDiffWolfReward.extend(sheepModelsList)

        # Sheep Policy Function
        reshapeAction = ReshapeAction()
        actOneStepOneModelSheep = ActOneStep(actByPolicyTrainNoisy)

        # Sheep Generate Action
        numAllSheepModels = len(sheepModelListOfDiffWolfReward)

        # Wolves Part
        # ------------ model ------------------------
        wolvesIDForWolfObserve = list(range(numWolves))
        sheepsIDForWolfObserve = list(range(numWolves, numSheep + numWolves))
        blocksIDForWolfObserve = list(range(numSheep + numWolves, numSheep + numWolves + numBlocks))
        observeOneAgentForWolf = lambda agentID: Observe(
            agentID, wolvesIDForWolfObserve, sheepsIDForWolfObserve,
            blocksIDForWolfObserve, getPosFromAgentState, getVelFromAgentState)
        observeWolf = lambda state: [observeOneAgentForWolf(agentID)(state)
                                     for agentID in range(numWolves + numSheep)]

        obsIDsForWolf = wolvesIDForWolfObserve + sheepsIDForWolfObserve + blocksIDForWolfObserve
        initObsForWolfParams = observeWolf(reset()[obsIDsForWolf])
        obsShapeWolf = [obs.shape[0] for obs in initObsForWolfParams]
        buildWolfModels = BuildMADDPGModels(actionDim, numWolves + numSheep, obsShapeWolf)
        layerWidthForWolf = [64 * (numWolves - 1), 64 * (numWolves - 1)]
        wolfModelsList = [buildWolfModels(layerWidthForWolf, agentID) for agentID in range(numWolves)]

        wolfFileName = "{}wolves{}sheep{}blocks{}eps_agent".format(numWolves, numSheep, numBlocks, maxEpisode)
        wolfModelPaths = [os.path.join(modelDir, prefix + wolfFileName + str(i) + '60000eps')
                          for i in range(numWolves)]
        print(numWolves, obsShapeWolf, wolfModelPaths)

        for model, path in zip(wolfModelsList, wolfModelPaths):
            restoreVariables(model, path)

        # Each wolf's reshaped model output is wrapped by buildGaussian (fixed
        # covariance of 0.03 ** 2 per action dimension) and then sampled.
        actionDimReshaped = 2
        cov = [0.03 ** 2 for _ in range(actionDimReshaped)]
        buildGaussian = BuildGaussianFixCov(cov)
        actOneStepOneModelWolf = ActOneStep(actByPolicyTrainNoNoisy)
        composeWolfPolicy = lambda wolfModel: lambda state: sampleFromContinuousSpace(buildGaussian(
            tuple(reshapeAction(actOneStepOneModelWolf(wolfModel, observeWolf(state))))))
        wolvesSampleActions = [composeWolfPolicy(wolfModel) for wolfModel in wolfModelsList]

        # The sheep policy factory does not change between trajectories, so it
        # is built once instead of on every loop iteration.
        if sheepConcernSelfOnly:
            # Single-entry distribution {action: 1}, sampled through
            # SampleActionOnFixedIntention below.
            composeSheepPolicy = lambda sheepModel: lambda state: {
                tuple(reshapeAction(actOneStepOneModelSheep(sheepModel, observeSheep(state)))): 1}
            sheepChooseActionMethod = sampleFromDistribution
        else:
            composeSheepPolicy = lambda sheepModel: lambda state: tuple(
                reshapeAction(actOneStepOneModelSheep(sheepModel, observeSheep(state))))

        trajectories = []
        for _ in range(self.numTrajectories):
            # Every sheep draws its model at random from the pooled sheep
            # models for this trajectory.
            sheepModelsForPolicy = [sheepModelListOfDiffWolfReward[np.random.choice(numAllSheepModels)]
                                    for _ in possibleSheepIds]
            if sheepConcernSelfOnly:
                sheepSampleActions = [
                    SampleActionOnFixedIntention(selfId, possibleWolvesIds, composeSheepPolicy(sheepModel),
                                                 sheepChooseActionMethod, blocksID)
                    for selfId, sheepModel in zip(possibleSheepIds, sheepModelsForPolicy)]
            else:
                sheepSampleActions = [composeSheepPolicy(sheepModel) for sheepModel in sheepModelsForPolicy]
            allIndividualSampleActions = wolvesSampleActions + sheepSampleActions
            sampleAction = lambda state: [sampleIndividualAction(state)
                                          for sampleIndividualAction in allIndividualSampleActions]
            trajectory = sampleTrajectory(sampleAction)
            trajectories.append(trajectory)

        trajectoryFixedParameters = {'maxRunningSteps': maxRunningSteps}
        self.saveTrajectoryByParameters(trajectories, trajectoryFixedParameters, parameters)
        print(np.mean([len(tra) for tra in trajectories]))

        # visualize
        if visualizeTraj:
            wolfColor = np.array([0.85, 0.35, 0.35])
            sheepColor = np.array([0.35, 0.85, 0.35])
            blockColor = np.array([0.25, 0.25, 0.25])
            entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheep + [blockColor] * numBlocks
            render = Render(entitiesSizeList, entitiesColorList, numAgents, getPosFromAgentState)
            trajToRender = np.concatenate(trajectories)
            render(trajToRender)
Beispiel #3
0
    def __call__(self, parameters):
        """Sample and save trajectories in which one wolf uses a different model.

        Builds the multi-agent chasing environment with two blocks, restores
        sheep and wolf MADDPG models from ``data/preTrainModel`` (two
        directories above this file) and samples ``self.numTrajectories``
        trajectories. The wolf at index ``perturbedWolfID`` acts with a model
        restored from the one-sheep checkpoint and observes only the sheep
        ``sheepsID[perturbedWolfGoalID]``; all other wolves act with models
        restored from the ``numSheep``-sheep checkpoint. The trajectories are
        passed to ``self.saveTrajectoryByParameters``.

        Args:
            parameters: mapping with the keys ``'numWolves'``, ``'numSheep'``,
                ``'wolfType'``, ``'perturbedWolfID'`` and
                ``'perturbedWolfGoalID'``.
        """
        print(parameters)

        numWolves = parameters['numWolves']
        numSheep = parameters['numSheep']
        numBlocks = 2
        # Any wolfType other than 'individualReward' maps to 0.0.
        wolfSelfish = 1.0 if parameters['wolfType'] == 'individualReward' else 0.0
        perturbedWolfID = parameters['perturbedWolfID']
        perturbedWolfGoalID = parameters['perturbedWolfGoalID']

        ## MDP Env
        numAgents = numWolves + numSheep
        numEntities = numAgents + numBlocks
        wolvesID = list(range(numWolves))
        sheepsID = list(range(numWolves, numWolves + numSheep))
        blocksID = list(range(numAgents, numEntities))

        sheepSize = 0.05
        wolfSize = 0.075
        blockSize = 0.2
        # Per-entity lists are ordered wolves, then sheep, then blocks,
        # matching the ID ranges above.
        entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheep + [blockSize] * numBlocks

        costActionRatio = 0.0
        sheepSpeedMultiplier = 1.0
        sheepMaxSpeed = 1.3 * sheepSpeedMultiplier
        wolfMaxSpeed = 1.0
        blockMaxSpeed = None

        entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheep + [blockMaxSpeed] * numBlocks
        entitiesMovableList = [True] * numAgents + [False] * numBlocks
        massList = [1.0] * numEntities

        collisionReward = 1  # for evaluation, count # of bites
        isCollision = IsCollision(getPosFromAgentState)
        rewardAllWolves = RewardWolf(wolvesID, sheepsID, entitiesSizeList,
                                     isCollision, collisionReward, wolfSelfish)
        # The trajectory stores one scalar: the sum of what rewardAllWolves returns.
        rewardWolf = lambda state, action, nextState: np.sum(
            rewardAllWolves(state, action, nextState))

        reshapeActionInTransit = lambda action: action
        getCollisionForce = GetCollisionForce()
        applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
        applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList,
                                              getCollisionForce, getPosFromAgentState)
        integrateState = IntegrateState(numEntities, entitiesMovableList, massList, entityMaxSpeedList,
                                        getVelFromAgentState, getPosFromAgentState)
        transit = TransitMultiAgentChasing(numEntities, reshapeActionInTransit, applyActionForce,
                                           applyEnvironForce, integrateState)

        forwardOneStep = ForwardOneStep(transit, rewardWolf)

        reset = ResetMultiAgentChasingWithSeed(numAgents, numBlocks)
        isTerminal = lambda state: False
        maxRunningStepsToSample = 101
        sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, isTerminal, reset, forwardOneStep)

        ## MDP Policy
        worldDim = 2
        actionDim = worldDim * 2 + 1

        layerWidth = [128, 128]
        layerWidthForWolf = [128, 128]
        maxTimeStep = 75
        maxEpisode = 60000
        dirName = os.path.dirname(__file__)

        # All checkpoints live in one directory and share one file-name
        # pattern; only the sheep count, the trailing "individ" value and the
        # agent index differ.
        modelDir = os.path.join(dirName, '..', '..', 'data', 'preTrainModel')
        modelFileTemplate = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent"

        # ------------ sheep recover variables ------------------------
        numSheepToObserve = 1
        sheepModelListOfDiffWolfReward = []
        # Sheep models are pooled from the checkpoints whose "individ" value
        # is 0.0 and 1.0.
        sheepTypeList = [0.0, 1.0]

        for sheepType in sheepTypeList:
            wolvesIDForSheepObserve = list(range(numWolves))
            sheepsIDForSheepObserve = list(range(numWolves, numSheepToObserve + numWolves))
            blocksIDForSheepObserve = list(range(numSheepToObserve + numWolves,
                                                 numSheepToObserve + numWolves + numBlocks))
            observeOneAgentForSheep = lambda agentID: Observe(
                agentID, wolvesIDForSheepObserve, sheepsIDForSheepObserve,
                blocksIDForSheepObserve, getPosFromAgentState, getVelFromAgentState)
            observeSheep = lambda state: [
                observeOneAgentForSheep(agentID)(state)
                for agentID in range(numWolves + numSheepToObserve)
            ]

            # An initial observation is only needed for the per-agent
            # observation sizes used to build the networks.
            obsIDsForSheep = wolvesIDForSheepObserve + sheepsIDForSheepObserve + blocksIDForSheepObserve
            initObsForSheepParams = observeSheep(reset()[obsIDsForSheep])
            obsShapeSheep = [obs.shape[0] for obs in initObsForSheepParams]

            buildSheepModels = BuildMADDPGModels(actionDim, numWolves + numSheepToObserve, obsShapeSheep)
            sheepModelsList = [
                buildSheepModels(layerWidth, agentID)
                for agentID in range(numWolves, numWolves + numSheepToObserve)
            ]

            sheepFileName = modelFileTemplate.format(
                numWolves, numSheepToObserve, numBlocks, maxEpisode,
                maxTimeStep, sheepSpeedMultiplier, costActionRatio, sheepType)
            sheepModelPaths = [
                os.path.join(modelDir, sheepFileName + str(i))
                for i in range(numWolves, numWolves + numSheepToObserve)
            ]
            for model, path in zip(sheepModelsList, sheepModelPaths):
                restoreVariables(model, path)
            sheepModelListOfDiffWolfReward.extend(sheepModelsList)

        # TODO: decide whether actByPolicyTrainNoisy should be used here instead.
        actOneStep = ActOneStep(actByPolicyTrainNoNoisy)

        numAllSheepModels = len(sheepModelListOfDiffWolfReward)

        # ------------ wolves recover variables ------------------------

        # ------------ Recover one perturbed wolf for comparison -------
        numSheepForPerturbedWolf = 1
        wolvesIDForPerturbedWolf = wolvesID
        sheepsIDForPerturbedWolf = [sheepsID[perturbedWolfGoalID]]
        # Block IDs stay at their positions in the full state; the sheep that
        # are not attended to are simply left out of the observation.
        blocksIDForPerturbedWolf = list(range(numWolves + numSheep, numEntities))

        observeOneAgentForPerturbedWolf = lambda agentID: Observe(
            agentID, wolvesIDForPerturbedWolf, sheepsIDForPerturbedWolf,
            blocksIDForPerturbedWolf, getPosFromAgentState, getVelFromAgentState)
        observePerturbedWolf = lambda state: [
            observeOneAgentForPerturbedWolf(agentID)(state)
            for agentID in wolvesIDForPerturbedWolf + sheepsIDForPerturbedWolf
        ]

        initObsForPerturbedWolfParams = observePerturbedWolf(reset())
        obsShapePerturbedWolf = [obs.shape[0] for obs in initObsForPerturbedWolfParams]
        buildPerturbedWolfModels = BuildMADDPGModels(
            actionDim, numWolves + numSheepForPerturbedWolf, obsShapePerturbedWolf)
        perturbedWolfModel = buildPerturbedWolfModels(layerWidthForWolf, perturbedWolfID)

        perturbedWolfFileName = modelFileTemplate.format(
            numWolves, numSheepForPerturbedWolf, numBlocks, maxEpisode,
            maxTimeStep, sheepSpeedMultiplier, costActionRatio, wolfSelfish)
        perturbedWolfModelPath = os.path.join(modelDir, perturbedWolfFileName + str(perturbedWolfID))
        restoreVariables(perturbedWolfModel, perturbedWolfModelPath)

        # ------------ Recover other wolves trained with multiple goals -------
        wolvesIDForWolfObserve = wolvesID
        sheepsIDForWolfObserve = sheepsID
        blocksIDForWolfObserve = blocksID
        observeOneAgentForWolf = lambda agentID: Observe(
            agentID, wolvesIDForWolfObserve, sheepsIDForWolfObserve,
            blocksIDForWolfObserve, getPosFromAgentState, getVelFromAgentState)
        observeWolf = lambda state: [
            observeOneAgentForWolf(agentID)(state)
            for agentID in range(numWolves + numSheep)
        ]

        obsIDsForWolf = wolvesIDForWolfObserve + sheepsIDForWolfObserve + blocksIDForWolfObserve
        initObsForWolfParams = observeWolf(reset()[obsIDsForWolf])
        obsShapeWolf = [obs.shape[0] for obs in initObsForWolfParams]
        buildWolfModels = BuildMADDPGModels(actionDim, numWolves + numSheep, obsShapeWolf)
        wolfModelsList = [
            buildWolfModels(layerWidthForWolf, agentID)
            for agentID in range(numWolves)
        ]

        wolfFileName = modelFileTemplate.format(
            numWolves, numSheep, numBlocks, maxEpisode, maxTimeStep,
            sheepSpeedMultiplier, costActionRatio, wolfSelfish)
        wolfModelPaths = [
            os.path.join(modelDir, wolfFileName + str(i))
            for i in range(numWolves)
        ]
        for model, path in zip(wolfModelsList, wolfModelPaths):
            restoreVariables(model, path)

        # ------------ compose  policy ---------------------
        actionDimReshaped = 2
        # Tiny fixed covariance passed to BuildGaussianFixCov.
        cov = [0.00000000001**2 for _ in range(actionDimReshaped)]
        buildGaussian = BuildGaussianFixCov(cov)
        reshapeAction = ReshapeAction()

        # unperturbed policy
        composeWolfPolicy = lambda wolfModel: lambda state: sampleFromContinuousSpace(
            buildGaussian(
                tuple(reshapeAction(actOneStep(wolfModel, observeWolf(state))))))
        wolvesSampleActions = [
            composeWolfPolicy(wolfModel) for wolfModel in wolfModelsList
        ]

        # perturbed policy: same pipeline, but fed with the single-goal observation
        composePerturbedWolfPolicy = lambda perturbedModel: lambda state: sampleFromContinuousSpace(
            buildGaussian(
                tuple(reshapeAction(actOneStep(perturbedModel, observePerturbedWolf(state))))))
        wolvesSampleActionsPerturbed = wolvesSampleActions.copy()
        wolvesSampleActionsPerturbed[perturbedWolfID] = composePerturbedWolfPolicy(perturbedWolfModel)

        # The sheep policy factory does not change between trajectories, so it
        # is built once. Each policy returns a single-entry distribution
        # {action: 1}, sampled through SampleActionOnFixedIntention.
        composeSheepPolicy = lambda sheepModel: lambda state: {
            tuple(reshapeAction(actOneStep(sheepModel, observeSheep(state)))): 1
        }
        sheepChooseActionMethod = sampleFromDistribution

        trajectories = []
        for _ in range(self.numTrajectories):
            # Every sheep draws its model at random from the pooled sheep
            # models for this trajectory.
            sheepModelsForPolicy = [
                sheepModelListOfDiffWolfReward[np.random.choice(numAllSheepModels)]
                for _ in sheepsID
            ]
            sheepSampleActions = [
                SampleActionOnFixedIntention(selfId, wolvesID,
                                             composeSheepPolicy(sheepModel),
                                             sheepChooseActionMethod, blocksID)
                for selfId, sheepModel in zip(sheepsID, sheepModelsForPolicy)
            ]

            allIndividualSampleActionsPerturbed = wolvesSampleActionsPerturbed + sheepSampleActions
            sampleActionPerturbed = lambda state: [
                sampleIndividualAction(state)
                for sampleIndividualAction in allIndividualSampleActionsPerturbed
            ]

            trajectory = sampleTrajectory(sampleActionPerturbed)
            trajectories.append(trajectory)

        trajectoryFixedParameters = {
            'maxRunningStepsToSample': maxRunningStepsToSample
        }
        self.saveTrajectoryByParameters(trajectories,
                                        trajectoryFixedParameters, parameters)