コード例 #1
0
    def __call__(self, parameters):
        """Train the model interval by interval, checkpointing each stage.

        The untrained model (``self.NNModel``) is first saved under the path
        produced for ``trainSteps == 0``. Then, for every index in
        ``self.trainIntervelIndexes``, the checkpoint path for
        ``index * self.trainStepsIntervel`` steps is looked up: if its
        ``.index`` file already exists the variables are restored from it,
        otherwise the current model is trained on ``self.trainData`` and saved.

        Args:
            parameters: mapping read for ``'miniBatchSize'`` and
                ``'learningRate'``. NOTE: it is modified in place -- the key
                ``'trainSteps'`` is (over)written before each path lookup.

        Returns:
            None.
        """
        print(parameters)
        batchSize = parameters['miniBatchSize']
        stepSize = parameters['learningRate']

        currentModel = self.NNModel
        trainOneInterval = self.getTrain(batchSize, stepSize)

        # Checkpoint the starting point (zero training steps).
        parameters.update({'trainSteps': 0})
        saveVariables(currentModel, self.getModelSavePath(parameters))

        for intervalIndex in self.trainIntervelIndexes:
            stepsSoFar = intervalIndex * self.trainStepsIntervel
            parameters.update({'trainSteps': stepsSoFar})
            checkpointPath = self.getModelSavePath(parameters)

            # An existing '<path>.index' file means this stage was already
            # trained; reuse it instead of training again.
            if os.path.isfile(checkpointPath + '.index'):
                currentModel = restoreVariables(currentModel, checkpointPath)
            else:
                currentModel = trainOneInterval(currentModel, self.trainData)
                saveVariables(currentModel, checkpointPath)
コード例 #2
0
    def __call__(self, parameters):
        """Sample and save chasing trajectories for one parameter condition.

        Builds the multi-agent chasing environment, builds MADDPG sheep and
        wolf models and restores their variables from files under
        ``../../data/preTrainModel`` (relative to this file), rolls out
        ``self.numTrajectories`` trajectories and passes them to
        ``self.saveTrajectoryByParameters``.

        Entity indices are laid out as wolves first, then sheep, then blocks.

        Args:
            parameters: mapping read for the keys ``'numWolves'``,
                ``'numSheep'``, ``'wolfType'`` (``'sharedReward'`` or
                ``'individualReward'``) and ``'sheepConcern'``
                (``'selfSheep'`` or ``'allSheep'``). It is forwarded unchanged
                to ``self.saveTrajectoryByParameters``.

        Raises:
            ValueError: if ``'wolfType'`` or ``'sheepConcern'`` holds a value
                other than the ones listed above.

        Returns:
            None.
        """
        print(parameters)
        visualizeTraj = False

        numWolves = parameters['numWolves']
        numSheep = parameters['numSheep']
        wolfType = parameters['wolfType']
        sheepConcern = parameters['sheepConcern']

        # Validate the categorical options before any model is built, so a
        # typo fails immediately with a clear message instead of surfacing
        # later as an unbound local variable.
        if sheepConcern == 'selfSheep':
            sheepConcernSelfOnly = 1
        elif sheepConcern == 'allSheep':
            sheepConcernSelfOnly = 0
        else:
            raise ValueError(
                "unsupported sheepConcern {!r}; expected 'selfSheep' or 'allSheep'".format(sheepConcern))

        if wolfType == 'sharedReward':
            prefix = 'maddpg'
        elif wolfType == 'individualReward':
            prefix = 'maddpgIndividWolf'
        else:
            raise ValueError(
                "unsupported wolfType {!r}; expected 'sharedReward' or 'individualReward'".format(wolfType))

        ## MDP Env
        # state is all multi agent state # action is all multi agent action
        wolvesID = list(range(numWolves))
        sheepsID = list(range(numWolves, numWolves + numSheep))
        possibleWolvesIds = wolvesID
        possibleSheepIds = sheepsID

        numAgents = numWolves + numSheep
        numBlocks = 5 - numWolves
        blocksID = list(range(numAgents, numAgents + numBlocks))
        numEntities = numAgents + numBlocks

        sheepSize = 0.05
        wolfSize = 0.075
        blockSize = 0.2

        sheepMaxSpeed = 1.3 * 1
        wolfMaxSpeed = 1.0 * 1
        blockMaxSpeed = None

        entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheep + [blockSize] * numBlocks
        entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheep + [blockMaxSpeed] * numBlocks
        entitiesMovableList = [True] * numAgents + [False] * numBlocks
        massList = [1.0] * numEntities

        reshapeActionInTransit = lambda action: action
        getCollisionForce = GetCollisionForce()
        applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
        applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList,
                                              getCollisionForce, getPosFromAgentState)
        integrateState = IntegrateState(numEntities, entitiesMovableList, massList,
                                        entityMaxSpeedList, getVelFromAgentState, getPosFromAgentState)
        transit = TransitMultiAgentChasing(numEntities, reshapeActionInTransit, applyActionForce,
                                           applyEnvironForce, integrateState)

        isCollision = IsCollision(getPosFromAgentState)
        collisonRewardWolf = 1
        punishForOutOfBoundForWolf = lambda state: 0
        rewardWolf = RewardCentralControlPunishBond(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState,
                                                    isCollision, punishForOutOfBoundForWolf, collisonRewardWolf)
        # The sheep reward is constructed but not used below: only the wolf
        # reward is passed to ForwardOneStep.
        collisonRewardSheep = -1
        punishForOutOfBoundForSheep = PunishForOutOfBound()
        rewardSheep = RewardCentralControlPunishBond(sheepsID, wolvesID, entitiesSizeList, getPosFromAgentState,
                                                     isCollision, punishForOutOfBoundForSheep, collisonRewardSheep)

        forwardOneStep = ForwardOneStep(transit, rewardWolf)

        reset = ResetMultiAgentChasing(numAgents, numBlocks)
        isTerminal = lambda state: False  # episodes end only at maxRunningSteps
        maxRunningSteps = 101
        sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, reset, forwardOneStep)

        ## MDP Policy
        worldDim = 2
        actionDim = worldDim * 2 + 1

        layerWidth = [64 * (numWolves - 1), 64 * (numWolves - 1)]

        # Model files live next to this file under ../../data/preTrainModel.
        dirName = os.path.dirname(__file__)
        maxEpisode = 60000

        # Sheep Part
        # ------------ model ------------------------
        # Index 0 -> sheep observe all sheep, index 1 -> only themselves.
        numSheepToObserveWhenSheepSameOrDiff = [numSheep, 1]
        numSheepToObserve = numSheepToObserveWhenSheepSameOrDiff[sheepConcernSelfOnly]

        print(numSheepToObserve)
        sheepModelListOfDiffWolfReward = []
        sheepType = 'mixed'
        if sheepType == 'mixed':
            sheepPrefixList = ['maddpgIndividWolf', 'maddpg']
        else:
            sheepPrefixList = [sheepType]

        # The observation helpers do not depend on the prefix, so they are
        # defined once here; observeSheep is also needed by the sheep policies
        # composed after the loop.
        wolvesIDForSheepObserve = list(range(numWolves))
        sheepsIDForSheepObserve = list(range(numWolves, numSheepToObserve + numWolves))
        blocksIDForSheepObserve = list(range(numSheepToObserve + numWolves,
                                             numSheepToObserve + numWolves + numBlocks))
        observeOneAgentForSheep = lambda agentID: Observe(agentID, wolvesIDForSheepObserve, sheepsIDForSheepObserve,
                                                          blocksIDForSheepObserve, getPosFromAgentState,
                                                          getVelFromAgentState)
        observeSheep = lambda state: [observeOneAgentForSheep(agentID)(state)
                                      for agentID in range(numWolves + numSheepToObserve)]
        obsIDsForSheep = wolvesIDForSheepObserve + sheepsIDForSheepObserve + blocksIDForSheepObserve
        sheepFileName = "{}wolves{}sheep{}blocks{}eps_agent".format(numWolves, numSheepToObserve, numBlocks,
                                                                    maxEpisode)

        for sheepPrefix in sheepPrefixList:
            # reset() stays inside the loop so it is called once per prefix,
            # exactly as before; the sample is only used to read the
            # per-agent observation sizes.
            initObsForSheepParams = observeSheep(reset()[obsIDsForSheep])
            obsShapeSheep = [obs.shape[0] for obs in initObsForSheepParams]

            buildSheepModels = BuildMADDPGModels(actionDim, numWolves + numSheepToObserve, obsShapeSheep)
            sheepModelsList = [buildSheepModels(layerWidth, agentID)
                               for agentID in range(numWolves, numWolves + numSheepToObserve)]

            print(sheepPrefix)
            sheepModelPaths = [os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                                            sheepPrefix + sheepFileName + str(i) + '60000eps')
                               for i in range(numWolves, numWolves + numSheepToObserve)]

            for model, path in zip(sheepModelsList, sheepModelPaths):
                restoreVariables(model, path)
            sheepModelListOfDiffWolfReward.extend(sheepModelsList)

        # Sheep Policy Function
        reshapeAction = ReshapeAction()
        actOneStepOneModelSheep = ActOneStep(actByPolicyTrainNoisy)

        # Sheep Generate Action
        numAllSheepModels = len(sheepModelListOfDiffWolfReward)

        # Wolves Part
        # ------------ model ------------------------
        wolvesIDForWolfObserve = list(range(numWolves))
        sheepsIDForWolfObserve = list(range(numWolves, numSheep + numWolves))
        blocksIDForWolfObserve = list(range(numSheep + numWolves, numSheep + numWolves + numBlocks))
        observeOneAgentForWolf = lambda agentID: Observe(agentID, wolvesIDForWolfObserve, sheepsIDForWolfObserve,
                                                         blocksIDForWolfObserve, getPosFromAgentState,
                                                         getVelFromAgentState)
        observeWolf = lambda state: [observeOneAgentForWolf(agentID)(state)
                                     for agentID in range(numWolves + numSheep)]

        obsIDsForWolf = wolvesIDForWolfObserve + sheepsIDForWolfObserve + blocksIDForWolfObserve
        initObsForWolfParams = observeWolf(reset()[obsIDsForWolf])
        obsShapeWolf = [obs.shape[0] for obs in initObsForWolfParams]
        buildWolfModels = BuildMADDPGModels(actionDim, numWolves + numSheep, obsShapeWolf)
        layerWidthForWolf = [64 * (numWolves - 1), 64 * (numWolves - 1)]
        wolfModelsList = [buildWolfModels(layerWidthForWolf, agentID) for agentID in range(numWolves)]

        wolfFileName = "{}wolves{}sheep{}blocks{}eps_agent".format(numWolves, numSheep, numBlocks, maxEpisode)
        wolfModelPaths = [os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                                       prefix + wolfFileName + str(i) + '60000eps')
                          for i in range(numWolves)]
        print(numWolves, obsShapeWolf, wolfModelPaths)

        for model, path in zip(wolfModelsList, wolfModelPaths):
            restoreVariables(model, path)

        # Each wolf samples its action from a Gaussian built on the reshaped
        # output of its model, with a fixed covariance of 0.03 ** 2 per dim.
        actionDimReshaped = 2
        cov = [0.03 ** 2 for _ in range(actionDimReshaped)]
        buildGaussian = BuildGaussianFixCov(cov)
        actOneStepOneModelWolf = ActOneStep(actByPolicyTrainNoNoisy)
        composeWolfPolicy = lambda wolfModel: lambda state: sampleFromContinuousSpace(buildGaussian(
            tuple(reshapeAction(actOneStepOneModelWolf(wolfModel, observeWolf(state))))))

        wolvesSampleActions = [composeWolfPolicy(wolfModel) for wolfModel in wolfModelsList]

        trajectories = []
        for _ in range(self.numTrajectories):
            # Every sheep gets a model drawn uniformly from all restored
            # sheep models, redrawn for each trajectory.
            sheepModelsForPolicy = [sheepModelListOfDiffWolfReward[np.random.choice(numAllSheepModels)]
                                    for _ in possibleSheepIds]
            if sheepConcernSelfOnly:
                # The policy returns a one-point distribution {action: 1}.
                composeSheepPolicy = lambda sheepModel: lambda state: {
                    tuple(reshapeAction(actOneStepOneModelSheep(sheepModel, observeSheep(state)))): 1}
                sheepChooseActionMethod = sampleFromDistribution
                sheepSampleActions = [
                    SampleActionOnFixedIntention(selfId, possibleWolvesIds, composeSheepPolicy(sheepModel),
                                                 sheepChooseActionMethod, blocksID)
                    for selfId, sheepModel in zip(possibleSheepIds, sheepModelsForPolicy)]
            else:
                composeSheepPolicy = lambda sheepModel: lambda state: tuple(
                    reshapeAction(actOneStepOneModelSheep(sheepModel, observeSheep(state))))
                sheepSampleActions = [composeSheepPolicy(sheepModel) for sheepModel in sheepModelsForPolicy]

            allIndividualSampleActions = wolvesSampleActions + sheepSampleActions
            sampleAction = lambda state: [sampleIndividualAction(state)
                                          for sampleIndividualAction in allIndividualSampleActions]
            trajectory = sampleTrajectory(sampleAction)
            trajectories.append(trajectory)

        trajectoryFixedParameters = {'maxRunningSteps': maxRunningSteps}
        self.saveTrajectoryByParameters(trajectories, trajectoryFixedParameters, parameters)
        print(np.mean([len(tra) for tra in trajectories]))

        # visualize
        if visualizeTraj:
            wolfColor = np.array([0.85, 0.35, 0.35])
            sheepColor = np.array([0.35, 0.85, 0.35])
            blockColor = np.array([0.25, 0.25, 0.25])
            entitiesColorList = [wolfColor] * numWolves + [sheepColor] * numSheep + [blockColor] * numBlocks
            render = Render(entitiesSizeList, entitiesColorList, numAgents, getPosFromAgentState)
            trajToRender = np.concatenate(trajectories)
            render(trajToRender)
コード例 #3
0
    def __call__(self, parameters):
        """Sample and save trajectories in which one wolf follows a fixed goal.

        Builds the multi-agent chasing environment (two blocks), restores
        MADDPG sheep and wolf models from ``../../data/preTrainModel``
        (relative to this file), wires up per-wolf intention inference, and
        replaces the action sampler of wolf ``perturbedWolfID`` with a model
        that observes only the sheep ``sheepsID[perturbedWolfGoalID]``.
        ``self.numTrajectories`` trajectories are rolled out; each step is
        extended with the recorded intention distributions and the result is
        passed to ``self.saveTrajectoryByParameters``.

        Entity indices are laid out as wolves first, then sheep, then blocks.

        Args:
            parameters: mapping read for the keys ``'numWolves'``,
                ``'numSheep'``, ``'wolfType'``, ``'perturbedWolfID'`` and
                ``'perturbedWolfGoalID'``. ``wolfType`` equal to
                ``'sharedAgencyBySharedRewardWolf'`` selects ``wolfSelfish``
                0.0; any other value selects 1.0. The mapping is forwarded
                unchanged to ``self.saveTrajectoryByParameters``.

        Returns:
            None.
        """
        print(parameters)

        valuePriorEndTime = -100
        deviationFor2DAction = 1.0
        rationalityBetaInInference = 1.0

        numWolves = parameters['numWolves']
        numSheep = parameters['numSheep']
        wolfType = parameters['wolfType']
        wolfSelfish = 0.0 if wolfType == 'sharedAgencyBySharedRewardWolf' else 1.0
        perturbedWolfID = parameters['perturbedWolfID']
        perturbedWolfGoalID = parameters['perturbedWolfGoalID']

        ## MDP Env
        numBlocks = 2
        numAgents = numWolves + numSheep
        numEntities = numAgents + numBlocks
        wolvesID = list(range(numWolves))
        sheepsID = list(range(numWolves, numWolves + numSheep))
        blocksID = list(range(numAgents, numEntities))

        sheepSize = 0.05
        wolfSize = 0.075
        blockSize = 0.2
        entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheep + [blockSize] * numBlocks

        costActionRatio = 0.0
        sheepSpeedMultiplier = 1.0
        sheepMaxSpeed = 1.3 * sheepSpeedMultiplier
        wolfMaxSpeed = 1.0
        blockMaxSpeed = None

        entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [sheepMaxSpeed] * numSheep + [blockMaxSpeed] * numBlocks
        entitiesMovableList = [True] * numAgents + [False] * numBlocks
        massList = [1.0] * numEntities

        collisionReward = 1  # for evaluation, count # of bites
        isCollision = IsCollision(getPosFromAgentState)
        rewardAllWolves = RewardWolf(wolvesID, sheepsID, entitiesSizeList, isCollision, collisionReward, wolfSelfish)
        # The trajectory reward is the sum of whatever RewardWolf returns.
        rewardWolf = lambda state, action, nextState: np.sum(rewardAllWolves(state, action, nextState))

        reshapeActionInTransit = lambda action: action
        getCollisionForce = GetCollisionForce()
        applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
        applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList, getCollisionForce,
                                              getPosFromAgentState)
        integrateState = IntegrateState(numEntities, entitiesMovableList, massList, entityMaxSpeedList,
                                        getVelFromAgentState, getPosFromAgentState)
        transit = TransitMultiAgentChasing(numEntities, reshapeActionInTransit, applyActionForce, applyEnvironForce,
                                           integrateState)

        forwardOneStep = ForwardOneStep(transit, rewardWolf)

        reset = ResetMultiAgentChasingWithSeed(numAgents, numBlocks)
        isTerminal = lambda state: False  # episodes end only at maxRunningStepsToSample
        maxRunningStepsToSample = 101
        sampleTrajectory = SampleTrajectory(maxRunningStepsToSample, isTerminal, reset, forwardOneStep)

        ## MDP Policy
        worldDim = 2
        actionDim = worldDim * 2 + 1

        layerWidth = [128, 128]
        maxTimeStep = 75
        maxEpisode = 60000
        dirName = os.path.dirname(__file__)

        # ------------ sheep recover variables ------------------------
        numSheepToObserve = 1
        sheepModelListOfDiffWolfReward = []
        sheepTypeList = [0.0, 1.0]

        # The observation helpers do not depend on sheepType, so they are
        # defined once here; observeSheep is also needed by the sheep policies
        # composed in the trajectory loop.
        wolvesIDForSheepObserve = list(range(numWolves))
        sheepsIDForSheepObserve = list(range(numWolves, numSheepToObserve + numWolves))
        blocksIDForSheepObserve = list(
            range(numSheepToObserve + numWolves, numSheepToObserve + numWolves + numBlocks))
        observeOneAgentForSheep = lambda agentID: Observe(agentID, wolvesIDForSheepObserve, sheepsIDForSheepObserve,
                                                          blocksIDForSheepObserve, getPosFromAgentState,
                                                          getVelFromAgentState)
        observeSheep = lambda state: [observeOneAgentForSheep(agentID)(state) for agentID in
                                      range(numWolves + numSheepToObserve)]
        obsIDsForSheep = wolvesIDForSheepObserve + sheepsIDForSheepObserve + blocksIDForSheepObserve

        for sheepType in sheepTypeList:
            # reset() stays inside the loop so it is called once per sheep
            # type, exactly as before; the sample is only used to read the
            # per-agent observation sizes.
            initObsForSheepParams = observeSheep(reset()[obsIDsForSheep])
            obsShapeSheep = [obs.shape[0] for obs in initObsForSheepParams]

            buildSheepModels = BuildMADDPGModels(actionDim, numWolves + numSheepToObserve, obsShapeSheep)
            sheepModelsList = [buildSheepModels(layerWidth, agentID) for agentID in
                               range(numWolves, numWolves + numSheepToObserve)]

            sheepFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format(
                numWolves, numSheepToObserve, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier, costActionRatio,
                sheepType)
            sheepModelPaths = [os.path.join(dirName, '..', '..', 'data', 'preTrainModel', sheepFileName + str(i)) for i
                               in range(numWolves, numWolves + numSheepToObserve)]
            for model, path in zip(sheepModelsList, sheepModelPaths):
                restoreVariables(model, path)
            sheepModelListOfDiffWolfReward.extend(sheepModelsList)

        # Shared by the sheep policies, the wolves' central-control policies
        # and the perturbed wolf.
        actOneStep = ActOneStep(actByPolicyTrainNoNoisy)
        numAllSheepModels = len(sheepModelListOfDiffWolfReward)

        # ------------ recover variables for "we" ------------------------
        numAgentsInWe = numWolves
        numSheepInWe = 1
        numBlocksForWe = numBlocks
        wolvesIDForWolfObserve = list(range(numAgentsInWe))
        sheepsIDForWolfObserve = list(range(numAgentsInWe, numSheepInWe + numAgentsInWe))
        blocksIDForWolfObserve = list(
            range(numSheepInWe + numAgentsInWe, numSheepInWe + numAgentsInWe + numBlocksForWe))

        observeOneAgentForWolf = lambda agentID: Observe(agentID, wolvesIDForWolfObserve, sheepsIDForWolfObserve,
                                                         blocksIDForWolfObserve, getPosFromAgentState,
                                                         getVelFromAgentState)
        observeWolf = lambda state: [observeOneAgentForWolf(agentID)(state) for agentID in
                                     range(numAgentsInWe + numSheepInWe)]

        obsIDsForWolf = wolvesIDForWolfObserve + sheepsIDForWolfObserve + blocksIDForWolfObserve
        initObsForWolfParams = observeWolf(reset()[obsIDsForWolf])
        obsShapeWolf = [obs.shape[0] for obs in initObsForWolfParams]
        buildWolfModels = BuildMADDPGModels(actionDim, numAgentsInWe + numSheepInWe, obsShapeWolf)
        layerWidthForWolf = [128, 128]
        wolfModelsList = [buildWolfModels(layerWidthForWolf, agentID) for agentID in range(numAgentsInWe)]

        wolfFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format(
            numWolves, numSheepInWe, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier, costActionRatio,
            wolfSelfish)
        wolfModelPaths = [os.path.join(dirName, '..', '..', 'data', 'preTrainModel', wolfFileName + str(i)) for i in
                          range(numAgentsInWe)]
        for model, path in zip(wolfModelsList, wolfModelPaths):
            restoreVariables(model, path)

        # ------------ compose wolves policy no perturbation ------------------------
        actionDimReshaped = 2
        cov = [deviationFor2DAction ** 2 for _ in range(actionDimReshaped)]  # 1.0 per dimension
        buildGaussian = BuildGaussianFixCov(cov)
        reshapeAction = ReshapeAction()
        composeCentralControlPolicy = lambda observe: ComposeCentralControlPolicyByGaussianOnDeterministicAction(
            reshapeAction, observe, actOneStep, buildGaussian)
        # Single-element list: one central-control policy for the whole group.
        wolvesCentralControlPolicy = [composeCentralControlPolicy(observeWolf)(wolfModelsList, numAgentsInWe)]

        softPolicyInInference = lambda distribution: distribution  # identity, no softening
        getStateThirdPersonPerspective = lambda state, goalId, weIds: getStateOrActionThirdPersonPerspective(
            state, goalId, weIds, blocksID)
        policyForCommittedAgentsInInference = PolicyForCommittedAgent(wolvesCentralControlPolicy,
                                                                      softPolicyInInference,
                                                                      getStateThirdPersonPerspective)
        concernedAgentsIds = wolvesID
        calCommittedAgentsPolicyLikelihood = CalCommittedAgentsContinuousPolicyLikelihood(
            concernedAgentsIds, policyForCommittedAgentsInInference, rationalityBetaInInference)

        randomActionSpace = [(5, 0), (3.5, 3.5), (0, 5), (-3.5, 3.5), (-5, 0), (-3.5, -3.5), (0, -5), (3.5, -3.5),
                             (0, 0)]
        randomPolicy = RandomPolicy(randomActionSpace)
        getStateFirstPersonPerspective = lambda state, goalId, weIds, selfId: getStateOrActionFirstPersonPerspective(
            state, goalId, weIds, selfId, blocksID)
        policyForUncommittedAgentsInInference = PolicyForUncommittedAgent(wolvesID, randomPolicy,
                                                                          softPolicyInInference,
                                                                          getStateFirstPersonPerspective)
        calUncommittedAgentsPolicyLikelihood = CalUncommittedAgentsPolicyLikelihood(
            wolvesID, concernedAgentsIds, policyForUncommittedAgentsInInference)

        # Joint likelihood = committed-agents likelihood * uncommitted-agents likelihood.
        calJointLikelihood = lambda intention, state, perceivedAction: (
            calCommittedAgentsPolicyLikelihood(intention, state, perceivedAction)
            * calUncommittedAgentsPolicyLikelihood(intention, state, perceivedAction))

        # ------------ wolves intention ------------------------
        # Every wolf has the same intention space: each sheep paired with the
        # tuple of all wolf ids, e.g. ((3, (0, 1, 2)), (4, (0, 1, 2)), ...).
        intentionSpacesForAllWolves = [tuple(it.product(sheepsID, [tuple(wolvesID)])) for _ in wolvesID]
        print('intentionSpacesForAllWolves', intentionSpacesForAllWolves)
        # Uniform prior over each wolf's intention space.
        wolvesIntentionPriors = [
            {tuple(intention): 1 / len(allPossibleIntentionsOneWolf) for intention in allPossibleIntentionsOneWolf}
            for allPossibleIntentionsOneWolf in intentionSpacesForAllWolves]
        perceptSelfAction = SampleNoisyAction(deviationFor2DAction)
        perceptOtherAction = SampleNoisyAction(deviationFor2DAction)
        perceptAction = PerceptImaginedWeAction(wolvesID, perceptSelfAction, perceptOtherAction)

        # Infer and update Intention
        variablesForAllWolves = [[intentionSpace] for intentionSpace in intentionSpacesForAllWolves]
        jointHypothesisSpaces = [pd.MultiIndex.from_product(variables, names=['intention']) for variables in
                                 variablesForAllWolves]
        concernedHypothesisVariable = ['intention']
        priorDecayRate = 1
        softPrior = SoftDistribution(priorDecayRate)
        inferIntentionOneStepList = [InferOneStep(jointHypothesisSpace, concernedHypothesisVariable,
                                                  calJointLikelihood, softPrior) for jointHypothesisSpace in
                                     jointHypothesisSpaces]

        if numSheep == 1:
            # With a single sheep there is nothing to infer: return the prior
            # unchanged. One entry per wolf (previously hard-coded to 3, which
            # silently dropped wolves beyond the third in the zip below).
            inferIntentionOneStepList = [lambda prior, state, action: prior] * numWolves

        adjustIntentionPriorGivenValueOfState = lambda state: 1
        chooseIntention = sampleFromDistribution
        updateIntentions = [UpdateIntention(intentionPrior, valuePriorEndTime, adjustIntentionPriorGivenValueOfState,
                                            perceptAction, inferIntentionOneStep, chooseIntention)
                            for intentionPrior, inferIntentionOneStep in
                            zip(wolvesIntentionPriors, inferIntentionOneStepList)]

        # reset intention and adjust intention prior attributes tools for multiple trajectory
        intentionResetAttributes = ['timeStep', 'lastState', 'lastAction', 'intentionPrior', 'formerIntentionPriors']
        intentionResetAttributeValues = [
            dict(zip(intentionResetAttributes, [0, None, None, intentionPrior, [intentionPrior]]))
            for intentionPrior in wolvesIntentionPriors]
        resetIntentions = ResetObjects(intentionResetAttributeValues, updateIntentions)
        returnAttributes = ['formerIntentionPriors']
        # NOTE(review): only updateIntentions[1:] is recorded, i.e. wolf 0 is
        # skipped regardless of perturbedWolfID -- confirm this is intended.
        getIntentionDistributions = GetObjectsValuesOfAttributes(returnAttributes, updateIntentions[1:])
        attributesToRecord = ['lastAction']
        recordActionForUpdateIntention = RecordValuesForObjects(attributesToRecord, updateIntentions)

        # Wolves Generate Action
        # Planning uses a much smaller covariance than inference (1e-8 per dim).
        covForPlanning = [0.00000001 for _ in range(actionDimReshaped)]
        buildGaussianForPlanning = BuildGaussianFixCov(covForPlanning)
        composeCentralControlPolicyForPlanning = lambda observe: \
            ComposeCentralControlPolicyByGaussianOnDeterministicAction(reshapeAction, observe, actOneStep,
                                                                       buildGaussianForPlanning)
        wolvesCentralControlPoliciesForPlanning = [
            composeCentralControlPolicyForPlanning(observeWolf)(wolfModelsList, numAgentsInWe)]

        centralControlPolicyListBasedOnNumAgentsInWeForPlanning = wolvesCentralControlPoliciesForPlanning
        softPolicyInPlanning = lambda distribution: distribution  # identity, no softening
        policyForCommittedAgentInPlanning = PolicyForCommittedAgent(
            centralControlPolicyListBasedOnNumAgentsInWeForPlanning, softPolicyInPlanning,
            getStateThirdPersonPerspective)

        policyForUncommittedAgentInPlanning = PolicyForUncommittedAgent(wolvesID, randomPolicy, softPolicyInPlanning,
                                                                        getStateFirstPersonPerspective)

        def wolfChooseActionMethod(individualContinuousDistributions):
            """Sample one action per distribution and return them as a tuple of tuples."""
            return tuple(tuple(sampleFromContinuousSpace(distribution))
                         for distribution in individualContinuousDistributions)

        getSelfActionIDInThirdPersonPerspective = lambda weIds, selfId: list(weIds).index(selfId)
        chooseCommittedAction = GetActionFromJointActionDistribution(wolfChooseActionMethod,
                                                                     getSelfActionIDInThirdPersonPerspective)
        chooseUncommittedAction = sampleFromDistribution
        wolvesSampleIndividualActionGivenIntentionList = [
            SampleIndividualActionGivenIntention(selfId, policyForCommittedAgentInPlanning,
                                                 policyForUncommittedAgentInPlanning, chooseCommittedAction,
                                                 chooseUncommittedAction)
            for selfId in wolvesID]

        # ------------------- recover one wolf model that only concerns one sheep -------------------
        # The perturbed wolf observes all wolves, the single sheep
        # sheepsID[perturbedWolfGoalID] and the blocks; other sheep are skipped.
        numSheepForPerturbedWolf = 1
        wolvesIDForPerturbedWolf = wolvesID
        sheepsIDForPerturbedWolf = [sheepsID[perturbedWolfGoalID]]
        blocksIDForPerturbedWolf = list(range(numWolves + numSheep, numEntities))

        observeOneAgentForPerturbedWolf = lambda agentID: Observe(agentID, wolvesIDForPerturbedWolf,
                                                                  sheepsIDForPerturbedWolf,
                                                                  blocksIDForPerturbedWolf, getPosFromAgentState,
                                                                  getVelFromAgentState)
        observePerturbedWolf = lambda state: [observeOneAgentForPerturbedWolf(agentID)(state) for agentID in
                                              wolvesIDForPerturbedWolf + sheepsIDForPerturbedWolf]

        initObsForPerturbedWolfParams = observePerturbedWolf(reset())
        obsShapePerturbedWolf = [obs.shape[0] for obs in initObsForPerturbedWolfParams]
        buildPerturbedWolfModels = BuildMADDPGModels(actionDim, numWolves + numSheepForPerturbedWolf,
                                                     obsShapePerturbedWolf)
        perturbedWolfModel = buildPerturbedWolfModels(layerWidthForWolf, perturbedWolfID)

        perturbedWolfFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format(
            numWolves, numSheepForPerturbedWolf, numBlocks, maxEpisode, maxTimeStep, sheepSpeedMultiplier,
            costActionRatio, wolfSelfish)
        perturbedWolfModelPath = os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                                              perturbedWolfFileName + str(perturbedWolfID))
        restoreVariables(perturbedWolfModel, perturbedWolfModelPath)

        # ------------------- Sample and Save Trajectory -------------------
        wolvesSampleActions = [
            SampleActionOnChangableIntention(updateIntention, wolvesSampleIndividualActionGivenIntention)
            for updateIntention, wolvesSampleIndividualActionGivenIntention in
            zip(updateIntentions, wolvesSampleIndividualActionGivenIntentionList)]

        perturbedWolfSampleActions = lambda state: tuple(
            reshapeAction(actOneStep(perturbedWolfModel, observePerturbedWolf(state))))
        # Copy before overriding one entry so the unperturbed list is not
        # modified through an alias.
        wolvesSampleActionsPerturbed = list(wolvesSampleActions)
        wolvesSampleActionsPerturbed[perturbedWolfID] = perturbedWolfSampleActions

        trajectoriesWithIntentionDists = []
        for _ in range(self.numTrajectories):
            # Every sheep gets a model drawn uniformly from all restored
            # sheep models, redrawn for each trajectory.
            sheepModelsForPolicy = [sheepModelListOfDiffWolfReward[np.random.choice(numAllSheepModels)]
                                    for _ in sheepsID]
            # The sheep policy returns a one-point distribution {action: 1}.
            composeSheepPolicy = lambda sheepModel: lambda state: {
                tuple(reshapeAction(actOneStep(sheepModel, observeSheep(state)))): 1}
            sheepChooseActionMethod = sampleFromDistribution
            sheepSampleActions = [SampleActionOnFixedIntention(selfId, wolvesID, composeSheepPolicy(sheepModel),
                                                               sheepChooseActionMethod, blocksID)
                                  for selfId, sheepModel in zip(sheepsID, sheepModelsForPolicy)]

            # Unperturbed sampler: constructed as before but not used below;
            # only the perturbed sampler drives the rollout.
            allIndividualSampleActions = wolvesSampleActions + sheepSampleActions
            sampleActionMultiAgent = SampleActionMultiagent(allIndividualSampleActions,
                                                            recordActionForUpdateIntention)

            allIndividualSampleActionsPerturbed = wolvesSampleActionsPerturbed + sheepSampleActions
            sampleActionMultiAgentPerturbed = SampleActionMultiagent(allIndividualSampleActionsPerturbed,
                                                                     recordActionForUpdateIntention)

            trajectory = sampleTrajectory(sampleActionMultiAgentPerturbed)

            # Append the recorded intention distributions to each step tuple.
            intentionDistributions = getIntentionDistributions()
            trajectoryWithIntentionDists = [tuple(list(SASRPair) + list(intentionDist))
                                            for SASRPair, intentionDist in zip(trajectory, intentionDistributions)]
            trajectoriesWithIntentionDists.append(tuple(trajectoryWithIntentionDists))
            # Restore the intention objects to their initial attributes
            # before the next trajectory.
            resetIntentions()

        trajectoryFixedParameters = {'maxRunningStepsToSample': maxRunningStepsToSample}
        self.saveTrajectoryByParameters(trajectoriesWithIntentionDists, trajectoryFixedParameters, parameters)
コード例 #4
0
    def __call__(self, parameters):
        """Compare central-control wolf actions with per-wolf level-2 actions.

        For each of ``self.numTrajectories`` trials a non-terminal state is
        drawn from ``reset``; one joint action is sampled from the
        central-control policy for ``numWolves`` wolves, and one action per
        wolf is sampled from the level-2 policy.  Each (joint, level-2) pair
        is flagged 1 when the Euclidean norm of their difference is at most
        ``8 * predatorPowerRatio`` and 0 otherwise.

        Args:
            parameters: mapping; only ``parameters['numWolves']`` is read.
                The policy lists below are built for team sizes
                ``2..numWolves`` and indexed with ``numWolves - 2``, so with
                ``numWolves < 2`` those lists are empty.

        Returns:
            ``np.mean`` over all collected 0/1 flags.
        """
        print(parameters)
        numWolves = parameters['numWolves']
        # This evaluation always uses exactly one sheep.
        numSheep = 1
        
        ## MDP Env  
        # state is all multi agent state # action is all multi agent action
        xBoundary = [0,600]
        yBoundary = [0,600]
        numOfAgent = numWolves + numSheep
        reset = Reset(xBoundary, yBoundary, numOfAgent)

        # Agent ordering in the state: sheep ids first, wolves ids after them.
        possibleSheepIds = list(range(numSheep))
        possibleWolvesIds = list(range(numSheep, numSheep + numWolves))
        getSheepStatesFromAll = lambda state: np.array(state)[possibleSheepIds]
        getWolvesStatesFromAll = lambda state: np.array(state)[possibleWolvesIds]
        killzoneRadius = 50
        isTerminal = IsTerminal(killzoneRadius, getSheepStatesFromAll, getWolvesStatesFromAll)

        stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(xBoundary, yBoundary)
        interpolateOneFrame = InterpolateOneFrame(stayInBoundaryByReflectVelocity)
        numFramesToInterpolate = 3
        transit = TransitWithTerminalCheckOfInterpolation(numFramesToInterpolate, interpolateOneFrame, isTerminal)

        maxRunningSteps = 52
        timeCost = 1/maxRunningSteps
        terminalBonus = 1
        rewardFunction = RewardFunctionByTerminal(timeCost, terminalBonus, isTerminal)

        # NOTE(review): sampleTrajectory is constructed here but is not called
        # anywhere later in this method.
        forwardOneStep = ForwardOneStep(transit, rewardFunction)
        sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, reset, forwardOneStep)

        ## MDP Policy
        # Sheep Part

        # Sheep Policy Function
        # Two numbers per agent (the sheep plus every wolf).
        numSheepPolicyStateSpace = 2 * (numWolves + 1)
        sheepActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7),
                       (-10, 0), (-7, -7), (0, -10), (7, -7), (0, 0)]
        preyPowerRatio = 12
        # Scale every base action by preyPowerRatio and convert rows back to tuples.
        sheepIndividualActionSpace = list(map(tuple, np.array(sheepActionSpace) * preyPowerRatio))
        numSheepActionSpace = len(sheepIndividualActionSpace)
        regularizationFactor = 1e-4
        generateSheepModel = GenerateModel(numSheepPolicyStateSpace, numSheepActionSpace, regularizationFactor)
        sharedWidths = [128]
        actionLayerWidths = [128]
        valueLayerWidths = [128]
        sheepNNDepth = 9
        resBlockSize = 2
        dropoutRate = 0.0
        initializationMethod = 'uniform'
        # sharedWidths * sheepNNDepth repeats the width list: nine entries of 128.
        initSheepModel = generateSheepModel(sharedWidths * sheepNNDepth, actionLayerWidths, valueLayerWidths, 
                resBlockSize, initializationMethod, dropoutRate)
        # Path is relative to the current working directory, not to this file.
        sheepModelPath = os.path.join('..', '..', 'data', 'preTrainModel',
                'agentId=0.'+str(numWolves)+'_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations=110_trainSteps=50000')
        sheepNNModel = restoreVariables(initSheepModel, sheepModelPath)
        sheepPolicy = ApproximatePolicy(sheepNNModel, sheepIndividualActionSpace)

        # Sheep Generate Action
        softParameterInPlanningForSheep = 2.5
        softPolicyInPlanningForSheep = SoftDistribution(softParameterInPlanningForSheep)
        # NOTE(review): softenSheepPolicy is defined but not referenced again in
        # this method; sheepSampleActions below is built from the raw sheepPolicy
        # and is itself not used later in this method.
        softenSheepPolicy = lambda relativeAgentsStatesForSheepPolicy: softPolicyInPlanningForSheep(sheepPolicy(relativeAgentsStatesForSheepPolicy))

        sheepChooseActionMethod = sampleFromDistribution
        sheepSampleActions = [SampleActionOnFixedIntention(selfId, possibleWolvesIds, sheepPolicy, sheepChooseActionMethod) for selfId in possibleSheepIds]

        # Wolves Part

        # Policy Likelihood function: Wolf Central Control NN Policy Given Intention
        # One entry per team size numInWe = 2..numWolves (list index = numInWe - 2).
        numWolvesStateSpaces = [2 * (numInWe + 1) 
                for numInWe in range(2, numWolves + 1)]
        actionSpace = [(10, 0), (0, 10), (-10, 0), (0, -10)]
        predatorPowerRatio = 8
        wolfIndividualActionSpace = list(map(tuple, np.array(actionSpace) * predatorPowerRatio))
        # Joint action space for a team: every combination of one individual
        # action per wolf in the team.
        wolvesCentralControlActionSpaces = [list(it.product(wolfIndividualActionSpace, repeat = numInWe)) 
                for numInWe in range(2, numWolves + 1)]
        numWolvesCentralControlActionSpaces = [len(wolvesCentralControlActionSpace)
                for wolvesCentralControlActionSpace in wolvesCentralControlActionSpaces]
        regularizationFactor = 1e-4
        generateWolvesCentralControlModels = [GenerateModel(numStateSpace, numActionSpace, regularizationFactor) 
            for numStateSpace, numActionSpace in zip(numWolvesStateSpaces, numWolvesCentralControlActionSpaces)]
        sharedWidths = [128]
        actionLayerWidths = [128]
        valueLayerWidths = [128]
        wolfNNDepth = 9
        resBlockSize = 2
        dropoutRate = 0.0
        initializationMethod = 'uniform'
        initWolvesCentralControlModels = [generateWolvesCentralControlModel(sharedWidths * wolfNNDepth, actionLayerWidths, valueLayerWidths, 
                resBlockSize, initializationMethod, dropoutRate) for generateWolvesCentralControlModel in generateWolvesCentralControlModels] 
        NNNumSimulations = 250
        # The agentId part of the file name is len(actionSpace) times a number
        # made of numInWe ones (1 + 10 + 100 + ...), e.g. 4 * 11 = 44 for two wolves.
        wolvesModelPaths = [os.path.join('..', '..', 'data', 'preTrainModel', 
                'agentId='+str(len(actionSpace) * np.sum([10**_ for _ in
                range(numInWe)]))+'_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations='+str(NNNumSimulations)+'_trainSteps=50000') 
                for numInWe in range(2, numWolves + 1)]
        print(wolvesModelPaths)
        wolvesCentralControlNNModels = [restoreVariables(initWolvesCentralControlModel, wolvesModelPath) 
                for initWolvesCentralControlModel, wolvesModelPath in zip(initWolvesCentralControlModels, wolvesModelPaths)]
        # The comprehension variable named actionSpace is local to the
        # comprehension and does not rebind the method-level actionSpace.
        wolvesCentralControlPolicies = [ApproximatePolicy(NNModel, actionSpace) 
                for NNModel, actionSpace in zip(wolvesCentralControlNNModels, wolvesCentralControlActionSpaces)] 

        centralControlPolicyListBasedOnNumAgentsInWe = wolvesCentralControlPolicies # 0 for two agents in We, 1 for three agents...
        softParameterInInference = 1
        softPolicyInInference = SoftDistribution(softParameterInInference)
        # NOTE(review): calCommittedAgentsPolicyLikelihood is built here but not
        # referenced again in this method.
        policyForCommittedAgentsInInference = PolicyForCommittedAgent(centralControlPolicyListBasedOnNumAgentsInWe, softPolicyInInference,
                getStateThirdPersonPerspective)
        calCommittedAgentsPolicyLikelihood = CalCommittedAgentsPolicyLikelihood(policyForCommittedAgentsInInference)
        
        # Level-2 (per-wolf) policy: eight directions, no (0, 0) action.
        wolfLevel2ActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7),
                       (-10, 0), (-7, -7), (0, -10), (7, -7)]
        wolfLevel2IndividualActionSpace = list(map(tuple, np.array(wolfLevel2ActionSpace) * predatorPowerRatio))
        # it.product over a single iterable yields 1-tuples, so every element
        # here has the form ((x, y),).
        wolfLevel2CentralControlActionSpace = list(it.product(wolfLevel2IndividualActionSpace))
        numWolfLevel2ActionSpace = len(wolfLevel2CentralControlActionSpace)
        regularizationFactor = 1e-4
        generatewolfLevel2Models = [GenerateModel(numStateSpace, numWolfLevel2ActionSpace, regularizationFactor) for numStateSpace in numWolvesStateSpaces]
        sharedWidths = [128]
        actionLayerWidths = [128]
        valueLayerWidths = [128]
        wolfLevel2NNDepth = 9
        resBlockSize = 2
        dropoutRate = 0.0
        initializationMethod = 'uniform'
        initwolfLevel2Models = [generatewolfLevel2Model(sharedWidths * wolfLevel2NNDepth, actionLayerWidths, valueLayerWidths, 
                resBlockSize, initializationMethod, dropoutRate) for generatewolfLevel2Model in generatewolfLevel2Models]
        wolfLevel2ModelPaths = [os.path.join('..', '..', 'data', 'preTrainModel', 
                'agentId=1.'+str(numInWe)+'_depth=9_hierarchy=2_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations='+str(NNNumSimulations)+'_trainSteps=50000') 
                for numInWe in range(2, numWolves + 1)]
        wolfLevel2NNModels = [restoreVariables(initwolfLevel2Model, wolfLevel2ModelPath)
                for initwolfLevel2Model, wolfLevel2ModelPath in zip(initwolfLevel2Models, wolfLevel2ModelPaths)]
        wolfLevel2Policies = [ApproximatePolicy(wolfLevel2NNModel, wolfLevel2CentralControlActionSpace) 
                for wolfLevel2NNModel in wolfLevel2NNModels]
        level2PolicyListBasedOnNumAgentsInWe = wolfLevel2Policies # 0 for two agents in We, 1 for three agents...

        softPolicy = SoftDistribution(2.5)
        totalInSmallRangeFlags = []
        for trial in range(self.numTrajectories):
            # Re-draw the start state until it is not terminal.
            state = reset()
            while isTerminal(state):
                state = reset()

            # Index numWolves - 2 selects the policy built for a team of numWolves.
            jointActions = sampleFromDistribution(softPolicy(wolvesCentralControlPolicies[numWolves - 2](state)))

            hierarchyActions = []
            weIds = [list(range(numSheep, numWolves + numSheep)) for _ in range(numWolves)]
            for index in range(numWolves):
                # Move the wolf at position `index` to the front of the team id
                # list, then prepend id 0 (the sheep) to form the index list.
                weId = weIds[index].copy()
                weId.insert(0, weId.pop(index))
                relativeId = [0] + weId
                # assumes state supports indexing with a list of ids (e.g. a
                # numpy array) - TODO confirm against Reset
                action = sampleFromDistribution(softPolicy(wolfLevel2Policies[numWolves - 2](state[relativeId])))
                hierarchyActions.append(action)

            # 1 when the two actions differ by at most 8 * predatorPowerRatio in
            # Euclidean norm, else 0; pairs where either side equals (0, 0) are
            # dropped.
            # NOTE(review): the level-2 action space above holds 1-tuples
            # ((x, y),), so if sampled actions keep that form the
            # `hierarchyAction != (0, 0)` test is always True - confirm.
            reasonableActionRange = [int(np.linalg.norm(np.array(jointAction) - np.array(hierarchyAction)) <= 8 * predatorPowerRatio)
                    for jointAction, hierarchyAction in zip(jointActions, hierarchyActions) if jointAction != (0, 0) and hierarchyAction != (0, 0)]
            totalInSmallRangeFlags = totalInSmallRangeFlags + reasonableActionRange
        inSmallRangeRateMean = np.mean(totalInSmallRangeFlags)
        return inSmallRangeRateMean
コード例 #5
0
    def __call__(self, parameters):
        """Sample trajectories with intention-inferring wolves and save them.

        Builds the chasing environment, a sheep policy and wolf policies
        from restored network models, then runs ``self.numTrajectories``
        trajectories.  Every step of a trajectory is extended with the
        recorded intention distributions, and the resulting collection is
        passed to ``self.saveTrajectoryByParameters``.  The mean trajectory
        length is printed at the end.

        Args:
            parameters: mapping that must provide the keys ``'numWolves'``,
                ``'numSheep'``, ``'inferenceSoft'``, ``'wolfPolicySoft'``,
                ``'otherCompeteRate'`` and ``'competeDetectionRate'``.
                ``'competeDetectionRate'`` is used as a dictionary key and
                must be one of 0, 0.5 or 1.

        Returns:
            None; results are saved and printed rather than returned.
        """
        print(parameters)
        numWolves = parameters['numWolves']
        numSheep = parameters['numSheep']
        softParameterInInference = parameters['inferenceSoft']
        softParameterInPlanning = parameters['wolfPolicySoft']
        otherCompeteRate = parameters['otherCompeteRate']
        competeDetectionRate = parameters['competeDetectionRate']

        ## MDP Env
        # state is all multi agent state # action is all multi agent action
        xBoundary = [0, 600]
        yBoundary = [0, 600]
        numOfAgent = numWolves + numSheep
        reset = Reset(xBoundary, yBoundary, numOfAgent)

        # Agent ordering in the state: sheep ids first, wolves ids after them.
        possibleSheepIds = list(range(numSheep))
        possibleWolvesIds = list(range(numSheep, numSheep + numWolves))
        getSheepStatesFromAll = lambda state: np.array(state)[possibleSheepIds]
        getWolvesStatesFromAll = lambda state: np.array(state)[
            possibleWolvesIds]
        killzoneRadius = 50
        isTerminal = IsTerminal(killzoneRadius, getSheepStatesFromAll,
                                getWolvesStatesFromAll)

        stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
            xBoundary, yBoundary)
        interpolateOneFrame = InterpolateOneFrame(
            stayInBoundaryByReflectVelocity)
        numFramesToInterpolate = 3
        transit = TransitWithTerminalCheckOfInterpolation(
            numFramesToInterpolate, interpolateOneFrame, isTerminal)

        maxRunningSteps = 61
        timeCost = 1 / maxRunningSteps
        terminalBonus = 1
        rewardFunction = RewardFunctionByTerminal(timeCost, terminalBonus,
                                                  isTerminal)

        forwardOneStep = ForwardOneStep(transit, rewardFunction)
        sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, reset,
                                            forwardOneStep)

        ## MDP Policy
        # Sheep Part

        # Sheep Policy Function
        # Two numbers per agent (one sheep plus every wolf).
        numSheepPolicyStateSpace = 2 * (numWolves + 1)
        sheepActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0),
                            (-7, -7), (0, -10), (7, -7), (0, 0)]
        preyPowerRatio = 12
        # Scale every base action by preyPowerRatio and convert rows back to tuples.
        sheepIndividualActionSpace = list(
            map(tuple,
                np.array(sheepActionSpace) * preyPowerRatio))
        numSheepActionSpace = len(sheepIndividualActionSpace)
        regularizationFactor = 1e-4
        generateSheepModel = GenerateModel(numSheepPolicyStateSpace,
                                           numSheepActionSpace,
                                           regularizationFactor)
        sharedWidths = [128]
        actionLayerWidths = [128]
        valueLayerWidths = [128]
        sheepNNDepth = 9
        resBlockSize = 2
        dropoutRate = 0.0
        initializationMethod = 'uniform'
        # sharedWidths * sheepNNDepth repeats the width list: nine entries of 128.
        initSheepModel = generateSheepModel(sharedWidths * sheepNNDepth,
                                            actionLayerWidths,
                                            valueLayerWidths, resBlockSize,
                                            initializationMethod, dropoutRate)
        # Path is relative to the current working directory, not to this file.
        sheepModelPath = os.path.join(
            '..', '..', 'data', 'preTrainModel',
            'agentId=0.' + str(numWolves) +
            '_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations=110_trainSteps=50000'
        )
        sheepNNModel = restoreVariables(initSheepModel, sheepModelPath)
        sheepPolicy = ApproximatePolicy(sheepNNModel,
                                        sheepIndividualActionSpace)

        # Sheep Generate Action
        softParameterInPlanningForSheep = 2.0
        softPolicyInPlanningForSheep = SoftDistribution(
            softParameterInPlanningForSheep)
        # The sheep act on the softened policy (sheepPolicy passed through
        # softPolicyInPlanningForSheep).
        softenSheepPolicy = lambda relativeAgentsStatesForSheepPolicy: softPolicyInPlanningForSheep(
            sheepPolicy(relativeAgentsStatesForSheepPolicy))

        sheepChooseActionMethod = sampleFromDistribution
        sheepSampleActions = [
            SampleActionOnFixedIntention(selfId, possibleWolvesIds,
                                         softenSheepPolicy,
                                         sheepChooseActionMethod)
            for selfId in possibleSheepIds
        ]

        # Wolves Part

        # Percept Action For Inference
        # Identity: the perceived action is the action itself.
        perceptAction = lambda action: action

        # Policy Likelihood function: Wolf Central Control NN Policy Given Intention
        # One entry per team size numInWe = 2..numWolves (list index = numInWe - 2).
        numWolvesStateSpaces = [
            2 * (numInWe + 1) for numInWe in range(2, numWolves + 1)
        ]
        actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                       (0, -10), (7, -7)]
        predatorPowerRatio = 8
        wolfIndividualActionSpace = list(
            map(tuple,
                np.array(actionSpace) * predatorPowerRatio))
        # Joint action space for a team: every combination of one individual
        # action per wolf in the team.
        wolvesCentralControlActionSpaces = [
            list(it.product(wolfIndividualActionSpace, repeat=numInWe))
            for numInWe in range(2, numWolves + 1)
        ]
        numWolvesCentralControlActionSpaces = [
            len(wolvesCentralControlActionSpace) for
            wolvesCentralControlActionSpace in wolvesCentralControlActionSpaces
        ]
        regularizationFactor = 1e-4
        generateWolvesCentralControlModels = [
            GenerateModel(numStateSpace, numActionSpace, regularizationFactor)
            for numStateSpace, numActionSpace in zip(
                numWolvesStateSpaces, numWolvesCentralControlActionSpaces)
        ]
        sharedWidths = [128]
        actionLayerWidths = [128]
        valueLayerWidths = [128]
        wolfNNDepth = 9
        resBlockSize = 2
        dropoutRate = 0.0
        initializationMethod = 'uniform'
        initWolvesCentralControlModels = [
            generateWolvesCentralControlModel(sharedWidths * wolfNNDepth,
                                              actionLayerWidths,
                                              valueLayerWidths, resBlockSize,
                                              initializationMethod,
                                              dropoutRate)
            for generateWolvesCentralControlModel in
            generateWolvesCentralControlModels
        ]
        NNNumSimulations = 250
        # The agentId part of the file name is 8 times a number made of numInWe
        # ones (1 + 10 + 100 + ...), e.g. 8 * 11 = 88 for two wolves.  The literal
        # 8 equals the number of entries in actionSpace above.
        wolvesModelPaths = [
            os.path.join(
                '..', '..', 'data', 'preTrainModel',
                'agentId=' + str(8 * np.sum([10**_ for _ in range(numInWe)])) +
                '_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations='
                + str(NNNumSimulations) + '_trainSteps=50000')
            for numInWe in range(2, numWolves + 1)
        ]
        print(wolvesModelPaths)
        wolvesCentralControlNNModels = [
            restoreVariables(initWolvesCentralControlModel, wolvesModelPath)
            for initWolvesCentralControlModel, wolvesModelPath in zip(
                initWolvesCentralControlModels, wolvesModelPaths)
        ]
        # The comprehension variable named actionSpace is local to the
        # comprehension and does not rebind the method-level actionSpace.
        wolvesCentralControlPolicies = [
            ApproximatePolicy(NNModel, actionSpace) for NNModel, actionSpace in
            zip(wolvesCentralControlNNModels, wolvesCentralControlActionSpaces)
        ]

        centralControlPolicyListBasedOnNumAgentsInWe = wolvesCentralControlPolicies  # 0 for two agents in We, 1 for three agents...
        softPolicyInInference = SoftDistribution(softParameterInInference)
        policyForCommittedAgentsInInference = PolicyForCommittedAgent(
            centralControlPolicyListBasedOnNumAgentsInWe,
            softPolicyInInference, getStateOrActionThirdPersonPerspective)
        # NOTE(review): agent id 2 is hard-coded as the only concerned agent;
        # with the id layout above that is a wolf only when numSheep <= 2 and
        # numSheep + numWolves > 2 - confirm the intended configuration.
        concernedAgentsIds = [2]
        calCommittedAgentsPolicyLikelihood = CalCommittedAgentsPolicyLikelihood(
            concernedAgentsIds, policyForCommittedAgentsInInference)

        # For the individual heat-seeking policy, row 0 of the relative states
        # is taken as the goal and row 1 as the agent itself.
        getGoalStateForIndividualHeatseeking = lambda statesRelative: np.array(
            statesRelative)[0]
        getSelfStateForIndividualHeatseeking = lambda statesRelative: np.array(
            statesRelative)[1]
        heatseekingPrecesion = 1.83
        heatSeekingDiscreteStochasticPolicy = HeatSeekingDiscreteStochasticPolicy(
            heatseekingPrecesion, wolfIndividualActionSpace,
            getSelfStateForIndividualHeatseeking,
            getGoalStateForIndividualHeatseeking)
        policyForUncommittedAgentsInInference = PolicyForUncommittedAgent(
            possibleWolvesIds, heatSeekingDiscreteStochasticPolicy,
            softPolicyInInference, getStateOrActionFirstPersonPerspective)
        calUncommittedAgentsPolicyLikelihood = CalUncommittedAgentsPolicyLikelihood(
            possibleWolvesIds, concernedAgentsIds,
            policyForUncommittedAgentsInInference)

        # Joint Likelihood
        # Product of the committed-agents and uncommitted-agents likelihoods.
        calJointLikelihood = lambda intention, state, perceivedAction: calCommittedAgentsPolicyLikelihood(intention, state, perceivedAction) * \
                calUncommittedAgentsPolicyLikelihood(intention, state, perceivedAction)

        wolvesValueListBasedOnNumAgentsInWe = [
            ApproximateValue(NNModel)
            for NNModel in wolvesCentralControlNNModels
        ]
        calIntentionValueGivenState = CalIntentionValueGivenState(
            wolvesValueListBasedOnNumAgentsInWe)
        softParamterForValue = 0.01
        softValueToBuildDistribution = SoftMax(softParamterForValue)
        adjustIntentionPriorGivenValueOfState = AdjustIntentionPriorGivenValueOfState(
            calIntentionValueGivenState, softValueToBuildDistribution)

        # Sample and Save Trajectory
        trajectoriesWithIntentionDists = []
        for trajectoryId in range(self.numTrajectories):

            # Intention Prior For inference
            # The other wolf's intention space is drawn per trajectory: key 1
            # with probability otherCompeteRate, key 0 otherwise.
            otherWolfPossibleIntentionSpaces = {0: [(0, (1, 2))], 1: [(0, ())]}
            otherIntentionType = np.random.choice(
                [1, 0], p=[otherCompeteRate, 1 - otherCompeteRate])
            otherWolfIntentionSpace = otherWolfPossibleIntentionSpaces[
                otherIntentionType]
            # Looked up directly by competeDetectionRate; any value other than
            # 0, 0.5 or 1 raises KeyError.
            selfPossibleIntentionSpaces = {
                0: [(0, (1, 2))],
                0.5: [(0, (1, 2)), (0, ())],
                1: [(0, ())]
            }
            selfWolfIntentionSpace = selfPossibleIntentionSpaces[
                competeDetectionRate]
            # NOTE(review): exactly two intention spaces are listed here, so the
            # zip-based lists below hold at most two entries - this appears to
            # assume numWolves == 2; confirm.
            intentionSpacesForAllWolves = [
                selfWolfIntentionSpace, otherWolfIntentionSpace
            ]
            # Uniform prior over each wolf's possible intentions.
            wolvesIntentionPriors = [{
                tuple(intention): 1 / len(allPossibleIntentionsOneWolf)
                for intention in allPossibleIntentionsOneWolf
            } for allPossibleIntentionsOneWolf in intentionSpacesForAllWolves]
            # Infer and update Intention
            variablesForAllWolves = [[
                intentionSpace
            ] for intentionSpace in intentionSpacesForAllWolves]
            # One single-level MultiIndex (level name 'intention') per wolf.
            jointHypothesisSpaces = [
                pd.MultiIndex.from_product(variables, names=['intention'])
                for variables in variablesForAllWolves
            ]
            concernedHypothesisVariable = ['intention']
            priorDecayRate = 1
            softPrior = SoftDistribution(priorDecayRate)
            inferIntentionOneStepList = [
                InferOneStep(jointHypothesisSpace, concernedHypothesisVariable,
                             calJointLikelihood, softPrior)
                for jointHypothesisSpace in jointHypothesisSpaces
            ]

            chooseIntention = sampleFromDistribution
            valuePriorEndTime = -100
            updateIntentions = [
                UpdateIntention(intentionPrior, valuePriorEndTime,
                                adjustIntentionPriorGivenValueOfState,
                                perceptAction, inferIntentionOneStep,
                                chooseIntention)
                for intentionPrior, inferIntentionOneStep in zip(
                    wolvesIntentionPriors, inferIntentionOneStepList)
            ]

            # reset intention and adjust intention prior attributes tools for multiple trajectory
            intentionResetAttributes = [
                'timeStep', 'lastState', 'lastAction', 'intentionPrior',
                'formerIntentionPriors'
            ]
            # One attribute-name -> reset-value mapping per wolf.
            intentionResetAttributeValues = [
                dict(
                    zip(intentionResetAttributes,
                        [0, None, None, intentionPrior, [intentionPrior]]))
                for intentionPrior in wolvesIntentionPriors
            ]
            resetIntentions = ResetObjects(intentionResetAttributeValues,
                                           updateIntentions)
            returnAttributes = ['formerIntentionPriors']
            getIntentionDistributions = GetObjectsValuesOfAttributes(
                returnAttributes, updateIntentions)
            attributesToRecord = ['lastAction']
            recordActionForUpdateIntention = RecordValuesForObjects(
                attributesToRecord, updateIntentions)

            # Wolves Generate Action
            softPolicyInPlanning = SoftDistribution(softParameterInPlanning)
            policyForCommittedAgentInPlanning = PolicyForCommittedAgent(
                centralControlPolicyListBasedOnNumAgentsInWe,
                softPolicyInPlanning, getStateOrActionThirdPersonPerspective)

            policyForUncommittedAgentInPlanning = PolicyForUncommittedAgent(
                possibleWolvesIds, heatSeekingDiscreteStochasticPolicy,
                softPolicyInPlanning, getStateOrActionFirstPersonPerspective)

            wolfChooseActionMethod = sampleFromDistribution
            # Position of selfId inside weIds.
            getSelfActionThirdPersonPerspective = lambda weIds, selfId: list(
                weIds).index(selfId)
            chooseCommittedAction = GetActionFromJointActionDistribution(
                wolfChooseActionMethod, getSelfActionThirdPersonPerspective)
            chooseUncommittedAction = sampleFromDistribution
            wolvesSampleIndividualActionGivenIntentionList = [
                SampleIndividualActionGivenIntention(
                    selfId, policyForCommittedAgentInPlanning,
                    policyForUncommittedAgentInPlanning, chooseCommittedAction,
                    chooseUncommittedAction) for selfId in possibleWolvesIds
            ]

            wolvesSampleActions = [
                SampleActionOnChangableIntention(
                    updateIntention,
                    wolvesSampleIndividualActionGivenIntention)
                for updateIntention, wolvesSampleIndividualActionGivenIntention
                in zip(updateIntentions,
                       wolvesSampleIndividualActionGivenIntentionList)
            ]
            # Sheep samplers first, wolves after, matching the id ordering above.
            allIndividualSampleActions = sheepSampleActions + wolvesSampleActions
            sampleActionMultiAgent = SampleActionMultiagent(
                allIndividualSampleActions, recordActionForUpdateIntention)
            trajectory = sampleTrajectory(sampleActionMultiAgent)
            intentionDistributions = getIntentionDistributions()
            # Append the intention distributions to every step tuple; zip stops
            # at the shorter of the two sequences.
            trajectoryWithIntentionDists = [
                tuple(list(SASRPair) + list(intentionDist)) for SASRPair,
                intentionDist in zip(trajectory, intentionDistributions)
            ]
            trajectoriesWithIntentionDists.append(
                tuple(trajectoryWithIntentionDists))
            # Called even though updateIntentions is rebuilt at the top of the
            # next iteration.
            resetIntentions()
            #print(intentionDistributions[-1], otherCompeteRate)
        # Settings fixed inside this method, passed along with the varied
        # `parameters` when saving.
        trajectoryFixedParameters = {
            'sheepPolicySoft': softParameterInPlanningForSheep,
            'wolfPolicySoft': softParameterInPlanning,
            'maxRunningSteps': maxRunningSteps,
            'competePolicy': 'heatseeking',
            'NNNumSimulations': NNNumSimulations,
            'heatseekingPrecesion': heatseekingPrecesion
        }
        self.saveTrajectoryByParameters(trajectoriesWithIntentionDists,
                                        trajectoryFixedParameters, parameters)
        # Mean number of steps per sampled trajectory.
        print(np.mean([len(tra) for tra in trajectoriesWithIntentionDists]))
コード例 #6
0
def iterateTrainOneCondition(parameterOneCondition):

    numTrainStepEachIteration = int(
        parameterOneCondition['numTrainStepEachIteration'])
    numTrajectoriesPerIteration = int(
        parameterOneCondition['numTrajectoriesPerIteration'])
    dirName = os.path.dirname(__file__)

    numOfAgent = 2
    agentIds = list(range(numOfAgent))

    maxRunningSteps = 50
    numSimulations = 250
    killzoneRadius = 50
    fixedParameters = {
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations,
        'killzoneRadius': killzoneRadius
    }
    # env MDP
    sheepsID = [0]
    wolvesID = [1, 2]
    blocksID = []

    numSheeps = len(sheepsID)
    numWolves = len(wolvesID)
    numBlocks = len(blocksID)

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks

    sheepSize = 0.05
    wolfSize = 0.075
    blockSize = 0.2

    sheepMaxSpeed = 1.3 * 1
    wolfMaxSpeed = 1.0 * 1
    blockMaxSpeed = None

    entitiesSizeList = [sheepSize] * numSheeps + [wolfSize] * numWolves + [
        blockSize
    ] * numBlocks
    entityMaxSpeedList = [sheepMaxSpeed] * numSheeps + [
        wolfMaxSpeed
    ] * numWolves + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    centralControlId = 1
    centerControlIndexList = [centralControlId]
    reshapeAction = UnpackCenterControlAction(centerControlIndexList)
    getCollisionForce = GetCollisionForce()
    applyActionForce = ApplyActionForce(wolvesID, sheepsID,
                                        entitiesMovableList)
    applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList,
                                          entitiesSizeList, getCollisionForce,
                                          getPosFromAgentState)
    integrateState = IntegrateState(numEntities, entitiesMovableList, massList,
                                    entityMaxSpeedList, getVelFromAgentState,
                                    getPosFromAgentState)
    interpolateState = TransitMultiAgentChasing(numEntities, reshapeAction,
                                                applyActionForce,
                                                applyEnvironForce,
                                                integrateState)

    numFramesToInterpolate = 1

    def transit(state, action):
        for frameIndex in range(numFramesToInterpolate):
            nextState = interpolateState(state, action)
            action = np.array([(0, 0)] * numAgents)
            state = nextState
        return nextState

    isTerminal = lambda state: False

    isCollision = IsCollision(getPosFromAgentState)
    collisonRewardWolf = 1
    punishForOutOfBound = PunishForOutOfBound()
    rewardWolf = RewardCentralControlPunishBond(
        wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState,
        isCollision, punishForOutOfBound, collisonRewardWolf)
    collisonRewardSheep = -1
    rewardSheep = RewardCentralControlPunishBond(
        sheepsID, wolvesID, entitiesSizeList, getPosFromAgentState,
        isCollision, punishForOutOfBound, collisonRewardSheep)

    resetState = ResetMultiAgentChasing(numAgents, numBlocks)

    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID,
                                              blocksID, getPosFromAgentState,
                                              getVelFromAgentState)
    observe = lambda state: [
        observeOneAgent(agentID)(state) for agentID in range(numAgents)
    ]

    # policy
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                   (0, -10), (7, -7), (0, 0)]
    wolfActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                       (0, -10), (7, -7), (0, 0)]

    preyPowerRatio = 0.5
    sheepActionSpace = list(map(tuple, np.array(actionSpace) * preyPowerRatio))

    predatorPowerRatio = 0.5
    wolfActionOneSpace = list(
        map(tuple,
            np.array(wolfActionSpace) * predatorPowerRatio))
    wolfActionTwoSpace = list(
        map(tuple,
            np.array(wolfActionSpace) * predatorPowerRatio))

    wolvesActionSpace = list(it.product(wolfActionOneSpace,
                                        wolfActionTwoSpace))

    actionSpaceList = [sheepActionSpace, wolvesActionSpace]

    # neural network init
    numStateSpace = 4 * numEntities
    numSheepActionSpace = len(sheepActionSpace)
    numWolvesActionSpace = len(wolvesActionSpace)

    regularizationFactor = 1e-4
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]

    generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace,
                                       regularizationFactor)
    generateWolvesModel = GenerateModel(numStateSpace, numWolvesActionSpace,
                                        regularizationFactor)
    generateModelList = [generateSheepModel, generateWolvesModel]

    sheepDepth = 9
    wolfDepth = 9
    depthList = [sheepDepth, wolfDepth]
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    multiAgentNNmodel = [
        generateModel(sharedWidths * depth, actionLayerWidths,
                      valueLayerWidths, resBlockSize, initializationMethod,
                      dropoutRate)
        for depth, generateModel in zip(depthList, generateModelList)
    ]

    # replay buffer
    bufferSize = 20000
    saveToBuffer = SaveToBuffer(bufferSize)

    def getUniformSamplingProbabilities(buffer):
        return [(1 / len(buffer)) for _ in buffer]

    miniBatchSize = 512
    sampleBatchFromBuffer = SampleBatchFromBuffer(
        miniBatchSize, getUniformSamplingProbabilities)

    # pre-process the trajectory for replayBuffer
    rewardMultiAgents = [rewardSheep, rewardWolf]
    decay = 1
    accumulateMultiAgentRewards = AccumulateMultiAgentRewards(decay)

    addMultiAgentValuesToTrajectory = AddValuesToTrajectory(
        accumulateMultiAgentRewards)
    actionIndex = 1

    def getTerminalActionFromTrajectory(trajectory):
        return trajectory[-1][actionIndex]

    removeTerminalTupleFromTrajectory = RemoveTerminalTupleFromTrajectory(
        getTerminalActionFromTrajectory)

    # pre-process the trajectory for NNTraining
    sheepActionToOneHot = ActionToOneHot(sheepActionSpace)
    wolvesActionToOneHot = ActionToOneHot(wolvesActionSpace)
    actionToOneHotList = [sheepActionToOneHot, wolvesActionToOneHot]
    processTrajectoryForPolicyValueNets = [
        ProcessTrajectoryForPolicyValueNetMultiAgentReward(
            actionToOneHotList[agentId], agentId) for agentId in agentIds
    ]

    # function to train NN model
    terminalThreshold = 1e-6
    lossHistorySize = 10
    initActionCoeff = 1
    initValueCoeff = 1
    initCoeff = (initActionCoeff, initValueCoeff)
    afterActionCoeff = 1
    afterValueCoeff = 1
    afterCoeff = (afterActionCoeff, afterValueCoeff)

    terminalController = TrainTerminalController(lossHistorySize,
                                                 terminalThreshold)
    coefficientController = CoefficientCotroller(initCoeff, afterCoeff)

    reportInterval = 10000
    trainStepsIntervel = 1  # 10000

    trainReporter = TrainReporter(numTrainStepEachIteration, reportInterval)
    learningRateDecay = 1
    learningRateDecayStep = 1
    learningRate = 0.0001
    learningRateModifier = LearningRateModifier(learningRate,
                                                learningRateDecay,
                                                learningRateDecayStep)

    trainNN = Train(numTrainStepEachIteration, miniBatchSize, sampleData,
                    learningRateModifier, terminalController,
                    coefficientController, trainReporter)

    # load save dir

    trajectorySaveExtension = '.pickle'
    NNModelSaveExtension = ''
    trajectoriesSaveDirectory = os.path.join(
        dirName, '..', '..', 'data', 'iterTrain2wolves1sheepMADDPGEnv',
        'trajectories')
    if not os.path.exists(trajectoriesSaveDirectory):
        os.makedirs(trajectoriesSaveDirectory)

    NNModelSaveDirectory = os.path.join(dirName, '..', '..', 'data',
                                        'iterTrain2wolves1sheepMADDPGEnv',
                                        'NNModelRes')
    if not os.path.exists(NNModelSaveDirectory):
        os.makedirs(NNModelSaveDirectory)

    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                             trajectorySaveExtension,
                                             fixedParameters)
    generateNNModelSavePath = GetSavePath(NNModelSaveDirectory,
                                          NNModelSaveExtension,
                                          fixedParameters)

    startTime = time.time()

    sheepDepth = 9
    wolfDepth = 9
    depthList = [sheepDepth, wolfDepth]
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    multiAgentNNmodel = [
        generateModel(sharedWidths * depth, actionLayerWidths,
                      valueLayerWidths, resBlockSize, initializationMethod,
                      dropoutRate)
        for depth, generateModel in zip(depthList, generateModelList)
    ]

    preprocessMultiAgentTrajectories = PreprocessTrajectoriesForBuffer(
        addMultiAgentValuesToTrajectory, removeTerminalTupleFromTrajectory)
    numTrajectoriesToStartTrain = 1024

    trainOneAgent = TrainOneAgent(numTrainStepEachIteration,
                                  numTrajectoriesToStartTrain,
                                  processTrajectoryForPolicyValueNets,
                                  sampleBatchFromBuffer, trainNN)

    # restorePretrainModel
    sheepPreTrainModelPath = os.path.join(
        dirName, '..', '..', 'data', 'MADDPG2wolves1sheep',
        'trainSheepWithPretrrainWolves', 'trainedResNNModels',
        'agentId=0_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations=250_trainSteps=50000'
    )

    wolvesPreTrainModelPath = os.path.join(
        dirName, '..', '..', 'data', 'MADDPG2wolves1sheep',
        'trainWolvesTwoCenterControlAction', 'trainedResNNModels',
        'agentId=1_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations=250_trainSteps=50000'
    )

    pretrainModelPathList = [sheepPreTrainModelPath, wolvesPreTrainModelPath]

    sheepId, wolvesId = [0, 1]
    trainableAgentIds = [sheepId, wolvesId]
    for agentId in trainableAgentIds:

        restoredNNModel = restoreVariables(multiAgentNNmodel[agentId],
                                           pretrainModelPathList[agentId])
        multiAgentNNmodel[agentId] = restoredNNModel

        NNModelPathParameters = {
            'iterationIndex': 0,
            'agentId': agentId,
            'numTrajectoriesPerIteration': numTrajectoriesPerIteration,
            'numTrainStepEachIteration': numTrainStepEachIteration
        }
        NNModelSavePath = generateNNModelSavePath(NNModelPathParameters)
        saveVariables(multiAgentNNmodel[agentId], NNModelSavePath)

    fuzzySearchParameterNames = ['sampleIndex']
    loadTrajectoriesForParallel = LoadTrajectories(generateTrajectorySavePath,
                                                   loadFromPickle,
                                                   fuzzySearchParameterNames)
    loadTrajectoriesForTrainBreak = LoadTrajectories(
        generateTrajectorySavePath, loadFromPickle)

    # init replay buffer
    replayBuffer = []
    trajectoryBeforeTrainIndex = 0
    trajectoryBeforeTrainPathParamters = {
        'iterationIndex': trajectoryBeforeTrainIndex
    }
    trajectoriesBeforeTrain = loadTrajectoriesForParallel(
        trajectoryBeforeTrainPathParamters)
    preProcessedTrajectoriesBeforeTrain = preprocessMultiAgentTrajectories(
        trajectoriesBeforeTrain)
    replayBuffer = saveToBuffer(replayBuffer,
                                preProcessedTrajectoriesBeforeTrain)

    # delete used model for disk space
    fixedParametersForDelete = {
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations,
        'killzoneRadius': killzoneRadius,
        'numTrajectoriesPerIteration': numTrajectoriesPerIteration,
        'numTrainStepEachIteration': numTrainStepEachIteration
    }
    toDeleteNNModelExtensionList = ['.meta', '.index', '.data-00000-of-00001']
    generatetoDeleteNNModelPathList = [
        GetSavePath(NNModelSaveDirectory, toDeleteNNModelExtension,
                    fixedParametersForDelete)
        for toDeleteNNModelExtension in toDeleteNNModelExtensionList
    ]

    # restore model
    restoredIteration = 0
    for agentId in trainableAgentIds:
        modelPathForRestore = generateNNModelSavePath({
            'iterationIndex':
            restoredIteration,
            'agentId':
            agentId,
            'numTrajectoriesPerIteration':
            numTrajectoriesPerIteration,
            'numTrainStepEachIteration':
            numTrainStepEachIteration
        })
        restoredNNModel = restoreVariables(multiAgentNNmodel[agentId],
                                           modelPathForRestore)
        multiAgentNNmodel[agentId] = restoredNNModel


    # restore buffer
    bufferTrajectoryPathParameters = {
        'numTrajectoriesPerIteration': numTrajectoriesPerIteration,
        'numTrainStepEachIteration': numTrainStepEachIteration
    }
    restoredIterationIndexRange = range(restoredIteration)
    restoredTrajectories = loadTrajectoriesForTrainBreak(
        parameters=bufferTrajectoryPathParameters,
        parametersWithSpecificValues={
            'iterationIndex': list(restoredIterationIndexRange)
        })
    preProcessedRestoredTrajectories = preprocessMultiAgentTrajectories(
        restoredTrajectories)
    print(len(preProcessedRestoredTrajectories))
    replayBuffer = saveToBuffer(replayBuffer, preProcessedRestoredTrajectories)

    modelMemorySize = 5
    modelSaveFrequency = 50
    deleteUsedModel = DeleteUsedModel(modelMemorySize, modelSaveFrequency,
                                      generatetoDeleteNNModelPathList)
    numIterations = 10000
    for iterationIndex in range(restoredIteration + 1, numIterations):
        print('iterationIndex: ', iterationIndex)

        numCpuToUseWhileTrain = int(16)
        numCmdList = min(numTrajectoriesPerIteration, numCpuToUseWhileTrain)
        sampleTrajectoryFileName = 'sampleMultiMCTSAgentCenterControlResNetTrajCondtion.py'

        generateTrajectoriesParallelWhileTrain = GenerateTrajectoriesParallel(
            sampleTrajectoryFileName, numTrajectoriesPerIteration, numCmdList)
        trajectoryPathParameters = {
            'iterationIndex': iterationIndex,
            'numTrajectoriesPerIteration': numTrajectoriesPerIteration,
            'numTrainStepEachIteration': numTrainStepEachIteration
        }
        cmdList = generateTrajectoriesParallelWhileTrain(
            trajectoryPathParameters)

        trajectories = loadTrajectoriesForParallel(trajectoryPathParameters)
        trajectorySavePath = generateTrajectorySavePath(
            trajectoryPathParameters)
        saveToPickle(trajectories, trajectorySavePath)

        preProcessedTrajectories = preprocessMultiAgentTrajectories(
            trajectories)
        updatedReplayBuffer = saveToBuffer(replayBuffer,
                                           preProcessedTrajectories)

        for agentId in trainableAgentIds:

            updatedAgentNNModel = trainOneAgent(agentId, multiAgentNNmodel,
                                                updatedReplayBuffer)

            NNModelPathParameters = {
                'iterationIndex': iterationIndex,
                'agentId': agentId,
                'numTrajectoriesPerIteration': numTrajectoriesPerIteration,
                'numTrainStepEachIteration': numTrainStepEachIteration
            }
            NNModelSavePath = generateNNModelSavePath(NNModelPathParameters)
            saveVariables(updatedAgentNNModel, NNModelSavePath)
            multiAgentNNmodel[agentId] = updatedAgentNNModel
            replayBuffer = updatedReplayBuffer

            deleteUsedModel(iterationIndex, agentId)

    endTime = time.time()
    print("Time taken for {} iterations: {} seconds".format(
        numIterations, (endTime - startTime)))
コード例 #7
0
def main():
    """Sample and pickle evaluation trajectories for one wolves-model condition.

    Command-line arguments:
        sys.argv[1]: JSON object holding at least 'trainSteps', 'depth' and
            'dataSize'; the same object is passed to the trajectory
            save-path generator.
        sys.argv[2]: first sample index (inclusive).
        sys.argv[3]: last sample index (exclusive).

    A sheep network and a wolves (joint two-wolf action) network are restored
    from disk, one trajectory is sampled per index in [start, end) with
    greedy action choice for both, and the list of trajectories is pickled
    to a single file.  If that file already exists nothing is done.
    """
    parametersForTrajectoryPath = json.loads(sys.argv[1])
    startSampleIndex = int(sys.argv[2])
    endSampleIndex = int(sys.argv[3])

    # NOTE: 'sampleIndex' is intentionally not added to
    # parametersForTrajectoryPath here, so it never reaches the save-path
    # generator.
    trainSteps = int(parametersForTrajectoryPath['trainSteps'])
    depth = int(parametersForTrajectoryPath['depth'])
    dataSize = int(parametersForTrajectoryPath['dataSize'])

    killzoneRadius = 25
    numSimulations = 200
    maxRunningSteps = 100

    fixedParameters = {
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations,
        'killzoneRadius': killzoneRadius
    }
    trajectorySaveExtension = '.pickle'
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(
        dirName, '..', '..', '..', 'data', 'evaluateSupervisedLearning',
        'multiMCTSAgentResNetNoPhysicsCenterControl',
        'evaluateCenterControlTrajByCondition')
    # exist_ok avoids the check-then-create race between parallel workers.
    os.makedirs(trajectoriesSaveDirectory, exist_ok=True)
    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                             trajectorySaveExtension,
                                             fixedParameters)

    trajectorySavePath = generateTrajectorySavePath(
        parametersForTrajectoryPath)
    if os.path.isfile(trajectorySavePath):
        # Trajectories for this condition were already sampled.
        return

    # ---- environment ----
    numOfAgent = 3
    sheepId = 0
    wolfOneId = 1
    wolfTwoId = 2
    xPosIndex = [0, 1]
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    reset = Reset(xBoundary, yBoundary, numOfAgent)

    getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
    getWolfOneXPos = GetAgentPosFromState(wolfOneId, xPosIndex)
    getWolfTwoXPos = GetAgentPosFromState(wolfTwoId, xPosIndex)

    isTerminalOne = IsTerminal(getWolfOneXPos, getSheepXPos, killzoneRadius)
    isTerminalTwo = IsTerminal(getWolfTwoXPos, getSheepXPos, killzoneRadius)

    def isTerminal(state):
        # The episode ends as soon as either wolf's terminal check fires.
        return isTerminalOne(state) or isTerminalTwo(state)

    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)
    transit = TransiteForNoPhysics(stayInBoundaryByReflectVelocity)

    # ---- action spaces ----
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                   (0, -10), (7, -7), (0, 0)]

    def scaleActionSpace(powerRatio):
        # Multiply every base action by powerRatio, keeping tuple elements.
        return list(map(tuple, np.array(actionSpace) * powerRatio))

    preyPowerRatio = 3
    sheepActionSpace = scaleActionSpace(preyPowerRatio)

    predatorPowerRatio = 2
    wolfActionOneSpace = scaleActionSpace(predatorPowerRatio)
    wolfActionTwoSpace = scaleActionSpace(predatorPowerRatio)
    # The wolves network chooses one joint action for both wolves.
    wolvesActionSpace = list(
        it.product(wolfActionOneSpace, wolfActionTwoSpace))

    # ---- neural network init ----
    numStateSpace = 6
    numSheepActionSpace = len(sheepActionSpace)
    numWolvesActionSpace = len(wolvesActionSpace)

    regularizationFactor = 1e-4
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    NNModelSaveExtension = ''

    # ---- sheep policy: restored from a fixed pre-trained checkpoint ----
    generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace,
                                       regularizationFactor)
    sheepNNModelSaveDirectory = os.path.join(
        dirName, '..', '..', '..', 'data',
        'evaluateEscapeMultiChasingNoPhysics',
        'trainedResNNModelsMultiStillAction')
    sheepNNModelFixedParameters = {
        'agentId': 0,
        'maxRunningSteps': 150,
        'numSimulations': 200,
        'miniBatchSize': 256,
        'learningRate': 0.0001
    }
    getSheepNNModelSavePath = GetSavePath(sheepNNModelSaveDirectory,
                                          NNModelSaveExtension,
                                          sheepNNModelFixedParameters)
    os.makedirs(sheepNNModelSaveDirectory, exist_ok=True)

    # The sheep checkpoint depth is fixed and independent of the wolves
    # `depth` condition read from the command line.
    sheepDepth = 5
    initSheepNNModel = generateSheepModel(sharedWidths * sheepDepth,
                                          actionLayerWidths,
                                          valueLayerWidths, resBlockSize,
                                          initializationMethod, dropoutRate)
    sheepTrainedModelPath = getSheepNNModelSavePath({
        'trainSteps': 50000,
        'depth': sheepDepth
    })
    sheepTrainedModel = restoreVariables(initSheepNNModel,
                                         sheepTrainedModelPath)
    sheepPolicy = ApproximatePolicy(sheepTrainedModel, sheepActionSpace)

    # ---- wolves policy: checkpoint selected by the evaluated condition ----
    generateWolvesModel = GenerateModel(numStateSpace, numWolvesActionSpace,
                                        regularizationFactor)
    initWolvesNNModel = generateWolvesModel(sharedWidths * depth,
                                            actionLayerWidths,
                                            valueLayerWidths, resBlockSize,
                                            initializationMethod, dropoutRate)
    wolvesNNModelSaveDirectory = os.path.join(
        dirName, '..', '..', '..', 'data', 'evaluateSupervisedLearning',
        'multiMCTSAgentResNetNoPhysicsCenterControl', 'trainedResNNModels')
    wolfId = 1
    NNModelFixedParametersWolves = {
        'agentId': wolfId,
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations,
        'miniBatchSize': 256,
        'learningRate': 0.0001,
    }
    getWolvesNNModelSavePath = GetSavePath(wolvesNNModelSaveDirectory,
                                           NNModelSaveExtension,
                                           NNModelFixedParametersWolves)
    wolvesTrainedModelPath = getWolvesNNModelSavePath({
        'trainSteps': trainSteps,
        'depth': depth,
        'dataSize': dataSize
    })
    wolvesTrainedModel = restoreVariables(initWolvesNNModel,
                                          wolvesTrainedModelPath)
    wolfPolicy = ApproximatePolicy(wolvesTrainedModel, wolvesActionSpace)

    # ---- optional rendering ----
    renderOn = False
    render = None
    if renderOn:
        # Imported lazily so headless sampling neither needs pygame nor
        # creates the image directory.
        import pygame as pg
        from pygame.color import THECOLORS
        from exec.evaluateNoPhysicsEnvWithRender import Render

        screenColor = THECOLORS['black']
        circleColorList = [
            THECOLORS['green'], THECOLORS['red'], THECOLORS['orange']
        ]
        circleSize = 10
        saveImage = False
        saveImageDir = os.path.join(dirName, '..', '..', '..', 'data',
                                    'demoImg')
        os.makedirs(saveImageDir, exist_ok=True)
        screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
        render = Render(numOfAgent, xPosIndex, screen, screenColor,
                        circleColorList, circleSize, saveImage, saveImageDir)

    # ---- sampling ----
    chooseActionList = [chooseGreedyAction, chooseGreedyAction]
    sampleTrajectory = SampleTrajectoryWithRender(maxRunningSteps, transit,
                                                  isTerminal, reset,
                                                  chooseActionList, render,
                                                  renderOn)

    def policy(state):
        # Policy outputs of all agents, in [sheep, wolves] order.
        return [sheepPolicy(state), wolfPolicy(state)]

    trajectories = [
        sampleTrajectory(policy)
        for _ in range(startSampleIndex, endSampleIndex)
    ]

    saveToPickle(trajectories, trajectorySavePath)
コード例 #8
0
def main():
    """Sample and pickle trajectories of MCTS wolves chasing an NN sheep.

    Unless the local DEBUG switch is set, the command-line arguments are:
        sys.argv[1]: JSON object with at least the key 'agentId'; it is also
            passed (with 'sampleIndex' added) to the trajectory save-path
            generator.
        sys.argv[2]: first sample index (inclusive).
        sys.argv[3]: last sample index (exclusive).

    The sheep acts from a restored network; the two wolves are controlled
    jointly by an MCTS policy with a uniform action prior and random
    rollouts.  One trajectory is sampled per index in [start, end) and the
    list is pickled to a single file.  If that file already exists nothing
    is done.
    """
    DEBUG = 0
    renderOn = 0
    if DEBUG:
        parametersForTrajectoryPath = {}
        startSampleIndex = 5
        endSampleIndex = 7
        agentId = 1
    else:
        parametersForTrajectoryPath = json.loads(sys.argv[1])
        startSampleIndex = int(sys.argv[2])
        endSampleIndex = int(sys.argv[3])
        agentId = int(parametersForTrajectoryPath['agentId'])
    parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex,
                                                  endSampleIndex)

    # check file exists or not
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(
        dirName, '..', '..', '..', '..', 'data', 'NoPhysics2wolves1sheep',
        'trainWolvesTwoCenterControlAction88', 'trajectories')
    # exist_ok avoids the check-then-create race between parallel workers.
    os.makedirs(trajectoriesSaveDirectory, exist_ok=True)

    trajectorySaveExtension = '.pickle'
    maxRunningSteps = 50
    numSimulations = 250
    killzoneRadius = 150
    fixedParameters = {
        'agentId': agentId,
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations,
        'killzoneRadius': killzoneRadius
    }

    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                             trajectorySaveExtension,
                                             fixedParameters)

    trajectorySavePath = generateTrajectorySavePath(
        parametersForTrajectoryPath)

    if os.path.isfile(trajectorySavePath):
        # Trajectories for this sample range were already generated.
        return

    # ---- environment ----
    numOfAgent = 3
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    resetState = Reset(xBoundary, yBoundary, numOfAgent)

    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)
    interpolateOneFrame = InterpolateOneFrame(stayInBoundaryByReflectVelocity)

    def chooseInterpolatedNextState(interpolatedStates):
        # The transition uses the last interpolated state as next state.
        return interpolatedStates[-1]

    sheepId = 0
    wolvesId = 1
    wolfOneId = 1
    wolfTwoId = 2
    xPosIndex = [0, 1]
    centerControlIndexList = [wolvesId]
    unpackCenterControlAction = UnpackCenterControlAction(
        centerControlIndexList)

    numFramesToInterpolate = 0
    transit = TransitWithInterpolation(numFramesToInterpolate,
                                       interpolateOneFrame,
                                       chooseInterpolatedNextState,
                                       unpackCenterControlAction)

    # ---- MCTS child selection ----
    cInit = 1
    cBase = 100
    calculateScore = ScoreChild(cInit, cBase)
    selectChild = SelectChild(calculateScore)

    # ---- action spaces ----
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                   (0, -10), (7, -7), (0, 0)]
    # The wolves' base actions omit the (0, 0) action.
    wolfActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0),
                       (-7, -7), (0, -10), (7, -7)]

    preyPowerRatio = 10
    sheepActionSpace = list(
        map(tuple, np.array(actionSpace) * preyPowerRatio))

    predatorPowerRatio = 8
    wolfActionOneSpace = list(
        map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))
    wolfActionTwoSpace = list(
        map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))

    # One joint action for both wolves.
    wolvesActionSpace = list(product(wolfActionOneSpace, wolfActionTwoSpace))

    # ---- neural network init ----
    numStateSpace = 2 * numOfAgent
    numSheepActionSpace = len(sheepActionSpace)
    numWolvesActionSpace = len(wolvesActionSpace)

    regularizationFactor = 1e-4
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]
    generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace,
                                       regularizationFactor)

    # ---- sheep policy: restored from a pre-trained checkpoint ----
    NNModelSaveExtension = ''
    sheepNNModelSaveDirectory = os.path.join(
        dirName, '..', '..', '..', '..', 'data', 'NoPhysics2wolves1sheep',
        'trainSheepWithTwoHeatSeekingWolves', 'trainedResNNModels')
    sheepNNModelFixedParameters = {
        'agentId': 0,
        'maxRunningSteps': 50,
        'numSimulations': 110,
        'miniBatchSize': 256,
        'learningRate': 0.0001,
    }
    getSheepNNModelSavePath = GetSavePath(sheepNNModelSaveDirectory,
                                          NNModelSaveExtension,
                                          sheepNNModelFixedParameters)

    depth = 9
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    initSheepNNModel = generateSheepModel(sharedWidths * depth,
                                          actionLayerWidths,
                                          valueLayerWidths, resBlockSize,
                                          initializationMethod, dropoutRate)

    sheepTrainedModelPath = getSheepNNModelSavePath({
        'trainSteps': 50000,
        'depth': depth
    })
    sheepTrainedModel = restoreVariables(initSheepNNModel,
                                         sheepTrainedModelPath)
    sheepPolicy = ApproximatePolicy(sheepTrainedModel, sheepActionSpace)

    # ---- MCTS prior and transition as seen by the wolves ----
    def getActionPrior(state):
        # Uniform prior over the joint wolves actions; state is ignored.
        return {
            action: 1 / len(wolvesActionSpace)
            for action in wolvesActionSpace
        }

    chooseActionInMCTS = sampleFromDistribution

    def wolvesTransit(state, action):
        # Inside the search the sheep action is sampled from its policy.
        return transit(state,
                       [chooseActionInMCTS(sheepPolicy(state)), action])

    # ---- reward function ----
    getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
    getWolfOneXPos = GetAgentPosFromState(wolfOneId, xPosIndex)
    getWolfTwoXPos = GetAgentPosFromState(wolfTwoId, xPosIndex)
    isCollidedOne = IsTerminal(getWolfOneXPos, getSheepXPos, killzoneRadius)
    isCollidedTwo = IsTerminal(getWolfTwoXPos, getSheepXPos, killzoneRadius)

    def calCollisionTimes(state):
        # Sums the two wolves' collision checks (0, 1 or 2) rather than
        # taking a boolean "any wolf collided".
        return np.sum([isCollidedOne(state), isCollidedTwo(state)])

    calTerminationSignals = calCollisionTimes
    chooseInterpolatedStateByEarlyTermination = (
        ChooseInterpolatedStateByEarlyTermination(calTerminationSignals))

    numFramesToInterpolateInReward = 3
    interpolateStateInReward = TransitWithInterpolation(
        numFramesToInterpolateInReward, interpolateOneFrame,
        chooseInterpolatedStateByEarlyTermination, unpackCenterControlAction)

    aliveBonus = -1 / maxRunningSteps * 10
    deathPenalty = 1
    rewardFunction = RewardFunctionCompeteWithStateInterpolation(
        aliveBonus, deathPenalty, calCollisionTimes, interpolateStateInReward)

    # ---- initialize children; expand ----
    initializeChildren = InitializeChildren(wolvesActionSpace, wolvesTransit,
                                            getActionPrior)

    def isTerminal(state):
        # No state is treated as terminal: search, rollouts and sampling
        # are bounded only by their step limits.
        return False

    expand = Expand(isTerminal, initializeChildren)

    # ---- random rollout policy ----
    def rolloutPolicy(state):
        return [
            sampleFromDistribution(sheepPolicy(state)),
            wolvesActionSpace[np.random.choice(range(numWolvesActionSpace))]
        ]

    def rolloutHeuristic(state):
        # Rollout heuristic is disabled (constant zero).
        return 0

    maxRolloutSteps = 15
    rollout = RollOut(rolloutPolicy, maxRolloutSteps, transit, rewardFunction,
                      isTerminal, rolloutHeuristic)

    wolfPolicy = MCTS(numSimulations, selectChild, expand, rollout, backup,
                      establishSoftmaxActionDist)

    # ---- all agents' action selection ----
    chooseActionList = [maxFromDistribution, maxFromDistribution]

    def sampleAction(state):
        # Pair each agent's action distribution with its chooser, in
        # [sheep, wolves] order.
        actionDists = [sheepPolicy(state), wolfPolicy(state)]
        return [
            chooseAction(actionDist)
            for actionDist, chooseAction in zip(actionDists, chooseActionList)
        ]

    # ---- optional rendering ----
    render = None
    if renderOn:
        import pygame as pg
        from pygame.color import THECOLORS
        screenColor = THECOLORS['black']
        circleColorList = [
            THECOLORS['green'], THECOLORS['yellow'], THECOLORS['red']
        ]
        circleSize = 10
        saveImage = False
        saveImageDir = os.path.join(dirName, '..', '..', '..', '..', 'data',
                                    'demoImg')
        os.makedirs(saveImageDir, exist_ok=True)
        screen = pg.display.set_mode([max(xBoundary), max(yBoundary)])
        render = Render(numOfAgent, xPosIndex, screen, screenColor,
                        circleColorList, circleSize, saveImage, saveImageDir)

    # ---- sampling ----
    forwardOneStep = ForwardOneStep(transit, rewardFunction)
    sampleTrajectory = SampleTrajectoryWithRender(maxRunningSteps, isTerminal,
                                                  resetState, forwardOneStep,
                                                  render, renderOn)

    trajectories = [
        sampleTrajectory(sampleAction)
        for _ in range(startSampleIndex, endSampleIndex)
    ]
    print([len(traj) for traj in trajectories])
    saveToPickle(trajectories, trajectorySavePath)
コード例 #9
0
    def __call__(self, parameters):
        print(parameters)

        numWolves = parameters['numWolves']
        numSheep = parameters['numSheep']
        numBlocks = 2
        wolfSelfish = 1.0 if parameters[
            'wolfType'] == 'individualReward' else 0.0
        perturbedWolfID = parameters['perturbedWolfID']
        perturbedWolfGoalID = parameters['perturbedWolfGoalID']

        ## MDP Env
        numAgents = numWolves + numSheep
        numEntities = numAgents + numBlocks
        wolvesID = list(range(numWolves))
        sheepsID = list(range(numWolves, numWolves + numSheep))
        blocksID = list(range(numAgents, numEntities))

        sheepSize = 0.05
        wolfSize = 0.075
        blockSize = 0.2
        entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheep + [
            blockSize
        ] * numBlocks

        costActionRatio = 0.0
        sheepSpeedMultiplier = 1.0
        sheepMaxSpeed = 1.3 * sheepSpeedMultiplier
        wolfMaxSpeed = 1.0
        blockMaxSpeed = None

        entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [
            sheepMaxSpeed
        ] * numSheep + [blockMaxSpeed] * numBlocks
        entitiesMovableList = [True] * numAgents + [False] * numBlocks
        massList = [1.0] * numEntities

        collisionReward = 1  # for evaluation, count # of bites
        isCollision = IsCollision(getPosFromAgentState)
        rewardAllWolves = RewardWolf(wolvesID, sheepsID, entitiesSizeList,
                                     isCollision, collisionReward, wolfSelfish)
        rewardWolf = lambda state, action, nextState: np.sum(
            rewardAllWolves(state, action, nextState))

        reshapeActionInTransit = lambda action: action
        getCollisionForce = GetCollisionForce()
        applyActionForce = ApplyActionForce(wolvesID, sheepsID,
                                            entitiesMovableList)
        applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList,
                                              entitiesSizeList,
                                              getCollisionForce,
                                              getPosFromAgentState)
        integrateState = IntegrateState(numEntities, entitiesMovableList,
                                        massList, entityMaxSpeedList,
                                        getVelFromAgentState,
                                        getPosFromAgentState)
        transit = TransitMultiAgentChasing(numEntities, reshapeActionInTransit,
                                           applyActionForce, applyEnvironForce,
                                           integrateState)

        forwardOneStep = ForwardOneStep(transit, rewardWolf)

        reset = ResetMultiAgentChasingWithSeed(numAgents, numBlocks)
        isTerminal = lambda state: False
        maxRunningStepsToSample = 101
        sampleTrajectory = SampleTrajectory(maxRunningStepsToSample,
                                            isTerminal, reset, forwardOneStep)

        ## MDP Policy
        worldDim = 2
        actionDim = worldDim * 2 + 1

        layerWidth = [128, 128]
        maxTimeStep = 75
        maxEpisode = 60000
        dirName = os.path.dirname(__file__)

        # ------------ sheep recover variables ------------------------
        numSheepToObserve = 1
        sheepModelListOfDiffWolfReward = []
        sheepTypeList = [0.0, 1.0]

        for sheepType in sheepTypeList:
            wolvesIDForSheepObserve = list(range(numWolves))
            sheepsIDForSheepObserve = list(
                range(numWolves, numSheepToObserve + numWolves))
            blocksIDForSheepObserve = list(
                range(numSheepToObserve + numWolves,
                      numSheepToObserve + numWolves + numBlocks))
            observeOneAgentForSheep = lambda agentID: Observe(
                agentID, wolvesIDForSheepObserve, sheepsIDForSheepObserve,
                blocksIDForSheepObserve, getPosFromAgentState,
                getVelFromAgentState)
            observeSheep = lambda state: [
                observeOneAgentForSheep(agentID)(state)
                for agentID in range(numWolves + numSheepToObserve)
            ]

            obsIDsForSheep = wolvesIDForSheepObserve + sheepsIDForSheepObserve + blocksIDForSheepObserve
            initObsForSheepParams = observeSheep(reset()[obsIDsForSheep])
            obsShapeSheep = [
                initObsForSheepParams[obsID].shape[0]
                for obsID in range(len(initObsForSheepParams))
            ]

            buildSheepModels = BuildMADDPGModels(actionDim,
                                                 numWolves + numSheepToObserve,
                                                 obsShapeSheep)
            sheepModelsList = [
                buildSheepModels(layerWidth, agentID)
                for agentID in range(numWolves, numWolves + numSheepToObserve)
            ]

            sheepFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format(
                numWolves, numSheepToObserve, numBlocks, maxEpisode,
                maxTimeStep, sheepSpeedMultiplier, costActionRatio, sheepType)
            sheepModelPaths = [
                os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                             sheepFileName + str(i))
                for i in range(numWolves, numWolves + numSheepToObserve)
            ]
            [
                restoreVariables(model, path)
                for model, path in zip(sheepModelsList, sheepModelPaths)
            ]
            sheepModelListOfDiffWolfReward = sheepModelListOfDiffWolfReward + sheepModelsList

        # # actOneStep = ActOneStep(actByPolicyTrainNoisy) #TODO
        actOneStep = ActOneStep(actByPolicyTrainNoNoisy)

        numAllSheepModels = len(sheepModelListOfDiffWolfReward)

        # ------------ wolves recover variables ------------------------

        # ------------ Recover one perturbed wolf for comparison -------
        numSheepForPerturbedWolf = 1
        wolvesIDForPerturbedWolf = wolvesID
        sheepsIDForPerturbedWolf = [sheepsID[perturbedWolfGoalID]]
        blocksIDForPerturbedWolf = list(
            range(numWolves + numSheep,
                  numEntities))  # skip the unattended sheep id

        observeOneAgentForPerturbedWolf = lambda agentID: Observe(
            agentID, wolvesIDForPerturbedWolf, sheepsIDForPerturbedWolf,
            blocksIDForPerturbedWolf, getPosFromAgentState,
            getVelFromAgentState)
        observePerturbedWolf = lambda state: [
            observeOneAgentForPerturbedWolf(agentID)(state)
            for agentID in wolvesIDForPerturbedWolf + sheepsIDForPerturbedWolf
        ]

        initObsForPerturbedWolfParams = observePerturbedWolf(reset())
        obsShapePerturbedWolf = [
            initObsForPerturbedWolfParams[obsID].shape[0]
            for obsID in range(len(initObsForPerturbedWolfParams))
        ]
        buildPerturbedWolfModels = BuildMADDPGModels(
            actionDim, numWolves + numSheepForPerturbedWolf,
            obsShapePerturbedWolf)
        layerWidthForWolf = [128, 128]
        perturbedWolfModel = buildPerturbedWolfModels(layerWidthForWolf,
                                                      perturbedWolfID)

        perturbedWolfFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format(
            numWolves, numSheepForPerturbedWolf, numBlocks, maxEpisode,
            maxTimeStep, sheepSpeedMultiplier, costActionRatio, wolfSelfish)
        perturbedWolfModelPath = os.path.join(
            dirName, '..', '..', 'data', 'preTrainModel',
            perturbedWolfFileName + str(perturbedWolfID))
        restoreVariables(perturbedWolfModel, perturbedWolfModelPath)

        # ------------ Recover other wolves trained with multiple goals -------

        wolvesIDForWolfObserve = wolvesID
        sheepsIDForWolfObserve = sheepsID
        blocksIDForWolfObserve = blocksID
        observeOneAgentForWolf = lambda agentID: Observe(
            agentID, wolvesIDForWolfObserve, sheepsIDForWolfObserve,
            blocksIDForWolfObserve, getPosFromAgentState, getVelFromAgentState)
        observeWolf = lambda state: [
            observeOneAgentForWolf(agentID)(state)
            for agentID in range(numWolves + numSheep)
        ]

        obsIDsForWolf = wolvesIDForWolfObserve + sheepsIDForWolfObserve + blocksIDForWolfObserve
        initObsForWolfParams = observeWolf(reset()[obsIDsForWolf])
        obsShapeWolf = [
            initObsForWolfParams[obsID].shape[0]
            for obsID in range(len(initObsForWolfParams))
        ]
        buildWolfModels = BuildMADDPGModels(actionDim, numWolves + numSheep,
                                            obsShapeWolf)
        layerWidthForWolf = [128, 128]
        wolfModelsList = [
            buildWolfModels(layerWidthForWolf, agentID)
            for agentID in range(numWolves)
        ]

        wolfFileName = "maddpg{}wolves{}sheep{}blocks{}episodes{}stepSheepSpeed{}WolfActCost{}individ{}_agent".format(
            numWolves, numSheep, numBlocks, maxEpisode, maxTimeStep,
            sheepSpeedMultiplier, costActionRatio, wolfSelfish)
        wolfModelPaths = [
            os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                         wolfFileName + str(i)) for i in range(numWolves)
        ]
        [
            restoreVariables(model, path)
            for model, path in zip(wolfModelsList, wolfModelPaths)
        ]

        # ------------ compose  policy ---------------------
        actionDimReshaped = 2
        cov = [0.00000000001**2 for _ in range(actionDimReshaped)]
        buildGaussian = BuildGaussianFixCov(cov)
        reshapeAction = ReshapeAction()

        # unperturbed policy
        composeWolfPolicy = lambda wolfModel: lambda state: sampleFromContinuousSpace(
            buildGaussian(
                tuple(reshapeAction(actOneStep(wolfModel, observeWolf(state))))
            ))
        wolvesSampleActions = [
            composeWolfPolicy(wolfModel) for wolfModel in wolfModelsList
        ]

        # perturbed policy
        composePerturbedWolfPolicy = lambda perturbedModel: lambda state: sampleFromContinuousSpace(
            buildGaussian(
                tuple(
                    reshapeAction(
                        actOneStep(perturbedModel, observePerturbedWolf(state))
                    ))))
        wolvesSampleActionsPerturbed = wolvesSampleActions.copy()
        wolvesSampleActionsPerturbed[
            perturbedWolfID] = composePerturbedWolfPolicy(perturbedWolfModel)

        trajectories = []
        for trajectoryId in range(self.numTrajectories):
            sheepModelsForPolicy = [
                sheepModelListOfDiffWolfReward[np.random.choice(
                    numAllSheepModels)] for sheepId in sheepsID
            ]
            composeSheepPolicy = lambda sheepModel: lambda state: {
                tuple(
                    reshapeAction(actOneStep(sheepModel, observeSheep(state)))):
                1
            }
            sheepChooseActionMethod = sampleFromDistribution
            sheepSampleActions = [
                SampleActionOnFixedIntention(selfId, wolvesID,
                                             composeSheepPolicy(sheepModel),
                                             sheepChooseActionMethod, blocksID)
                for selfId, sheepModel in zip(sheepsID, sheepModelsForPolicy)
            ]

            allIndividualSampleActionsPerturbed = wolvesSampleActionsPerturbed + sheepSampleActions
            sampleActionPerturbed = lambda state: [
                sampleIndividualAction(state) for sampleIndividualAction in
                allIndividualSampleActionsPerturbed
            ]

            trajectory = sampleTrajectory(sampleActionPerturbed)
            trajectories.append(trajectory)

        trajectoryFixedParameters = {
            'maxRunningStepsToSample': maxRunningStepsToSample
        }
        self.saveTrajectoryByParameters(trajectories,
                                        trajectoryFixedParameters, parameters)
コード例 #10
0
def main():
    """Sample trajectories with the restored multi-agent policy and pickle them.

    Run parameters come from one of two places:

    * ``DEBUG`` truthy (the current hard-coded setting): fixed values below.
    * otherwise: ``sys.argv[1]`` is a JSON object that must contain
      ``iterationIndex``, ``numTrainStepEachIteration`` and
      ``numTrajectoriesPerIteration``; ``sys.argv[2]`` and ``sys.argv[3]``
      are the start and end sample indices.

    One trajectory is sampled per index in ``range(startSampleIndex,
    endSampleIndex)``. If the target trajectory file already exists the
    function returns without doing any work.
    """
    DEBUG = 1
    renderOn = 1

    if DEBUG:
        parametersForTrajectoryPath = {}
        startSampleIndex = 1
        endSampleIndex = 2
        parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex, endSampleIndex)
        iterationIndex = 2
        numTrainStepEachIteration = 1
        numTrajectoriesPerIteration = 1

    else:
        parametersForTrajectoryPath = json.loads(sys.argv[1])
        startSampleIndex = int(sys.argv[2])
        endSampleIndex = int(sys.argv[3])
        parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex, endSampleIndex)
        iterationIndex = int(parametersForTrajectoryPath['iterationIndex'])
        numTrainStepEachIteration = int(parametersForTrajectoryPath['numTrainStepEachIteration'])
        numTrajectoriesPerIteration = int(parametersForTrajectoryPath['numTrajectoriesPerIteration'])

    # Output location. exist_ok=True avoids the check-then-create race when
    # several sampling processes start at the same time.
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(dirName, '..', '..',  'data', 'iterTrain2wolves1sheepMADDPGEnv', 'trajectories')
    os.makedirs(trajectoriesSaveDirectory, exist_ok=True)

    trajectorySaveExtension = '.pickle'

    maxRunningSteps = 50
    numSimulations = 250
    killzoneRadius = 50
    numTree = 2
    fixedParameters = {'maxRunningSteps': maxRunningSteps, 'numSimulations': numSimulations, 'killzoneRadius': killzoneRadius}
    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory, trajectorySaveExtension, fixedParameters)
    trajectorySavePath = generateTrajectorySavePath(parametersForTrajectoryPath)

    # Nothing to do when this sample range has already been generated.
    if os.path.isfile(trajectorySavePath):
        return

    # env MDP: entity ids are laid out as sheep, then wolves, then blocks
    sheepsID = [0]
    wolvesID = [1, 2]
    blocksID = []

    numSheeps = len(sheepsID)
    numWolves = len(wolvesID)
    numBlocks = len(blocksID)

    numAgents = numWolves + numSheeps
    numEntities = numAgents + numBlocks

    sheepSize = 0.05
    wolfSize = 0.075
    blockSize = 0.2

    sheepMaxSpeed = 1.3 * 1
    wolfMaxSpeed = 1.0 * 1
    blockMaxSpeed = None

    # Per-entity lists follow the same sheep / wolves / blocks order as the ids.
    entitiesSizeList = [sheepSize] * numSheeps + [wolfSize] * numWolves + [blockSize] * numBlocks
    entityMaxSpeedList = [sheepMaxSpeed] * numSheeps + [wolfMaxSpeed] * numWolves + [blockMaxSpeed] * numBlocks
    entitiesMovableList = [True] * numAgents + [False] * numBlocks
    massList = [1.0] * numEntities

    centralControlId = 1
    centerControlIndexList = [centralControlId]
    reshapeAction = UnpackCenterControlAction(centerControlIndexList)
    getCollisionForce = GetCollisionForce()
    applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
    applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList,
                                          getCollisionForce, getPosFromAgentState)
    integrateState = IntegrateState(numEntities, entitiesMovableList, massList,
                                    entityMaxSpeedList, getVelFromAgentState, getPosFromAgentState)
    interpolateState = TransitMultiAgentChasing(numEntities, reshapeAction, applyActionForce, applyEnvironForce, integrateState)

    numFramesToInterpolate = 1

    def transit(state, action):
        """Advance ``state`` by ``numFramesToInterpolate`` frames.

        The given action is applied on the first frame only; every later
        frame uses an all-zero action. With a frame count of 0 the state is
        returned unchanged.
        """
        for _ in range(numFramesToInterpolate):
            state = interpolateState(state, action)
            action = np.array([(0, 0)] * numAgents)
        return state

    # Episodes never terminate early; they run for maxRunningSteps.
    isTerminal = lambda state: False

    isCollision = IsCollision(getPosFromAgentState)
    collisonRewardWolf = 1
    punishForOutOfBound = PunishForOutOfBound()
    rewardWolf = RewardCentralControlPunishBond(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound, collisonRewardWolf)
    collisonRewardSheep = -1
    rewardSheep = RewardCentralControlPunishBond(sheepsID, wolvesID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound, collisonRewardSheep)
    # Both lists are indexed by trainable agent: 0 = sheep, 1 = wolves.
    terminalRewardList = [collisonRewardSheep, collisonRewardWolf]
    rewardMultiAgents = [rewardSheep, rewardWolf]

    resetState = ResetMultiAgentChasing(numAgents, numBlocks)

    # Observation helpers; not referenced again in this function.
    observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState, getVelFromAgentState)
    observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]

    # policy
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7), (0, 0)]
    wolfActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7), (0, 0)]

    preyPowerRatio = 0.5
    sheepActionSpace = list(map(tuple, np.array(actionSpace) * preyPowerRatio))

    predatorPowerRatio = 0.5
    wolfActionOneSpace = list(map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))
    wolfActionTwoSpace = list(map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))

    # The two wolves act through one joint action: every pair of individual actions.
    wolvesActionSpace = list(product(wolfActionOneSpace, wolfActionTwoSpace))

    actionSpaceList = [sheepActionSpace, wolvesActionSpace]

    # neural network init
    numStateSpace = 4 * numEntities
    numSheepActionSpace = len(sheepActionSpace)
    numWolvesActionSpace = len(wolvesActionSpace)

    regularizationFactor = 1e-4
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]
    generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace, regularizationFactor)
    generateWolvesModel = GenerateModel(numStateSpace, numWolvesActionSpace, regularizationFactor)
    generateModelList = [generateSheepModel, generateWolvesModel]

    sheepDepth = 9
    wolfDepth = 9
    depthList = [sheepDepth, wolfDepth]
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    sheepId, wolvesId = [0, 1]
    trainableAgentIds = [sheepId, wolvesId]

    multiAgentNNmodel = [generateModel(sharedWidths * depth, actionLayerWidths, valueLayerWidths, resBlockSize, initializationMethod, dropoutRate) for depth, generateModel in zip(depthList, generateModelList)]

    otherAgentApproximatePolicy = [lambda NNmodel: ApproximatePolicy(NNmodel, sheepActionSpace), lambda NNmodel: ApproximatePolicy(NNmodel, wolvesActionSpace)]

    # NNGuidedMCTS init
    cInit = 1
    cBase = 100
    calculateScore = ScoreChild(cInit, cBase)
    selectChild = SelectChild(calculateScore)

    getApproximatePolicy = [lambda NNmodel: ApproximatePolicy(NNmodel, sheepActionSpace), lambda NNmodel: ApproximatePolicy(NNmodel, wolvesActionSpace)]
    getApproximateValue = [lambda NNmodel: ApproximateValue(NNmodel), lambda NNmodel: ApproximateValue(NNmodel)]

    def getStateFromNode(node):
        """Return the first value stored in the node's ``id`` mapping."""
        return list(node.id.values())[0]

    chooseActionInMCTS = sampleFromDistribution

    composeMultiAgentTransitInSingleAgentMCTS = ComposeMultiAgentTransitInSingleAgentMCTS(chooseActionInMCTS)
    composeSingleAgentGuidedMCTS = ComposeSingleAgentGuidedMCTS(numTree, numSimulations, actionSpaceList, terminalRewardList, selectChild, isTerminal, transit, getStateFromNode, getApproximatePolicy, getApproximateValue, composeMultiAgentTransitInSingleAgentMCTS)
    prepareMultiAgentPolicy = PrepareMultiAgentPolicy(composeSingleAgentGuidedMCTS, otherAgentApproximatePolicy, trainableAgentIds)

    # load model
    NNModelSaveExtension = ''
    NNModelSaveDirectory = os.path.join(dirName, '..', '..',  'data', 'iterTrain2wolves1sheepMADDPGEnv', 'NNModelRes')
    os.makedirs(NNModelSaveDirectory, exist_ok=True)

    generateNNModelSavePath = GetSavePath(NNModelSaveDirectory, NNModelSaveExtension, fixedParameters)

    # Each agent's network is restored from the path keyed by the previous
    # iteration index (iterationIndex - 1).
    for agentId in trainableAgentIds:
        modelPath = generateNNModelSavePath({'iterationIndex': iterationIndex - 1, 'agentId': agentId, 'numTrajectoriesPerIteration': numTrajectoriesPerIteration, 'numTrainStepEachIteration': numTrainStepEachIteration})
        restoredNNModel = restoreVariables(multiAgentNNmodel[agentId], modelPath)
        multiAgentNNmodel[agentId] = restoredNNModel

    multiAgentPolicy = prepareMultiAgentPolicy(multiAgentNNmodel)
    chooseActionList = [maxFromDistribution, maxFromDistribution]

    def sampleAction(state):
        """Return one action per trainable agent, chosen from its action distribution."""
        actionDists = multiAgentPolicy(state)
        action = [chooseAction(actionDist) for actionDist, chooseAction in zip(actionDists, chooseActionList)]
        return action

    # Rendering is a no-op here even though renderOn is passed through.
    render = lambda state: None
    forwardOneStep = ForwardMultiAgentsOneStep(transit, rewardMultiAgents)
    sampleTrajectory = SampleTrajectoryWithRender(maxRunningSteps, isTerminal, resetState, forwardOneStep, render, renderOn)

    trajectories = [sampleTrajectory(sampleAction) for sampleIndex in range(startSampleIndex, endSampleIndex)]
    print([len(traj) for traj in trajectories])
    saveToPickle(trajectories, trajectorySavePath)
コード例 #11
0
def main():
    """Plot the restored wolves value function over a grid of sheep positions.

    Builds one central-control network per wolf-team size (2..numWolves),
    restores each from its pre-trained checkpoint, and uses the value
    function of the ``numWolves``-sized team. With the wolves pinned at fixed
    positions, ``evaluateValue`` is applied to every (sheepXPosition,
    sheepYPosition) cell of a 120 x 120 grid; the result is handed to
    ``drawHeatmapPlot``, saved as ``valueMap2`` and shown.
    """
    numWolves = 2
    numSheep = 1

    # Team sizes for which a central-control model exists.
    teamSizes = list(range(2, numWolves + 1))
    numWolvesStateSpaces = [2 * (teamSize + 1) for teamSize in teamSizes]

    actionSpace = [
        (10, 0), (7, 7), (0, 10), (-7, 7),
        (-10, 0), (-7, -7), (0, -10), (7, -7),
    ]
    predatorPowerRatio = 8
    scaledActions = np.array(actionSpace) * predatorPowerRatio
    wolfIndividualActionSpace = [tuple(scaledAction) for scaledAction in scaledActions]

    # Joint action space of a team = every combination of individual actions.
    wolvesCentralControlActionSpaces = []
    for teamSize in teamSizes:
        jointActions = it.product(wolfIndividualActionSpace, repeat=teamSize)
        wolvesCentralControlActionSpaces.append(list(jointActions))
    numWolvesCentralControlActionSpaces = list(
        map(len, wolvesCentralControlActionSpaces))

    regularizationFactor = 1e-4
    generateWolvesCentralControlModels = []
    for stateSize, jointActionCount in zip(numWolvesStateSpaces,
                                           numWolvesCentralControlActionSpaces):
        generateWolvesCentralControlModels.append(
            GenerateModel(stateSize, jointActionCount, regularizationFactor))

    # Network shape shared by all team sizes.
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]
    wolfNNDepth = 9
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    initWolvesCentralControlModels = []
    for buildModel in generateWolvesCentralControlModels:
        freshModel = buildModel(sharedWidths * wolfNNDepth, actionLayerWidths,
                                valueLayerWidths, resBlockSize,
                                initializationMethod, dropoutRate)
        initWolvesCentralControlModels.append(freshModel)

    # Checkpoint names embed len(actionSpace) times 1, 11, 111, ... depending
    # on team size.
    NNNumSimulations = 250
    wolvesModelPaths = []
    for teamSize in teamSizes:
        repeatedOnes = np.sum([10**power for power in range(teamSize)])
        agentTag = str(len(actionSpace) * repeatedOnes)
        checkpointName = (
            'agentId=' + agentTag +
            '_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations='
            + str(NNNumSimulations) + '_trainSteps=50000')
        wolvesModelPaths.append(
            os.path.join('..', '..', 'data', 'preTrainModel', checkpointName))
    print(wolvesModelPaths)

    wolvesCentralControlNNModels = []
    for freshModel, checkpointPath in zip(initWolvesCentralControlModels,
                                          wolvesModelPaths):
        wolvesCentralControlNNModels.append(
            restoreVariables(freshModel, checkpointPath))

    wolvesValueFunctionListBasedOnNumAgentsInWe = list(
        map(ApproximateValue, wolvesCentralControlNNModels))
    # Index 0 corresponds to a team of two wolves.
    valueFunction = wolvesValueFunctionListBasedOnNumAgentsInWe[numWolves - 2]

    xBoundary = [0, 600]
    yBoundary = [0, 600]
    reset = Reset(xBoundary, yBoundary, numWolves)

    def cellCenters(boundary, numCells):
        """Return the midpoint coordinate of each of ``numCells`` equal cells."""
        interval = (boundary[1] - boundary[0]) / numCells
        return [(cellIndex + 0.5) * interval for cellIndex in range(numCells)]

    numGridX = 120
    numGridY = 120
    sheepXPosition = cellCenters(xBoundary, numGridX)
    sheepYPosition = cellCenters(yBoundary, numGridY)

    # reset() is still invoked, but its result is immediately replaced by
    # hand-picked wolf positions.
    wolvesState = reset()
    wolvesState = np.array([[300, 350], [550, 400]])
    print(wolvesState)

    levelNames = ["sheepXPosition", "sheepYPosition"]
    levelValues = [sheepXPosition, sheepYPosition]
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)

    def evaluate(df):
        """Evaluate the value function for one sheep-position group."""
        return evaluateValue(df, valueFunction, wolvesState)

    valueResultDf = toSplitFrame.groupby(levelNames).apply(evaluate)

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    drawHeatmapPlot(valueResultDf, ax)

    fig.savefig('valueMap2', dpi=300)
    plt.show()
コード例 #12
0
    def __call__(self, parameters):
        """Sample and save trajectories for one parameter setting.

        ``parameters`` must contain ``numWolves``, ``numSheep``,
        ``valuePriorSoftMaxBeta`` and ``valuePriorEndTime`` (the last two are
        looked up but not used further in this method).

        Sheep act individually through a restored network policy; the wolves
        act through a restored central-control network that outputs one joint
        action for the whole team. ``self.numTrajectories`` trajectories are
        sampled and passed to ``self.saveTrajectoryByParameters``; the mean
        trajectory length is printed at the end.
        """
        print(parameters)
        numWolves = parameters['numWolves']
        numSheep = parameters['numSheep']
        # Kept so that a missing key still raises here, as before.
        softParamterForValue = parameters['valuePriorSoftMaxBeta']
        valuePriorEndTime = parameters['valuePriorEndTime']

        # ---------------- MDP environment ----------------
        # state is all multi agent state # action is all multi agent action
        xBoundary = [0, 600]
        yBoundary = [0, 600]
        numOfAgent = numWolves + numSheep
        reset = Reset(xBoundary, yBoundary, numOfAgent)

        # Sheep occupy the first numSheep ids, wolves the following numWolves.
        possibleSheepIds = list(range(numSheep))
        possibleWolvesIds = list(range(numSheep, numSheep + numWolves))

        def getSheepStatesFromAll(state):
            return np.array(state)[possibleSheepIds]

        def getWolvesStatesFromAll(state):
            return np.array(state)[possibleWolvesIds]

        killzoneRadius = 25
        isTerminal = IsTerminal(killzoneRadius, getSheepStatesFromAll,
                                getWolvesStatesFromAll)

        stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
            xBoundary, yBoundary)
        interpolateOneFrame = InterpolateOneFrame(
            stayInBoundaryByReflectVelocity)
        numFramesToInterpolate = 5
        transit = TransitWithTerminalCheckOfInterpolation(
            numFramesToInterpolate, interpolateOneFrame, isTerminal)

        maxRunningSteps = 52
        timeCost = 1/maxRunningSteps
        terminalBonus = 1
        rewardFunction = RewardFunctionByTerminal(timeCost, terminalBonus,
                                                  isTerminal)

        forwardOneStep = ForwardOneStep(transit, rewardFunction)
        sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, reset,
                                            forwardOneStep)

        # ---------------- MDP policy: sheep ----------------
        # Sheep policy network
        numSheepPolicyStateSpace = 2 * (numWolves + 1)
        sheepActionSpace = [
            (10, 0), (7, 7), (0, 10), (-7, 7),
            (-10, 0), (-7, -7), (0, -10), (7, -7), (0, 0),
        ]
        preyPowerRatio = 12
        scaledSheepActions = np.array(sheepActionSpace) * preyPowerRatio
        sheepIndividualActionSpace = [
            tuple(scaledAction) for scaledAction in scaledSheepActions
        ]
        numSheepActionSpace = len(sheepIndividualActionSpace)
        regularizationFactor = 1e-4
        generateSheepModel = GenerateModel(numSheepPolicyStateSpace,
                                           numSheepActionSpace,
                                           regularizationFactor)
        sharedWidths = [128]
        actionLayerWidths = [128]
        valueLayerWidths = [128]
        sheepNNDepth = 9
        resBlockSize = 2
        dropoutRate = 0.0
        initializationMethod = 'uniform'
        initSheepModel = generateSheepModel(
            sharedWidths * sheepNNDepth, actionLayerWidths, valueLayerWidths,
            resBlockSize, initializationMethod, dropoutRate)
        sheepCheckpointName = (
            'agentId=0.'+str(numWolves)+'_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations=110_trainSteps=50000')
        sheepModelPath = os.path.join('..', '..', 'data', 'preTrainModel',
                                      sheepCheckpointName)
        sheepNNModel = restoreVariables(initSheepModel, sheepModelPath)
        sheepPolicy = ApproximatePolicy(sheepNNModel,
                                        sheepIndividualActionSpace)

        # Sheep action generation: policy output goes through SoftDistribution
        # before an action is sampled.
        softParameterInPlanningForSheep = 2.5
        softPolicyInPlanningForSheep = SoftDistribution(
            softParameterInPlanningForSheep)

        def softenSheepPolicy(relativeAgentsStatesForSheepPolicy):
            rawDistribution = sheepPolicy(relativeAgentsStatesForSheepPolicy)
            return softPolicyInPlanningForSheep(rawDistribution)

        sheepChooseActionMethod = sampleFromDistribution
        sheepSampleActions = []
        for selfId in possibleSheepIds:
            sheepSampleActions.append(
                SampleActionOnFixedIntention(selfId, possibleWolvesIds,
                                             softenSheepPolicy,
                                             sheepChooseActionMethod))

        # ---------------- MDP policy: wolves ----------------
        # Policy likelihood function: wolf central-control NN policy given
        # intention. One model per team size in 2..numWolves.
        teamSizes = list(range(2, numWolves + 1))
        numWolvesStateSpaces = [
            2 * (teamSize + numSheep) for teamSize in teamSizes
        ]
        actionSpace = [
            (10, 0), (7, 7), (0, 10), (-7, 7),
            (-10, 0), (-7, -7), (0, -10), (7, -7), (0, 0),
        ]
        predatorPowerRatio = 8
        scaledWolfActions = np.array(actionSpace) * predatorPowerRatio
        wolfIndividualActionSpace = [
            tuple(scaledAction) for scaledAction in scaledWolfActions
        ]
        wolvesCentralControlActionSpaces = []
        for teamSize in teamSizes:
            jointActions = it.product(wolfIndividualActionSpace,
                                      repeat = teamSize)
            wolvesCentralControlActionSpaces.append(list(jointActions))
        numWolvesCentralControlActionSpaces = list(
            map(len, wolvesCentralControlActionSpaces))

        regularizationFactor = 1e-4
        generateWolvesCentralControlModels = []
        for stateSize, jointActionCount in zip(
                numWolvesStateSpaces, numWolvesCentralControlActionSpaces):
            generateWolvesCentralControlModels.append(
                GenerateModel(stateSize, jointActionCount,
                              regularizationFactor))

        sharedWidths = [128]
        actionLayerWidths = [128]
        valueLayerWidths = [128]
        wolfNNDepth = 9
        resBlockSize = 2
        dropoutRate = 0.0
        initializationMethod = 'uniform'
        initWolvesCentralControlModels = []
        for buildModel in generateWolvesCentralControlModels:
            initWolvesCentralControlModels.append(
                buildModel(sharedWidths * wolfNNDepth, actionLayerWidths,
                           valueLayerWidths, resBlockSize,
                           initializationMethod, dropoutRate))

        NNNumSimulations = 250
        wolvesModelPaths = []
        for teamSize in teamSizes:
            repeatedOnes = np.sum([10**power for power in range(teamSize)])
            wolvesCheckpointName = (
                'agentId=.'+str(len(actionSpace) * repeatedOnes)+'_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations='+str(NNNumSimulations)+'_trainSteps=50000')
            wolvesModelPaths.append(
                os.path.join('..', '..', 'data', 'preTrainModel',
                             wolvesCheckpointName))
        print(wolvesModelPaths)

        wolvesCentralControlNNModels = []
        for freshModel, checkpointPath in zip(initWolvesCentralControlModels,
                                              wolvesModelPaths):
            wolvesCentralControlNNModels.append(
                restoreVariables(freshModel, checkpointPath))

        wolvesCentralControlPolicies = []
        for restoredModel, jointActionSpace in zip(
                wolvesCentralControlNNModels,
                wolvesCentralControlActionSpaces):
            wolvesCentralControlPolicies.append(
                ApproximatePolicy(restoredModel, jointActionSpace))

        # Wolves action generation
        softParameterInPlanning = 2.5
        softPolicyInPlanning = SoftDistribution(softParameterInPlanning)

        def wolvesPolicy(state):
            # Index 0 holds the two-wolf model; looked up at call time.
            return wolvesCentralControlPolicies[numWolves - 2](state)

        wolfChooseActionMethod = sampleFromDistribution

        def wolvesSampleAction(state):
            softenedDistribution = softPolicyInPlanning(wolvesPolicy(state))
            return wolfChooseActionMethod(softenedDistribution)

        def sampleAction(state):
            # NOTE(review): the joint action is ordered wolves first, then
            # sheep, while the id lists above put sheep first - confirm that
            # the transition function expects this ordering.
            wolvesActions = list(wolvesSampleAction(state))
            sheepActions = [
                sampleOneSheepAction(state)
                for sampleOneSheepAction in sheepSampleActions
            ]
            return wolvesActions + sheepActions

        # Sample and save trajectories
        trajectories = []
        for _ in range(self.numTrajectories):
            trajectories.append(sampleTrajectory(sampleAction))

        wolfType = 'sharedReward'
        trajectoryFixedParameters = {
            'sheepPolicySoft': softParameterInPlanningForSheep,
            'wolfPolicySoft': softParameterInPlanning,
            'maxRunningSteps': maxRunningSteps,
            'hierarchy': 0,
            'NNNumSimulations':NNNumSimulations,
            'wolfType': wolfType,
        }
        self.saveTrajectoryByParameters(trajectories,
                                        trajectoryFixedParameters, parameters)
        trajectoryLengths = [len(tra) for tra in trajectories]
        print(np.mean(trajectoryLengths))
コード例 #13
0
    def __call__(self, parameters):
        print(parameters)
        visualizeTraj = False

        numWolves = parameters['numWolves']
        numSheep = parameters['numSheep']
        softParamterForValue = parameters['valuePriorSoftMaxBeta']
        valuePriorEndTime = parameters['valuePriorEndTime']
        deviationFor2DAction = parameters['deviationFor2DAction']
        rationalityBetaInInference = parameters['rationalityBetaInInference']
        wolfType = parameters['wolfType']
        sheepConcern = parameters['sheepConcern']
        print(rationalityBetaInInference)

        ## MDP Env
        # state is all multi agent state # action is all multi agent action
        wolvesID = list(range(numWolves))
        sheepsID = list(range(numWolves, numWolves + numSheep))
        possibleWolvesIds = wolvesID
        possibleSheepIds = sheepsID

        numAgents = numWolves + numSheep
        numBlocks = 5 - numWolves
        blocksID = list(range(numAgents, numAgents + numBlocks))
        numEntities = numAgents + numBlocks

        sheepSize = 0.05
        wolfSize = 0.075
        blockSize = 0.2

        sheepMaxSpeed = 1.3 * 1
        wolfMaxSpeed = 1.0 * 1
        blockMaxSpeed = None

        entitiesSizeList = [wolfSize] * numWolves + [sheepSize] * numSheep + [
            blockSize
        ] * numBlocks
        entityMaxSpeedList = [wolfMaxSpeed] * numWolves + [
            sheepMaxSpeed
        ] * numSheep + [blockMaxSpeed] * numBlocks
        entitiesMovableList = [True] * numAgents + [False] * numBlocks
        massList = [1.0] * numEntities

        reshapeActionInTransit = lambda action: action
        getCollisionForce = GetCollisionForce()
        applyActionForce = ApplyActionForce(wolvesID, sheepsID,
                                            entitiesMovableList)
        applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList,
                                              entitiesSizeList,
                                              getCollisionForce,
                                              getPosFromAgentState)
        integrateState = IntegrateState(numEntities, entitiesMovableList,
                                        massList, entityMaxSpeedList,
                                        getVelFromAgentState,
                                        getPosFromAgentState)
        transit = TransitMultiAgentChasing(numEntities, reshapeActionInTransit,
                                           applyActionForce, applyEnvironForce,
                                           integrateState)

        isCollision = IsCollision(getPosFromAgentState)
        collisonRewardWolf = 1
        punishForOutOfBoundForWolf = lambda stata: 0
        rewardWolf = RewardCentralControlPunishBond(
            wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState,
            isCollision, punishForOutOfBoundForWolf, collisonRewardWolf)
        collisonRewardSheep = -1
        punishForOutOfBoundForSheep = PunishForOutOfBound()
        rewardSheep = RewardCentralControlPunishBond(
            sheepsID, wolvesID, entitiesSizeList, getPosFromAgentState,
            isCollision, punishForOutOfBoundForSheep, collisonRewardSheep)

        forwardOneStep = ForwardOneStep(transit, rewardWolf)

        reset = ResetMultiAgentChasing(numAgents, numBlocks)
        isTerminal = lambda state: False
        maxRunningSteps = 101
        sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, reset,
                                            forwardOneStep)

        ## MDP Policy
        worldDim = 2
        actionDim = worldDim * 2 + 1

        layerWidth = [64 * (numWolves - 1), 64 * (numWolves - 1)]

        # Sheep Part
        # ------------ model ------------------------
        if sheepConcern == 'selfSheep':
            sheepConcernSelfOnly = 1
        if sheepConcern == 'allSheep':
            sheepConcernSelfOnly = 0
        numSheepToObserveWhenSheepSameOrDiff = [numSheep, 1]
        numSheepToObserve = numSheepToObserveWhenSheepSameOrDiff[
            sheepConcernSelfOnly]

        print(numSheepToObserve)
        sheepModelListOfDiffWolfReward = []
        sheepType = 'mixed'
        if sheepType == 'mixed':
            sheepPrefixList = ['maddpgIndividWolf', 'maddpg']
        else:
            sheepPrefixList = [sheepType]
        for sheepPrefix in sheepPrefixList:
            wolvesIDForSheepObserve = list(range(numWolves))
            sheepsIDForSheepObserve = list(
                range(numWolves, numSheepToObserve + numWolves))
            blocksIDForSheepObserve = list(
                range(numSheepToObserve + numWolves,
                      numSheepToObserve + numWolves + numBlocks))
            observeOneAgentForSheep = lambda agentID: Observe(
                agentID, wolvesIDForSheepObserve, sheepsIDForSheepObserve,
                blocksIDForSheepObserve, getPosFromAgentState,
                getVelFromAgentState)
            observeSheep = lambda state: [
                observeOneAgentForSheep(agentID)(state)
                for agentID in range(numWolves + numSheepToObserve)
            ]

            obsIDsForSheep = wolvesIDForSheepObserve + sheepsIDForSheepObserve + blocksIDForSheepObserve
            initObsForSheepParams = observeSheep(reset()[obsIDsForSheep])
            obsShapeSheep = [
                initObsForSheepParams[obsID].shape[0]
                for obsID in range(len(initObsForSheepParams))
            ]

            buildSheepModels = BuildMADDPGModels(actionDim,
                                                 numWolves + numSheepToObserve,
                                                 obsShapeSheep)
            sheepModelsList = [
                buildSheepModels(layerWidth, agentID)
                for agentID in range(numWolves, numWolves + numSheepToObserve)
            ]

            dirName = os.path.dirname(__file__)
            maxEpisode = 60000
            print(sheepPrefix)
            sheepFileName = "{}wolves{}sheep{}blocks{}eps_agent".format(
                numWolves, numSheepToObserve, numBlocks, maxEpisode)
            sheepModelPaths = [
                os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                             sheepPrefix + sheepFileName + str(i) + '60000eps')
                for i in range(numWolves, numWolves + numSheepToObserve)
            ]

            [
                restoreVariables(model, path)
                for model, path in zip(sheepModelsList, sheepModelPaths)
            ]
            sheepModelListOfDiffWolfReward = sheepModelListOfDiffWolfReward + sheepModelsList

        # Sheep Policy Function
        reshapeAction = ReshapeAction()
        actOneStepOneModelSheep = ActOneStep(actByPolicyTrainNoisy)

        # Sheep Generate Action
        numAllSheepModels = len(sheepModelListOfDiffWolfReward)

        # Wolves Part

        # Intention Prior For inference
        #createIntentionSpaceGivenSelfId = CreateIntentionSpaceGivenSelfId(possibleSheepIds, possibleWolvesIds)
        #intentionSpacesForAllWolves = [createAllPossibleIntentionsGivenSelfId(wolfId)
        #        for wolfId in possibleWolvesIds]
        intentionSpacesForAllWolves = [
            tuple(it.product(possibleSheepIds, [tuple(possibleWolvesIds)]))
            for wolfId in possibleWolvesIds
        ]
        print(intentionSpacesForAllWolves)
        wolvesIntentionPriors = [{
            tuple(intention): 1 / len(allPossibleIntentionsOneWolf)
            for intention in allPossibleIntentionsOneWolf
        } for allPossibleIntentionsOneWolf in intentionSpacesForAllWolves]
        # Percept Action For Inference
        #perceptAction = lambda action: action
        perceptSelfAction = SampleNoisyAction(deviationFor2DAction)
        perceptOtherAction = SampleNoisyAction(deviationFor2DAction)
        perceptAction = PerceptImaginedWeAction(possibleWolvesIds,
                                                perceptSelfAction,
                                                perceptOtherAction)
        #perceptAction = lambda action: action

        # Policy Likelihood function: Wolf Central Control NN Policy Given Intention
        # ------------ model ------------------------
        weModelsListBaseOnNumInWe = []
        observeListBaseOnNumInWe = []
        for numAgentInWe in range(2, numWolves + 1):
            numBlocksForWe = 5 - numAgentInWe
            wolvesIDForWolfObserve = list(range(numAgentInWe))
            sheepsIDForWolfObserve = list(range(numAgentInWe,
                                                1 + numAgentInWe))
            blocksIDForWolfObserve = list(
                range(1 + numAgentInWe, 1 + numAgentInWe + numBlocksForWe))
            observeOneAgentForWolf = lambda agentID: Observe(
                agentID, wolvesIDForWolfObserve, sheepsIDForWolfObserve,
                blocksIDForWolfObserve, getPosFromAgentState,
                getVelFromAgentState)
            observeWolf = lambda state: [
                observeOneAgentForWolf(agentID)(state)
                for agentID in range(numAgentInWe + 1)
            ]
            observeListBaseOnNumInWe.append(observeWolf)

            obsIDsForWolf = wolvesIDForWolfObserve + sheepsIDForWolfObserve + blocksIDForWolfObserve
            initObsForWolfParams = observeWolf(reset()[obsIDsForWolf])
            obsShapeWolf = [
                initObsForWolfParams[obsID].shape[0]
                for obsID in range(len(initObsForWolfParams))
            ]
            buildWolfModels = BuildMADDPGModels(actionDim, numAgentInWe + 1,
                                                obsShapeWolf)
            layerWidthForWolf = [
                64 * (numAgentInWe - 1), 64 * (numAgentInWe - 1)
            ]
            wolfModelsList = [
                buildWolfModels(layerWidthForWolf, agentID)
                for agentID in range(numAgentInWe)
            ]

            if wolfType == 'sharedAgencyByIndividualRewardWolf':
                wolfPrefix = 'maddpgIndividWolf'
            if wolfType == 'sharedAgencyBySharedRewardWolf':
                wolfPrefix = 'maddpg'
            wolfFileName = "{}wolves{}sheep{}blocks{}eps_agent".format(
                numAgentInWe, 1, numBlocksForWe, maxEpisode)
            wolfModelPaths = [
                os.path.join(dirName, '..', '..', 'data', 'preTrainModel',
                             wolfPrefix + wolfFileName + str(i) + '60000eps')
                for i in range(numAgentInWe)
            ]
            print(numAgentInWe, obsShapeWolf, wolfModelPaths)

            [
                restoreVariables(model, path)
                for model, path in zip(wolfModelsList, wolfModelPaths)
            ]
            weModelsListBaseOnNumInWe.append(wolfModelsList)

        actionDimReshaped = 2
        cov = [deviationFor2DAction**2 for _ in range(actionDimReshaped)]
        buildGaussian = BuildGaussianFixCov(cov)
        actOneStepOneModelWolf = ActOneStep(actByPolicyTrainNoNoisy)
        #actOneStepOneModelWolf = ActOneStep(actByPolicyTrainNoisy)
        composeCentralControlPolicy = lambda observe: ComposeCentralControlPolicyByGaussianOnDeterministicAction(
            reshapeAction, observe, actOneStepOneModelWolf, buildGaussian)
        wolvesCentralControlPolicies = [
            composeCentralControlPolicy(
                observeListBaseOnNumInWe[numAgentsInWe - 2])(
                    weModelsListBaseOnNumInWe[numAgentsInWe - 2],
                    numAgentsInWe)
            for numAgentsInWe in range(2, numWolves + 1)
        ]

        centralControlPolicyListBasedOnNumAgentsInWe = wolvesCentralControlPolicies  # 0 for two agents in We, 1 for three agents...
        softPolicyInInference = lambda distribution: distribution
        getStateThirdPersonPerspective = lambda state, goalId, weIds: getStateOrActionThirdPersonPerspective(
            state, goalId, weIds, blocksID)
        policyForCommittedAgentsInInference = PolicyForCommittedAgent(
            centralControlPolicyListBasedOnNumAgentsInWe,
            softPolicyInInference, getStateThirdPersonPerspective)
        concernedAgentsIds = possibleWolvesIds
        calCommittedAgentsPolicyLikelihood = CalCommittedAgentsContinuousPolicyLikelihood(
            concernedAgentsIds, policyForCommittedAgentsInInference,
            rationalityBetaInInference)

        randomActionSpace = [(5, 0), (3.5, 3.5), (0, 5), (-3.5, 3.5), (-5, 0),
                             (-3.5, -3.5), (0, -5), (3.5, -3.5), (0, 0)]
        randomPolicy = RandomPolicy(randomActionSpace)
        getStateFirstPersonPerspective = lambda state, goalId, weIds, selfId: getStateOrActionFirstPersonPerspective(
            state, goalId, weIds, selfId, blocksID)
        policyForUncommittedAgentsInInference = PolicyForUncommittedAgent(
            possibleWolvesIds, randomPolicy, softPolicyInInference,
            getStateFirstPersonPerspective)
        calUncommittedAgentsPolicyLikelihood = CalUncommittedAgentsPolicyLikelihood(
            possibleWolvesIds, concernedAgentsIds,
            policyForUncommittedAgentsInInference)

        # Joint Likelihood
        calJointLikelihood = lambda intention, state, perceivedAction: calCommittedAgentsPolicyLikelihood(intention, state, perceivedAction) * \
                calUncommittedAgentsPolicyLikelihood(intention, state, perceivedAction)

        # Infer and update Intention
        variablesForAllWolves = [
            [intentionSpace] for intentionSpace in intentionSpacesForAllWolves
        ]
        jointHypothesisSpaces = [
            pd.MultiIndex.from_product(variables, names=['intention'])
            for variables in variablesForAllWolves
        ]
        concernedHypothesisVariable = ['intention']
        priorDecayRate = 1
        softPrior = SoftDistribution(priorDecayRate)
        inferIntentionOneStepList = [
            InferOneStep(jointHypothesisSpace, concernedHypothesisVariable,
                         calJointLikelihood, softPrior)
            for jointHypothesisSpace in jointHypothesisSpaces
        ]

        if numSheep == 1:
            inferIntentionOneStepList = [lambda prior, state, action: prior
                                         ] * 3

        adjustIntentionPriorGivenValueOfState = lambda state: 1
        chooseIntention = sampleFromDistribution
        updateIntentions = [
            UpdateIntention(intentionPrior, valuePriorEndTime,
                            adjustIntentionPriorGivenValueOfState,
                            perceptAction, inferIntentionOneStep,
                            chooseIntention)
            for intentionPrior, inferIntentionOneStep in zip(
                wolvesIntentionPriors, inferIntentionOneStepList)
        ]

        # Tools to reset intentions and adjust intention-prior attributes across multiple trajectories
        intentionResetAttributes = [
            'timeStep', 'lastState', 'lastAction', 'intentionPrior',
            'formerIntentionPriors'
        ]
        intentionResetAttributeValues = [
            dict(
                zip(intentionResetAttributes,
                    [0, None, None, intentionPrior, [intentionPrior]]))
            for intentionPrior in wolvesIntentionPriors
        ]
        resetIntentions = ResetObjects(intentionResetAttributeValues,
                                       updateIntentions)
        returnAttributes = ['formerIntentionPriors']
        getIntentionDistributions = GetObjectsValuesOfAttributes(
            returnAttributes, updateIntentions)
        attributesToRecord = ['lastAction']
        recordActionForUpdateIntention = RecordValuesForObjects(
            attributesToRecord, updateIntentions)

        # Wolves Generate Action
        covForPlanning = [0.03**2 for _ in range(actionDimReshaped)]
        buildGaussianForPlanning = BuildGaussianFixCov(covForPlanning)
        composeCentralControlPolicyForPlanning = lambda observe: ComposeCentralControlPolicyByGaussianOnDeterministicAction(
            reshapeAction, observe, actOneStepOneModelWolf,
            buildGaussianForPlanning)
        wolvesCentralControlPoliciesForPlanning = [
            composeCentralControlPolicyForPlanning(
                observeListBaseOnNumInWe[numAgentsInWe - 2])(
                    weModelsListBaseOnNumInWe[numAgentsInWe - 2],
                    numAgentsInWe)
            for numAgentsInWe in range(2, numWolves + 1)
        ]

        centralControlPolicyListBasedOnNumAgentsInWeForPlanning = wolvesCentralControlPoliciesForPlanning  # 0 for two agents in We, 1 for three agents...
        softPolicyInPlanning = lambda distribution: distribution
        policyForCommittedAgentInPlanning = PolicyForCommittedAgent(
            centralControlPolicyListBasedOnNumAgentsInWeForPlanning,
            softPolicyInPlanning, getStateThirdPersonPerspective)

        policyForUncommittedAgentInPlanning = PolicyForUncommittedAgent(
            possibleWolvesIds, randomPolicy, softPolicyInPlanning,
            getStateFirstPersonPerspective)

        def wolfChooseActionMethod(individualContinuousDistributions):
            centralControlAction = tuple([
                tuple(sampleFromContinuousSpace(distribution))
                for distribution in individualContinuousDistributions
            ])
            return centralControlAction

        getSelfActionThirdPersonPerspective = lambda weIds, selfId: list(
            weIds).index(selfId)
        chooseCommittedAction = GetActionFromJointActionDistribution(
            wolfChooseActionMethod, getSelfActionThirdPersonPerspective)
        chooseUncommittedAction = sampleFromDistribution
        wolvesSampleIndividualActionGivenIntentionList = [
            SampleIndividualActionGivenIntention(
                selfId, policyForCommittedAgentInPlanning,
                policyForUncommittedAgentInPlanning, chooseCommittedAction,
                chooseUncommittedAction) for selfId in possibleWolvesIds
        ]

        # Sample and Save Trajectory
        trajectoriesWithIntentionDists = []
        for trajectoryId in range(self.numTrajectories):
            sheepModelsForPolicy = [
                sheepModelListOfDiffWolfReward[np.random.choice(
                    numAllSheepModels)] for sheepId in possibleSheepIds
            ]
            if sheepConcernSelfOnly:
                composeSheepPolicy = lambda sheepModel: lambda state: {
                    tuple(
                        reshapeAction(
                            actOneStepOneModelSheep(sheepModel,
                                                    observeSheep(state)))):
                    1
                }
                sheepChooseActionMethod = sampleFromDistribution
                sheepSampleActions = [
                    SampleActionOnFixedIntention(
                        selfId, possibleWolvesIds,
                        composeSheepPolicy(sheepModel),
                        sheepChooseActionMethod, blocksID) for selfId,
                    sheepModel in zip(possibleSheepIds, sheepModelsForPolicy)
                ]
            else:
                composeSheepPolicy = lambda sheepModel: lambda state: tuple(
                    reshapeAction(
                        actOneStepOneModelSheep(sheepModel, observeSheep(state)
                                                )))
                sheepSampleActions = [
                    composeSheepPolicy(sheepModel)
                    for sheepModel in sheepModelsForPolicy
                ]

            wolvesSampleActions = [
                SampleActionOnChangableIntention(
                    updateIntention,
                    wolvesSampleIndividualActionGivenIntention)
                for updateIntention, wolvesSampleIndividualActionGivenIntention
                in zip(updateIntentions,
                       wolvesSampleIndividualActionGivenIntentionList)
            ]
            allIndividualSampleActions = wolvesSampleActions + sheepSampleActions
            sampleActionMultiAgent = SampleActionMultiagent(
                allIndividualSampleActions, recordActionForUpdateIntention)
            trajectory = sampleTrajectory(sampleActionMultiAgent)
            intentionDistributions = getIntentionDistributions()
            trajectoryWithIntentionDists = [
                tuple(list(SASRPair) + list(intentionDist)) for SASRPair,
                intentionDist in zip(trajectory, intentionDistributions)
            ]
            trajectoriesWithIntentionDists.append(
                tuple(trajectoryWithIntentionDists))
            resetIntentions()
            #print(intentionDistributions)
        trajectoryFixedParameters = {'maxRunningSteps': maxRunningSteps}
        self.saveTrajectoryByParameters(trajectoriesWithIntentionDists,
                                        trajectoryFixedParameters, parameters)
        print(np.mean([len(tra) for tra in trajectoriesWithIntentionDists]))

        # visualize
        if visualizeTraj:
            wolfColor = np.array([0.85, 0.35, 0.35])
            sheepColor = np.array([0.35, 0.85, 0.35])
            blockColor = np.array([0.25, 0.25, 0.25])
            entitiesColorList = [wolfColor] * numWolves + [
                sheepColor
            ] * numSheep + [blockColor] * numBlocks
            render = Render(entitiesSizeList, entitiesColorList, numAgents,
                            getPosFromAgentState)
            trajToRender = np.concatenate(trajectoriesWithIntentionDists)
            render(trajToRender)
コード例 #14
0
def main():
    """Sample trajectories of one sheep against two center-controlled wolves.

    Command-line arguments (read when ``DEBUG`` is off):
        sys.argv[1]: JSON object with the varying save-path parameters; it
            must contain the key ``'agentId'``.
        sys.argv[2]: start sample index (inclusive).
        sys.argv[3]: end sample index (exclusive).

    The sheep acts through a restored neural-network policy; the wolves act
    jointly through ``StochasticMCTS`` with a uniform action prior and a
    random rollout policy.  One trajectory is sampled per index in
    ``range(startSampleIndex, endSampleIndex)`` and the list is pickled to
    the path derived from the parameters.  If a file already exists at that
    path, nothing is sampled and the function returns immediately.
    """
    DEBUG = 0
    renderOn = 0
    if DEBUG:
        parametersForTrajectoryPath = {}
        startSampleIndex = 0
        endSampleIndex = 10
        agentId = 1
    else:
        parametersForTrajectoryPath = json.loads(sys.argv[1])
        startSampleIndex = int(sys.argv[2])
        endSampleIndex = int(sys.argv[3])
        agentId = int(parametersForTrajectoryPath['agentId'])
    parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex,
                                                  endSampleIndex)

    # Output location. exist_ok=True avoids the check-then-create race when
    # several sampler processes are launched at the same time.
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(
        dirName, '..', '..', '..', '..', 'data', '2wolves1sheep',
        'trainWolvesTwoCenterControlMultiTrees', 'trajectories')
    os.makedirs(trajectoriesSaveDirectory, exist_ok=True)

    trajectorySaveExtension = '.pickle'
    maxRunningSteps = 50
    numSimulations = 500
    killzoneRadius = 50
    fixedParameters = {
        'agentId': agentId,
        'maxRunningSteps': maxRunningSteps,
        'numSimulations': numSimulations,
        'killzoneRadius': killzoneRadius
    }

    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory,
                                             trajectorySaveExtension,
                                             fixedParameters)
    trajectorySavePath = generateTrajectorySavePath(
        parametersForTrajectoryPath)

    # Skip all work when this batch has already been written.
    if os.path.isfile(trajectorySavePath):
        return

    # ---- environment -----------------------------------------------------
    numOfAgent = 3
    sheepId = 0
    # Position of the wolves' joint action in the per-step action list
    # [sheepAction, wolvesJointAction] (see `policy` below).
    wolvesId = 1

    wolfOneId = 1
    wolfTwoId = 2

    xPosIndex = [0, 1]
    xBoundary = [0, 600]
    yBoundary = [0, 600]

    getSheepXPos = GetAgentPosFromState(sheepId, xPosIndex)
    getWolfOneXPos = GetAgentPosFromState(wolfOneId, xPosIndex)
    getWolfTwoXPos = GetAgentPosFromState(wolfTwoId, xPosIndex)

    reset = Reset(xBoundary, yBoundary, numOfAgent)

    # The episode terminates as soon as either wolf's terminal check fires.
    isTerminalOne = IsTerminal(getWolfOneXPos, getSheepXPos, killzoneRadius)
    isTerminalTwo = IsTerminal(getWolfTwoXPos, getSheepXPos, killzoneRadius)
    isTerminal = lambda state: isTerminalOne(state) or isTerminalTwo(state)

    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)

    centerControlIndexList = [wolvesId]
    unpackCenterControlAction = UnpackCenterControlAction(
        centerControlIndexList)
    transitionFunction = TransiteForNoPhysicsWithCenterControlAction(
        stayInBoundaryByReflectVelocity)

    numFramesToInterpolate = 3
    transit = TransitWithInterpolateStateWithCenterControlAction(
        numFramesToInterpolate, transitionFunction, isTerminal,
        unpackCenterControlAction)

    # ---- action spaces ---------------------------------------------------
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7),
                   (0, -10), (7, -7), (0, 0)]
    wolfActionSpace = actionSpace
    # wolfActionSpace = [(10, 0), (0, 10), (-10, 0), (0, -10), (0, 0)]

    preyPowerRatio = 12
    sheepActionSpace = list(
        map(tuple,
            np.array(actionSpace) * preyPowerRatio))

    predatorPowerRatio = 8
    wolfActionOneSpace = list(
        map(tuple,
            np.array(wolfActionSpace) * predatorPowerRatio))
    wolfActionTwoSpace = list(
        map(tuple,
            np.array(wolfActionSpace) * predatorPowerRatio))

    # Joint action space: every (wolfOneAction, wolfTwoAction) pair.
    wolvesActionSpace = list(product(wolfActionOneSpace, wolfActionTwoSpace))

    actionSpaceList = [sheepActionSpace, wolvesActionSpace]

    # ---- sheep neural-network policy --------------------------------------
    numStateSpace = 2 * numOfAgent
    numSheepActionSpace = len(sheepActionSpace)
    numWolvesActionSpace = len(wolvesActionSpace)

    regularizationFactor = 1e-4
    sharedWidths = [128]
    actionLayerWidths = [128]
    valueLayerWidths = [128]
    generateSheepModel = GenerateModel(numStateSpace, numSheepActionSpace,
                                       regularizationFactor)

    # Location of the trained sheep model to restore.
    NNModelSaveExtension = ''
    sheepNNModelSaveDirectory = os.path.join(
        dirName, '..', '..', '..', '..', 'data', '2wolves1sheep',
        'trainSheepWithTwoHeatSeekingWolves', 'trainedResNNModels')
    sheepNNModelFixedParameters = {
        'agentId': 0,
        'maxRunningSteps': 50,
        'numSimulations': 110,
        'miniBatchSize': 256,
        'learningRate': 0.0001,
    }
    getSheepNNModelSavePath = GetSavePath(sheepNNModelSaveDirectory,
                                          NNModelSaveExtension,
                                          sheepNNModelFixedParameters)

    depth = 9
    resBlockSize = 2
    dropoutRate = 0.0
    initializationMethod = 'uniform'
    initSheepNNModel = generateSheepModel(sharedWidths * depth,
                                          actionLayerWidths,
                                          valueLayerWidths, resBlockSize,
                                          initializationMethod, dropoutRate)

    sheepTrainedModelPath = getSheepNNModelSavePath({
        'trainSteps': 50000,
        'depth': depth
    })
    sheepTrainedModel = restoreVariables(initSheepNNModel,
                                         sheepTrainedModelPath)
    sheepPolicy = ApproximatePolicy(sheepTrainedModel, sheepActionSpace)

    # ---- wolves MCTS -------------------------------------------------------
    # Child-selection scoring (built once; the original constructed these
    # objects twice with identical arguments and never used the first pair).
    cInit = 1
    cBase = 100
    calculateScore = ScoreChild(cInit, cBase)
    selectChild = SelectChild(calculateScore)

    # Uniform prior over the wolves' joint actions.
    getActionPrior = lambda state: {
        action: 1 / len(wolvesActionSpace)
        for action in wolvesActionSpace
    }

    temperatureInMCTS = 1
    chooseActionInMCTS = SampleAction(temperatureInMCTS)

    def wolvesTransit(state, action):
        # Inside the tree the sheep's move is sampled from its NN policy, so
        # the search only branches over the wolves' joint action.
        return transit(state,
                       [chooseActionInMCTS(sheepPolicy(state)), action])

    # Reward function.
    aliveBonus = -1 / maxRunningSteps
    deathPenalty = 1
    rewardFunction = reward.RewardFunctionCompete(aliveBonus, deathPenalty,
                                                  isTerminal)

    # Initialize children; expand.
    initializeChildren = InitializeChildren(wolvesActionSpace, wolvesTransit,
                                            getActionPrior)
    expand = Expand(isTerminal, initializeChildren)

    # Random rollout policy: a uniformly drawn joint action.
    def rolloutPolicy(state):
        return wolvesActionSpace[np.random.choice(
            range(numWolvesActionSpace))]

    # Rollout heuristic: mean of the two wolves' distance heuristics.
    rolloutHeuristicWeight = 0
    minDistance = 400
    rolloutHeuristic1 = reward.HeuristicDistanceToTarget(
        rolloutHeuristicWeight, getWolfOneXPos, getSheepXPos, minDistance)
    rolloutHeuristic2 = reward.HeuristicDistanceToTarget(
        rolloutHeuristicWeight, getWolfTwoXPos, getSheepXPos, minDistance)

    rolloutHeuristic = lambda state: (rolloutHeuristic1(state) +
                                      rolloutHeuristic2(state)) / 2

    maxRolloutSteps = 15
    rollout = RollOut(rolloutPolicy, maxRolloutSteps, wolvesTransit,
                      rewardFunction, isTerminal, rolloutHeuristic)

    # The simulation budget is split evenly across the trees.
    numTree = 4
    numSimulationsPerTree = numSimulations // numTree
    wolfPolicy = StochasticMCTS(
        numTree, numSimulationsPerTree, selectChild, expand, rollout, backup,
        establishSoftmaxActionDistFromMultipleTrees)

    # ---- all agents' policies ----------------------------------------------
    policy = lambda state: [sheepPolicy(state), wolfPolicy(state)]
    chooseActionList = [chooseGreedyAction, chooseGreedyAction]

    render = None
    if renderOn:
        # Imported lazily so that headless runs do not need pygame.
        import pygame as pg
        from pygame.color import THECOLORS
        screenColor = THECOLORS['black']
        circleColorList = [
            THECOLORS['green'], THECOLORS['red'], THECOLORS['red']
        ]
        circleSize = 10

        saveImage = False
        saveImageDir = os.path.join(dirName, '..', '..', '..', '..', 'data',
                                    'demoImg')
        os.makedirs(saveImageDir, exist_ok=True)

        screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
        render = Render(numOfAgent, xPosIndex, screen, screenColor,
                        circleColorList, circleSize, saveImage, saveImageDir)

    # ---- sample and save ---------------------------------------------------
    sampleTrajectory = SampleTrajectoryWithRender(maxRunningSteps, transit,
                                                  isTerminal, reset,
                                                  chooseActionList, render,
                                                  renderOn)
    trajectories = [
        sampleTrajectory(policy)
        for _ in range(startSampleIndex, endSampleIndex)
    ]
    print([len(traj) for traj in trajectories])
    saveToPickle(trajectories, trajectorySavePath)
コード例 #15
0
def main():
    """Sample sheep-vs-wolves chasing trajectories and pickle them.

    Run parameters come from the command line unless DEBUG is set:
    sys.argv[1] is a JSON object that must contain 'agentId', and
    sys.argv[2] / sys.argv[3] are the integer start and end sample indexes
    (end exclusive, as used by range() below).

    If the target trajectory file already exists nothing is simulated.
    Otherwise the sheep (one agent) acts through MCTS while the two wolves act
    jointly through a neural-network policy restored from disk; one trajectory
    is sampled per sample index and the list is written with saveToPickle.
    """
    startTime = time.time()

    # DEBUG switches between hard-coded parameters and command-line parameters.
    DEBUG = 0
    renderOn = 0
    if DEBUG:
        parametersForTrajectoryPath = {}
        startSampleIndex = 5
        endSampleIndex = 8
        agentId = 0
        parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex, endSampleIndex)
    else:
        # argv[1]: JSON dict of path parameters (must include 'agentId');
        # argv[2], argv[3]: first and one-past-last sample index.
        parametersForTrajectoryPath = json.loads(sys.argv[1])
        startSampleIndex = int(sys.argv[2])
        endSampleIndex = int(sys.argv[3])
        agentId = int(parametersForTrajectoryPath['agentId'])
        parametersForTrajectoryPath['sampleIndex'] = (startSampleIndex, endSampleIndex)

    # Make sure the output directory exists before building the save path.
    dirName = os.path.dirname(__file__)
    trajectoriesSaveDirectory = os.path.join(dirName, '..', '..', '..', '..', 'data', 'MADDPG2wolves1sheep', 'trainSheepWithPretrrainWolves', 'trajectories')
    if not os.path.exists(trajectoriesSaveDirectory):
        os.makedirs(trajectoriesSaveDirectory)

    trajectorySaveExtension = '.pickle'
    maxRunningSteps = 50
    numSimulations = 250
    fixedParameters = {'agentId': agentId, 'maxRunningSteps': maxRunningSteps, 'numSimulations': numSimulations}

    # NOTE(review): GetSavePath presumably encodes the fixed and per-run
    # parameters into the file name -- confirm against its definition.
    generateTrajectorySavePath = GetSavePath(trajectoriesSaveDirectory, trajectorySaveExtension, fixedParameters)

    trajectorySavePath = generateTrajectorySavePath(parametersForTrajectoryPath)

    # Only simulate when this batch of samples has not been saved already.
    if not os.path.isfile(trajectorySavePath):

        # Environment (MDP): entity 0 is the sheep, entities 1 and 2 are the
        # wolves, and there are no blocks.
        sheepsID = [0]
        wolvesID = [1, 2]
        blocksID = []

        numSheeps = len(sheepsID)
        numWolves = len(wolvesID)
        numBlocks = len(blocksID)

        numAgents = numWolves + numSheeps
        numEntities = numAgents + numBlocks

        sheepSize = 0.05
        wolfSize = 0.075
        blockSize = 0.2

        sheepMaxSpeed = 1.3 * 1
        wolfMaxSpeed = 1.0 * 1
        blockMaxSpeed = None

        # Per-entity lists are ordered sheep first, then wolves, then blocks,
        # matching the IDs assigned above.
        entitiesSizeList = [sheepSize] * numSheeps + [wolfSize] * numWolves + [blockSize] * numBlocks
        entityMaxSpeedList = [sheepMaxSpeed] * numSheeps + [wolfMaxSpeed] * numWolves + [blockMaxSpeed] * numBlocks
        entitiesMovableList = [True] * numAgents + [False] * numBlocks
        massList = [1.0] * numEntities

        # NOTE(review): index 1 presumably marks the joint wolves action inside
        # the [sheepAction, wolvesAction] list so it can be unpacked into one
        # action per wolf -- confirm in UnpackCenterControlAction.
        centralControlId = 1
        centerControlIndexList = [centralControlId]
        reshapeAction = UnpackCenterControlAction(centerControlIndexList)
        getCollisionForce = GetCollisionForce()
        applyActionForce = ApplyActionForce(wolvesID, sheepsID, entitiesMovableList)
        applyEnvironForce = ApplyEnvironForce(numEntities, entitiesMovableList, entitiesSizeList,
                                              getCollisionForce, getPosFromAgentState)
        integrateState = IntegrateState(numEntities, entitiesMovableList, massList,
                                        entityMaxSpeedList, getVelFromAgentState, getPosFromAgentState)
        interpolateState = TransitMultiAgentChasing(numEntities, reshapeAction, applyActionForce, applyEnvironForce, integrateState)

        numFramesToInterpolate = 1

        def transit(state, action):
            # Apply the chosen action on the first frame and a zero action on
            # any further interpolated frames. With numFramesToInterpolate = 1
            # the loop runs once, so the zero action is never actually applied.
            # A value of 0 would leave nextState unbound at the return.
            for frameIndex in range(numFramesToInterpolate):
                nextState = interpolateState(state, action)
                action = np.array([(0, 0)] * numAgents)
                state = nextState
            return nextState

        # Episodes never terminate early; length is bounded by maxRunningSteps.
        isTerminal = lambda state: False

        isCollision = IsCollision(getPosFromAgentState)
        collisonRewardWolf = 1
        punishForOutOfBound = PunishForOutOfBound()
        # rewardWolf is built here but not referenced again in the visible part
        # of this function; only rewardSheep is passed on below.
        rewardWolf = RewardCentralControlPunishBond(wolvesID, sheepsID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound, collisonRewardWolf)
        collisonRewardSheep = -1
        rewardSheep = RewardCentralControlPunishBond(sheepsID, wolvesID, entitiesSizeList, getPosFromAgentState, isCollision, punishForOutOfBound, collisonRewardSheep)

        resetState = ResetMultiAgentChasing(numAgents, numBlocks)

        # observe builds one Observe object per agent on every call; it is not
        # referenced again in the visible part of this function.
        observeOneAgent = lambda agentID: Observe(agentID, wolvesID, sheepsID, blocksID, getPosFromAgentState, getVelFromAgentState)
        observe = lambda state: [observeOneAgent(agentID)(state) for agentID in range(numAgents)]

        # Policy: eight compass directions plus "stay", for sheep and wolves.
        actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7), (0, 0)]
        wolfActionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7), (0, 0)]

        # Scale the raw action vectors by each side's power ratio.
        preyPowerRatio = 0.5
        sheepActionSpace = list(map(tuple, np.array(actionSpace) * preyPowerRatio))

        predatorPowerRatio = 0.5
        wolfActionOneSpace = list(map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))
        wolfActionTwoSpace = list(map(tuple, np.array(wolfActionSpace) * predatorPowerRatio))

        # Joint wolves action: every pairing of one action per wolf
        # (9 x 9 = 81 entries, assuming `product` is itertools.product).
        wolvesActionSpace = list(product(wolfActionOneSpace, wolfActionTwoSpace))

        actionSpaceList = [sheepActionSpace, wolvesActionSpace]

        # Neural network init.
        # NOTE(review): 4 values per entity presumably means (x, y, vx, vy) --
        # confirm against the state layout.
        numStateSpace = 4 * numEntities
        numSheepActionSpace = len(sheepActionSpace)
        numWolvesActionSpace = len(wolvesActionSpace)

        regularizationFactor = 1e-4
        sharedWidths = [128]
        actionLayerWidths = [128]
        valueLayerWidths = [128]
        generateWolvesModel = GenerateModel(numStateSpace, numWolvesActionSpace, regularizationFactor)

        # Wolf NN policy: rebuild the network with the architecture named in
        # the checkpoint path (depth=9), then restore its trained variables.
        NNModelSaveExtension = ''
        wolfTrainedModelPath = os.path.join(dirName, '..', '..', '..', '..', 'data', 'MADDPG2wolves1sheep', 'trainWolvesTwoCenterControlAction', 'trainedResNNModels', 'agentId=1_depth=9_learningRate=0.0001_maxRunningSteps=50_miniBatchSize=256_numSimulations=250_trainSteps=50000')

        depth = 9
        resBlockSize = 2
        dropoutRate = 0.0
        initializationMethod = 'uniform'
        initWolfNNModel = generateWolvesModel(sharedWidths * depth, actionLayerWidths, valueLayerWidths, resBlockSize, initializationMethod, dropoutRate)

        wolfTrainedModel = restoreVariables(initWolfNNModel, wolfTrainedModelPath)
        wolfPolicy = ApproximatePolicy(wolfTrainedModel, wolvesActionSpace)

        # MCTS for the sheep.
        cInit = 1
        cBase = 100
        calculateScore = ScoreChild(cInit, cBase)
        selectChild = SelectChild(calculateScore)

        # Uniform prior over the sheep's actions.
        getActionPrior = lambda state: {action: 1 / len(sheepActionSpace) for action in sheepActionSpace}

        # Inside the tree the wolves' joint action is drawn from the restored
        # wolf policy's output via sampleFromDistribution.
        chooseActionInMCTS = sampleFromDistribution

        def sheepTransit(state, action): return transit(
            state, [action, chooseActionInMCTS(wolfPolicy(state))])

        # Initialize children; expand.
        initializeChildren = InitializeChildren(
            sheepActionSpace, sheepTransit, getActionPrior)
        # Same always-False terminal check as defined above (duplicate binding).
        isTerminal = lambda state: False
        expand = Expand(isTerminal, initializeChildren)

        # Rollout policy: the sheep picks an action index with
        # np.random.choice, the wolves keep following the restored NN policy.
        def rolloutPolicy(
            state): return [sheepActionSpace[np.random.choice(range(numSheepActionSpace))],sampleFromDistribution(wolfPolicy(state))]

        # No heuristic value is added at the end of a rollout.
        rolloutHeuristic = lambda state: 0
        maxRolloutSteps = 15
        rollout = RollOut(rolloutPolicy, maxRolloutSteps, transit, rewardSheep, isTerminal, rolloutHeuristic)

        sheepPolicy = MCTS(numSimulations, selectChild, expand, rollout, backup, establishSoftmaxActionDist)

        # All agents' policies. `policy` is not referenced again in the visible
        # part of this function; sampleAction below is what gets passed to the
        # trajectory sampler.
        policy = lambda state: [sheepPolicy(state), wolfPolicy(state)]
        chooseActionList = [maxFromDistribution, maxFromDistribution]

        def sampleAction(state):
            # Query both policies, then apply each agent's chooser (paired by
            # position in chooseActionList) to its own policy output.
            actionDists = [sheepPolicy(state), wolfPolicy(state)]
            action = [chooseAction(actionDist) for actionDist, chooseAction in zip(actionDists, chooseActionList)]
            return action

        # Rendering is a no-op here.
        render = lambda state: None
        forwardOneStep = ForwardOneStep(transit, rewardSheep)
        sampleTrajectory = SampleTrajectoryWithRender(maxRunningSteps, isTerminal, resetState, forwardOneStep, render, renderOn)

        # One trajectory per sample index in [startSampleIndex, endSampleIndex).
        trajectories = [sampleTrajectory(sampleAction) for sampleIndex in range(startSampleIndex, endSampleIndex)]
        print([len(traj) for traj in trajectories])
        saveToPickle(trajectories, trajectorySavePath)

    endTime = time.time()