def main():
    """Plan one trajectory with MCTS (progressive widening) in the swamp world and render it.

    Builds the two-agent swamp MDP, plans with MCTS, samples a single
    trajectory, then draws it with pygame and saves frames under
    data/evaluateObstacle2/trajectories. Relies on project-local classes
    (StayInBoundaryByReflectVelocity, MCTS, DrawState, ...) imported
    elsewhere in this file.
    """
    # --- MDP environment ---
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    swamp = [[[300, 400], [300, 400]]]  # one square swamp patch
    noise = [50, 50]
    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)
    transitionWithNoise = TransitionWithNoise(noise)
    minDistance = 50
    target = [600, 600]
    isTerminal = IsTerminal(minDistance, target)
    isInSwamp = IsInSwamp(swamp)
    singleAgentTransit = MovingAgentTransitionInSwampWorld(
        transitionWithNoise, stayInBoundaryByReflectVelocity, isTerminal)
    transitionFunctionPack = [singleAgentTransit, static]
    multiAgentTransition = MultiAgentTransitionInGeneral(transitionFunctionPack)
    twoAgentTransit = MultiAgentTransitionInSwampWorld(multiAgentTransition, target)

    numOfAgent = 2
    # NOTE(review): the start state is pinned to [0, 0]; the [500,600]x[0,100]
    # reset bounds used by the sibling main() are not applied here -- confirm
    # this is intended.
    resetState = Reset([0, 0], [0, 0], numOfAgent, target)

    actionSpace = [(100, 0), (-100, 0), (0, 100), (0, -100)]
    actionCost = -1
    swampPenalty = -100
    terminalReward = 1000
    rewardFunction = RewardFunction(actionCost, terminalReward, swampPenalty,
                                    isTerminal, isInSwamp)

    maxRunningSteps = 100
    oneStepSampleTrajectory = OneStepSampleTrajectory(twoAgentTransit, rewardFunction)
    sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, resetState,
                                        oneStepSampleTrajectory)

    # --- MCTS planner ---
    numSimulation = 5
    cInit = 100
    cBase = 1
    scoreChild = ScoreChild(cInit, cBase)
    selectAction = SelectAction(scoreChild)
    selectNextState = SelectNextState(selectAction)
    # Uniform prior over the action space.
    uniformActionPrior = {action: 1 / len(actionSpace) for action in actionSpace}
    getActionPrior = lambda state: uniformActionPrior
    initializeChildren = InitializeChildren(actionSpace, twoAgentTransit, getActionPrior)
    expand = Expand(isTerminal, initializeChildren)
    alpha = 0
    C = 3
    pWidening = PWidening(alpha, C)
    expandNewState = ExpandNextState(twoAgentTransit, pWidening)
    rolloutPolicy = lambda state: random.choice(actionSpace)
    rolloutHeuristic = lambda state: 0  # flat heuristic: rollout reward only
    maxRolloutStep = 10
    estimateValue = RollOut(rolloutPolicy, maxRolloutStep, twoAgentTransit,
                            rewardFunction, isTerminal, rolloutHeuristic)
    mctsSelectAction = MCTS(numSimulation, selectAction, selectNextState, expand,
                            expandNewState, estimateValue, backup,
                            establishPlainActionDist)

    def sampleAction(state):
        # Greedy action from the MCTS action distribution.
        actionDist = mctsSelectAction(state)
        action = maxFromDistribution(actionDist)
        return action

    trajectories = [sampleTrajectory(sampleAction) for _ in range(1)]
    print(trajectories)

    DIRNAME = os.path.dirname(__file__)
    trajectoryDirectory = os.path.join(DIRNAME, '..', '..', 'data',
                                       'evaluateObstacle2', 'trajectories')
    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)

    # --- generate demo image ---
    screenWidth = 600
    screenHeight = 600
    screen = pg.display.set_mode((screenWidth, screenHeight))
    screenColor = THECOLORS['black']
    lineColor = THECOLORS['white']
    lineWidth = 4
    xSwamp = [300, 400]
    ySwamp = [300, 400]
    drawBackground = DrawBackground(screen, screenColor, xBoundary, yBoundary,
                                    lineColor, lineWidth, xSwamp, ySwamp)
    fps = 40
    circleColorSpace = np.array([[0, 0, 255], [0, 255, 255]])
    circleSize = 10
    positionIndex = [0, 1]
    saveImage = True
    imageSavePath = os.path.join(trajectoryDirectory, 'picMovingSheep')
    if not os.path.exists(imageSavePath):
        os.makedirs(imageSavePath)
    trajectoryParameters = 'obstacle'
    imageFolderName = str(trajectoryParameters)
    saveImageDir = os.path.join(imageSavePath, imageFolderName)
    if not os.path.exists(saveImageDir):
        os.makedirs(saveImageDir)
    agentIdsToDraw = list(range(2))
    drawState = DrawState(fps, screen, circleColorSpace, circleSize,
                          agentIdsToDraw, positionIndex, saveImage,
                          saveImageDir, drawBackground)
    numFramesToInterpolate = 3
    interpolateState = InterpolateState(numFramesToInterpolate, twoAgentTransit)
    stateIndexInTimeStep = 0
    actionIndexInTimeStep = 1
    chaseTrial = ChaseTrialWithTraj(stateIndexInTimeStep, drawState,
                                    interpolateState, actionIndexInTimeStep)
    [chaseTrial(trajectory) for trajectory in trajectories]
    pg.quit()
def main():
    """Roll out random-policy trajectories in the swamp world and render them.

    Builds the two-agent swamp MDP, samples 10 trajectories from a random
    policy, then draws each with pygame and saves frames under
    data/evaluateObstacle/trajectories. Relies on project-local classes
    imported elsewhere in this file.
    """
    # --- MDP environment ---
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    swamp = [[[300, 400], [300, 400]]]  # one square swamp patch
    noise = [1, 1]
    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)
    transitionWithNoise = TransitionWithNoise(noise)
    minDistance = 50
    target = [200, 200]
    isTerminal = IsTerminal(minDistance, target)
    isInSwamp = IsInSwamp(swamp)
    singleAgentTransit = MovingAgentTransitionInSwampWorld(
        transitionWithNoise, stayInBoundaryByReflectVelocity, isTerminal)
    transitionFunctionPack = [singleAgentTransit, static]
    multiAgentTransition = MultiAgentTransitionInGeneral(transitionFunctionPack)
    twoAgentTransit = MultiAgentTransitionInSwampWorld(multiAgentTransition, target)

    numOfAgent = 2
    xBoundaryReset = [500, 600]
    yBoundaryReset = [0, 100]
    resetState = Reset(xBoundaryReset, yBoundaryReset, numOfAgent, target)

    actionSpace = [[10, 0], [-10, 0], [-10, -10], [10, 10],
                   [0, 10], [0, -10], [-10, 10], [10, -10]]
    actionCost = -1
    swampPenalty = -10
    terminalReward = 10
    rewardFunction = RewardFunction(actionCost, terminalReward, swampPenalty,
                                    isTerminal, isInSwamp)

    maxRunningSteps = 100
    oneStepSampleTrajectory = OneStepSampleTrajectory(twoAgentTransit, rewardFunction)
    sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, resetState,
                                        oneStepSampleTrajectory)
    randomPolicy = RandomPolicy(actionSpace)
    actionDistribution = randomPolicy()
    sampleAction = SampleFromDistribution(actionDistribution)
    trajectories = [sampleTrajectory(sampleAction) for _ in range(10)]

    DIRNAME = os.path.dirname(__file__)
    trajectoryDirectory = os.path.join(DIRNAME, '..', '..', 'data',
                                       'evaluateObstacle', 'trajectories')
    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)

    # --- generate demo image ---
    screenWidth = 600
    screenHeight = 600
    screen = pg.display.set_mode((screenWidth, screenHeight))
    screenColor = THECOLORS['black']
    lineColor = THECOLORS['white']
    lineWidth = 4
    xSwamp = [300, 400]
    ySwamp = [300, 400]
    drawBackground = DrawBackground(screen, screenColor, xBoundary, yBoundary,
                                    lineColor, lineWidth, xSwamp, ySwamp)
    fps = 40
    circleColorSpace = np.array([[0, 0, 255], [0, 255, 255]])
    circleSize = 10
    positionIndex = [0, 1]
    saveImage = True
    imageSavePath = os.path.join(trajectoryDirectory, 'picMovingSheep')
    if not os.path.exists(imageSavePath):
        os.makedirs(imageSavePath)
    trajectoryParameters = 'obstacle'
    imageFolderName = str(trajectoryParameters)
    saveImageDir = os.path.join(imageSavePath, imageFolderName)
    if not os.path.exists(saveImageDir):
        os.makedirs(saveImageDir)
    agentIdsToDraw = list(range(2))
    drawState = DrawState(fps, screen, circleColorSpace, circleSize,
                          agentIdsToDraw, positionIndex, saveImage,
                          saveImageDir, drawBackground)
    numFramesToInterpolate = 3
    interpolateState = InterpolateState(numFramesToInterpolate, twoAgentTransit)
    stateIndexInTimeStep = 0
    actionIndexInTimeStep = 1
    chaseTrial = ChaseTrialWithTraj(stateIndexInTimeStep, drawState,
                                    interpolateState, actionIndexInTimeStep)
    [chaseTrial(trajectory) for trajectory in trajectories]
    pg.quit()
def testRewardFunctionCompete(self, state, action, newState, result):
    """Assert that RewardFunction yields the expected reward for a transition."""
    rewardOf = RewardFunction(
        self.actionCost,
        self.terminalReward,
        self.swampPenalty,
        self.isTerminal,
        self.isInSwamp,
    )
    self.assertEqual(rewardOf(state, action, newState), result)
def __call__(self, parameters):
    """Sample MCTS trajectories for one parameter setting and save them.

    `parameters` must contain 'numSimulation' and 'noise'. Samples
    `self.numTrajectories` trajectories with an MCTS planner and passes
    them to `self.saveTrajectoryByParameters`.
    """
    print(parameters)
    numSimulation = parameters['numSimulation']
    noise = parameters['noise']

    # --- MDP environment ---
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    swamp = [[[300, 400], [300, 400]]]  # one square swamp patch
    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)
    transitionWithNoise = TransitionWithNoise(noise)
    minDistance = 50
    target = [600, 600]
    isTerminal = IsTerminal(minDistance, target)
    isInSwamp = IsInSwamp(swamp)
    singleAgentTransit = MovingAgentTransitionInSwampWorld(
        transitionWithNoise, stayInBoundaryByReflectVelocity, isTerminal)
    transitionFunctionPack = [singleAgentTransit, static]
    multiAgentTransition = MultiAgentTransitionInGeneral(transitionFunctionPack)
    twoAgentTransit = MultiAgentTransitionInSwampWorld(multiAgentTransition, target)

    numOfAgent = 2
    # NOTE(review): start state is pinned to [0, 0]; the [500,600]x[0,100]
    # reset bounds used elsewhere in this file are not applied -- confirm intended.
    resetState = Reset([0, 0], [0, 0], numOfAgent, target)

    actionSpace = [(100, 0), (-100, 0), (0, 100), (0, -100)]
    actionCost = -1
    swampPenalty = -100
    terminalReward = 1000
    rewardFunction = RewardFunction(actionCost, terminalReward, swampPenalty,
                                    isTerminal, isInSwamp)

    maxRunningSteps = 100
    oneStepSampleTrajectory = OneStepSampleTrajectory(twoAgentTransit, rewardFunction)
    sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, resetState,
                                        oneStepSampleTrajectory)

    # --- MCTS planner ---
    cInit = 100
    cBase = 1
    scoreChild = ScoreChild(cInit, cBase)
    selectAction = SelectAction(scoreChild)
    selectNextState = SelectNextState(selectAction)
    # Uniform prior over the action space.
    uniformActionPrior = {action: 1 / len(actionSpace) for action in actionSpace}
    getActionPrior = lambda state: uniformActionPrior
    initializeChildren = InitializeChildren(actionSpace, twoAgentTransit, getActionPrior)
    expand = Expand(isTerminal, initializeChildren)
    expandNewState = ExpandNextState(twoAgentTransit)
    rolloutPolicy = lambda state: random.choice(actionSpace)
    rolloutHeuristic = lambda state: 0  # flat heuristic: rollout reward only
    maxRolloutStep = 100
    estimateValue = RollOut(rolloutPolicy, maxRolloutStep, twoAgentTransit,
                            rewardFunction, isTerminal, rolloutHeuristic)
    mctsSelectAction = MCTS(numSimulation, selectAction, selectNextState, expand,
                            expandNewState, estimateValue, backup,
                            establishPlainActionDist)

    def sampleAction(state):
        # Greedy action from the MCTS action distribution.
        actionDist = mctsSelectAction(state)
        action = maxFromDistribution(actionDist)
        return action

    trajectoriesWithIntentionDists = []
    for trajectoryId in range(self.numTrajectories):
        trajectory = sampleTrajectory(sampleAction)
        trajectoriesWithIntentionDists.append(trajectory)
    print(trajectoriesWithIntentionDists)
    trajectoryFixedParameters = {'Algorithm': "MCTS"}
    self.saveTrajectoryByParameters(trajectoriesWithIntentionDists,
                                    trajectoryFixedParameters, parameters)
def __call__(self, parameters):
    """Sample MCTS trajectories for one parameter setting and save them.

    Reads 'numSimulation', 'rolloutHeuristic', 'noise', 'cBase' and
    'maxRolloutStep' from `parameters`, samples `self.numTrajectories`
    trajectories with an MCTS planner, and passes them to
    `self.saveTrajectoryByParameters`.
    """
    print(parameters)
    numSimulation = parameters['numSimulation']
    # NOTE(review): this swept value is currently ignored -- it is shadowed by
    # the local `def rolloutHeuristic` below; confirm which is intended.
    rolloutHeuristicParam = parameters['rolloutHeuristic']
    noise = parameters['noise']
    cBase = parameters['cBase']
    # BUG FIX: a later hard-coded `maxRolloutStep = 15` used to silently
    # override this swept parameter; the override has been removed.
    maxRolloutStep = parameters['maxRolloutStep']

    # --- MDP environment ---
    xBoundary = [0, 400]
    yBoundary = [0, 400]
    swamp = [[[175, 225], [175, 225]]]  # one square swamp patch
    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)
    transitionWithNoise = TransitionWithNoise(noise)
    minDistance = 40
    target = [400, 400]
    isTerminal = IsTerminal(minDistance, target)
    isInSwamp = IsInSwamp(swamp)
    singleAgentTransit = MovingAgentTransitionInSwampWorld(
        transitionWithNoise, stayInBoundaryByReflectVelocity, isTerminal)
    transitionFunctionPack = [singleAgentTransit, static]
    multiAgentTransition = MultiAgentTransitionInGeneral(transitionFunctionPack)
    twoAgentTransit = MultiAgentTransitionInSwampWorld(multiAgentTransition, target)

    numOfAgent = 2
    # Start anywhere in the full [0,400]x[0,400] square.
    resetState = Reset([0, 400], [0, 400], numOfAgent, target)

    actionSpace = [(50, 0), (-50, 0), (0, 50), (0, -50)]
    actionCost = -0.02
    swampPenalty = -0.5
    terminalReward = 1
    rewardFunction = RewardFunction(actionCost, terminalReward, swampPenalty,
                                    isTerminal, isInSwamp)

    maxRunningSteps = 50
    oneStepSampleTrajectory = OneStepSampleTrajectory(twoAgentTransit, rewardFunction)
    sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, resetState,
                                        oneStepSampleTrajectory)

    # --- MCTS planner ---
    cInit = 1
    scoreChild = ScoreChild(cInit, cBase)
    selectAction = SelectAction(scoreChild)
    selectNextState = SelectNextState(selectAction)
    # Uniform prior over the action space.
    uniformActionPrior = {action: 1 / len(actionSpace) for action in actionSpace}
    getActionPrior = lambda state: uniformActionPrior
    initializeChildren = InitializeChildren(actionSpace, twoAgentTransit, getActionPrior)
    expand = Expand(isTerminal, initializeChildren)
    expandNewState = ExpandNextState(twoAgentTransit)
    rolloutPolicy = lambda state: random.choice(actionSpace)

    def rolloutHeuristic(allState):
        # Distance-shaped value estimate in (-1, 1]: closer to target -> higher.
        [state, terminalPosition] = allState
        distanceToTerminal = np.linalg.norm(np.array(target) - np.array(state), ord=2)
        return (2 * np.exp(-distanceToTerminal / 100) - 1)

    estimateValue = RollOut(rolloutPolicy, maxRolloutStep, twoAgentTransit,
                            rewardFunction, isTerminal, rolloutHeuristic)
    mctsSelectAction = MCTS(numSimulation, selectAction, selectNextState, expand,
                            expandNewState, estimateValue, backup,
                            establishPlainActionDist)

    def sampleAction(state):
        # Greedy action from the MCTS action distribution.
        actionDist = mctsSelectAction(state)
        action = maxFromDistribution(actionDist)
        return action

    trajectoriesWithIntentionDists = []
    for trajectoryId in range(self.numTrajectories):
        trajectory = sampleTrajectory(sampleAction)
        trajectoriesWithIntentionDists.append(trajectory)
    trajectoryFixedParameters = {'Algorithm': "MCTSNew"}
    self.saveTrajectoryByParameters(trajectoriesWithIntentionDists,
                                    trajectoryFixedParameters, parameters)
def main():
    """Plan one MCTS trajectory in the swamp world and report its rewards.

    Builds the two-agent swamp MDP, plans with MCTS, samples a single
    trajectory, ensures the output directory exists, then prints the
    trajectory and its cumulative reward. Relies on project-local classes
    imported elsewhere in this file.
    """
    # --- MDP environment ---
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    swamp = [[[300, 400], [300, 400]]]  # one square swamp patch
    noise = [50, 50]
    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)
    transitionWithNoise = TransitionWithNoise(noise)
    minDistance = 50
    target = [600, 600]
    isTerminal = IsTerminal(minDistance, target)
    isInSwamp = IsInSwamp(swamp)
    singleAgentTransit = MovingAgentTransitionInSwampWorld(
        transitionWithNoise, stayInBoundaryByReflectVelocity, isTerminal)
    transitionFunctionPack = [singleAgentTransit, static]
    multiAgentTransition = MultiAgentTransitionInGeneral(transitionFunctionPack)
    twoAgentTransit = MultiAgentTransitionInSwampWorld(multiAgentTransition, target)

    numOfAgent = 2
    # NOTE(review): start state is pinned to [0, 0]; the [500,600]x[0,100]
    # reset bounds used elsewhere in this file are not applied -- confirm intended.
    resetState = Reset([0, 0], [0, 0], numOfAgent, target)

    actionSpace = [(100, 0), (-100, 0), (0, 100), (0, -100)]
    actionCost = -1
    swampPenalty = -100
    terminalReward = 1000
    rewardFunction = RewardFunction(actionCost, terminalReward, swampPenalty,
                                    isTerminal, isInSwamp)

    maxRunningSteps = 100
    oneStepSampleTrajectory = OneStepSampleTrajectory(twoAgentTransit, rewardFunction)
    sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, resetState,
                                        oneStepSampleTrajectory)

    # --- MCTS planner ---
    numSimulation = 50
    cInit = 1
    cBase = 100
    scoreChild = ScoreChild(cInit, cBase)
    selectAction = SelectAction(scoreChild)
    selectNextState = SelectNextState(selectAction)
    # Uniform prior over the action space.
    uniformActionPrior = {action: 1 / len(actionSpace) for action in actionSpace}
    getActionPrior = lambda state: uniformActionPrior
    initializeChildren = InitializeChildren(actionSpace, twoAgentTransit, getActionPrior)
    expand = Expand(isTerminal, initializeChildren)
    expandNewState = ExpandNextState(twoAgentTransit)
    rolloutPolicy = lambda state: random.choice(actionSpace)
    rolloutHeuristic = lambda state: 0  # flat heuristic: rollout reward only
    maxRolloutStep = 100
    estimateValue = RollOut(rolloutPolicy, maxRolloutStep, twoAgentTransit,
                            rewardFunction, isTerminal, rolloutHeuristic)
    mctsSelectAction = MCTS(numSimulation, selectAction, selectNextState, expand,
                            expandNewState, estimateValue, backup,
                            establishPlainActionDist)

    def sampleAction(state):
        # Greedy action from the MCTS action distribution.
        actionDist = mctsSelectAction(state)
        action = maxFromDistribution(actionDist)
        return action

    trajectories = [sampleTrajectory(sampleAction) for _ in range(1)]

    DIRNAME = os.path.dirname(__file__)
    trajectoryDirectory = os.path.join(DIRNAME, '..', '..', 'data',
                                       'evaluateCompeteDetection', 'trajectories')
    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)
    print(trajectories)
    print(findCumulativeReward(trajectories))