def main():
    """Plan one trajectory with MCTS in the swamp world and render it with pygame.

    Builds the MDP (a bounded plane with a rectangular swamp patch and a fixed
    target), assembles an MCTS planner with progressive widening over a
    two-agent transition (moving agent + static target), samples a single
    trajectory, then draws and saves a demo animation.
    Relies on module-level helpers: `static`, `backup`,
    `establishPlainActionDist`, `maxFromDistribution`.
    """
    # --- MDP environment ---
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    swamp = [[[300, 400], [300, 400]]]  # one patch: [[xMin, xMax], [yMin, yMax]]
    noise = [50, 50]  # per-coordinate transition noise
    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)
    transitionWithNoise = TransitionWithNoise(noise)

    minDistance = 50  # terminal when within this distance of the target
    target = [600, 600]
    isTerminal = IsTerminal(minDistance, target)
    isInSwamp = IsInSwamp(swamp)

    singleAgentTransit = MovingAgentTransitionInSwampWorld(
        transitionWithNoise, stayInBoundaryByReflectVelocity, isTerminal)
    transitionFunctionPack = [singleAgentTransit, static]  # agent 1 (target) never moves
    multiAgentTransition = MultiAgentTransitionInGeneral(transitionFunctionPack)
    twoAgentTransit = MultiAgentTransitionInSwampWorld(multiAgentTransition, target)

    numOfAgent = 2
    resetState = Reset([0, 0], [0, 0], numOfAgent, target)  # deterministic start
    actionSpace = [(100, 0), (-100, 0), (0, 100), (0, -100)]

    # --- reward ---
    actionCost = -1
    swampPenalty = -100
    terminalReward = 1000
    rewardFunction = RewardFunction(actionCost, terminalReward, swampPenalty,
                                    isTerminal, isInSwamp)

    maxRunningSteps = 100
    oneStepSampleTrajectory = OneStepSampleTrajectory(twoAgentTransit, rewardFunction)
    sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, resetState,
                                        oneStepSampleTrajectory)

    # --- MCTS planner ---
    numSimulation = 5
    cInit = 100
    cBase = 1
    scoreChild = ScoreChild(cInit, cBase)
    selectAction = SelectAction(scoreChild)
    selectNextState = SelectNextState(selectAction)

    uniformActionPrior = {action: 1 / 4 for action in actionSpace}
    getActionPrior = lambda state: uniformActionPrior
    initializeChildren = InitializeChildren(actionSpace, twoAgentTransit,
                                            getActionPrior)
    expand = Expand(isTerminal, initializeChildren)

    # Progressive-widening parameters.
    alpha = 0
    C = 3
    pWidening = PWidening(alpha, C)
    expandNewState = ExpandNextState(twoAgentTransit, pWidening)

    rolloutPolicy = lambda state: random.choice(actionSpace)
    rolloutHeuristic = lambda state: 0  # no heuristic value at rollout cutoff
    maxRolloutStep = 10
    estimateValue = RollOut(rolloutPolicy, maxRolloutStep, twoAgentTransit,
                            rewardFunction, isTerminal, rolloutHeuristic)
    mctsSelectAction = MCTS(numSimulation, selectAction, selectNextState, expand,
                            expandNewState, estimateValue, backup,
                            establishPlainActionDist)

    def sampleAction(state):
        # Greedy action from the MCTS action distribution.
        actionDist = mctsSelectAction(state)
        action = maxFromDistribution(actionDist)
        return action

    trajectories = [sampleTrajectory(sampleAction) for _ in range(1)]
    print(trajectories)

    DIRNAME = os.path.dirname(__file__)
    trajectoryDirectory = os.path.join(DIRNAME, '..', '..', 'data',
                                       'evaluateObstacle2', 'trajectories')
    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)

    # --- generate demo image ---
    screenWidth = 600
    screenHeight = 600
    screen = pg.display.set_mode((screenWidth, screenHeight))
    screenColor = THECOLORS['black']
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    lineColor = THECOLORS['white']
    lineWidth = 4
    xSwamp = [300, 400]
    ySwamp = [300, 400]
    drawBackground = DrawBackground(screen, screenColor, xBoundary, yBoundary,
                                    lineColor, lineWidth, xSwamp, ySwamp)
    fps = 40
    circleColorSpace = np.array([[0, 0, 255], [0, 255, 255]])
    circleSize = 10
    positionIndex = [0, 1]
    saveImage = True
    imageSavePath = os.path.join(trajectoryDirectory, 'picMovingSheep')
    if not os.path.exists(imageSavePath):
        os.makedirs(imageSavePath)
    trajectoryParameters = 'obstacle'
    imageFolderName = str(trajectoryParameters)
    saveImageDir = os.path.join(os.path.join(imageSavePath, imageFolderName))
    if not os.path.exists(saveImageDir):
        os.makedirs(saveImageDir)
    agentIdsToDraw = list(range(2))
    drawState = DrawState(fps, screen, circleColorSpace, circleSize,
                          agentIdsToDraw, positionIndex, saveImage,
                          saveImageDir, drawBackground)

    numFramesToInterpolate = 3
    interpolateState = InterpolateState(numFramesToInterpolate, twoAgentTransit)
    stateIndexInTimeStep = 0
    actionIndexInTimeStep = 1
    chaseTrial = ChaseTrialWithTraj(stateIndexInTimeStep, drawState,
                                    interpolateState, actionIndexInTimeStep)
    [chaseTrial(trajectory) for trajectory in trajectories]
    pg.quit()
def main():
    """Sample random-policy trajectories in the swamp world and render them.

    Same swamp-world MDP shape as the MCTS demo but with unit transition
    noise, a target at (200, 200), a fixed random action distribution, and
    ten sampled trajectories. Relies on the module-level `static` transition
    for the stationary target agent.
    """
    # --- MDP environment ---
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    swamp = [[[300, 400], [300, 400]]]  # one patch: [[xMin, xMax], [yMin, yMax]]
    noise = [1, 1]
    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)
    transitionWithNoise = TransitionWithNoise(noise)

    minDistance = 50  # terminal when within this distance of the target
    target = [200, 200]
    isTerminal = IsTerminal(minDistance, target)
    isInSwamp = IsInSwamp(swamp)

    singleAgentTransit = MovingAgentTransitionInSwampWorld(
        transitionWithNoise, stayInBoundaryByReflectVelocity, isTerminal)
    transitionFunctionPack = [singleAgentTransit, static]  # target never moves
    multiAgentTransition = MultiAgentTransitionInGeneral(transitionFunctionPack)
    twoAgentTransit = MultiAgentTransitionInSwampWorld(multiAgentTransition, target)

    numOfAgent = 2
    xBoundaryReset = [500, 600]
    yBoundaryReset = [0, 100]
    resetState = Reset(xBoundaryReset, yBoundaryReset, numOfAgent, target)
    actionSpace = [[10, 0], [-10, 0], [-10, -10], [10, 10],
                   [0, 10], [0, -10], [-10, 10], [10, -10]]

    # --- reward ---
    actionCost = -1
    swampPenalty = -10
    terminalReward = 10
    rewardFunction = RewardFunction(actionCost, terminalReward, swampPenalty,
                                    isTerminal, isInSwamp)

    maxRunningSteps = 100
    oneStepSampleTrajectory = OneStepSampleTrajectory(twoAgentTransit, rewardFunction)
    sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, resetState,
                                        oneStepSampleTrajectory)

    # Uniform random policy; the distribution is fixed across all steps.
    randomPolicy = RandomPolicy(actionSpace)
    actionDistribution = randomPolicy()
    sampleAction = SampleFromDistribution(actionDistribution)
    trajectories = [sampleTrajectory(sampleAction) for _ in range(10)]

    DIRNAME = os.path.dirname(__file__)
    trajectoryDirectory = os.path.join(DIRNAME, '..', '..', 'data',
                                       'evaluateObstacle', 'trajectories')
    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)

    # --- generate demo image ---
    screenWidth = 600
    screenHeight = 600
    screen = pg.display.set_mode((screenWidth, screenHeight))
    screenColor = THECOLORS['black']
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    lineColor = THECOLORS['white']
    lineWidth = 4
    xSwamp = [300, 400]
    ySwamp = [300, 400]
    drawBackground = DrawBackground(screen, screenColor, xBoundary, yBoundary,
                                    lineColor, lineWidth, xSwamp, ySwamp)
    fps = 40
    circleColorSpace = np.array([[0, 0, 255], [0, 255, 255]])
    circleSize = 10
    positionIndex = [0, 1]
    saveImage = True
    imageSavePath = os.path.join(trajectoryDirectory, 'picMovingSheep')
    if not os.path.exists(imageSavePath):
        os.makedirs(imageSavePath)
    trajectoryParameters = 'obstacle'
    imageFolderName = str(trajectoryParameters)
    saveImageDir = os.path.join(os.path.join(imageSavePath, imageFolderName))
    if not os.path.exists(saveImageDir):
        os.makedirs(saveImageDir)
    agentIdsToDraw = list(range(2))
    drawState = DrawState(fps, screen, circleColorSpace, circleSize,
                          agentIdsToDraw, positionIndex, saveImage,
                          saveImageDir, drawBackground)

    numFramesToInterpolate = 3
    interpolateState = InterpolateState(numFramesToInterpolate, twoAgentTransit)
    stateIndexInTimeStep = 0
    actionIndexInTimeStep = 1
    chaseTrial = ChaseTrialWithTraj(stateIndexInTimeStep, drawState,
                                    interpolateState, actionIndexInTimeStep)
    [chaseTrial(trajectory) for trajectory in trajectories]
    pg.quit()
def main():
    """Load hierarchy-planning trajectories and replay the ten longest as a demo.

    Reads pickled trajectories saved under data/evaluateHierarchyPlanning,
    colors the wolf agents by their (softened) inferred-goal posterior stored
    at timestep index 4, and renders with pygame. Relies on module-level
    helpers `loadFromPickle` and `updateColorSpace`.
    """
    DIRNAME = os.path.dirname(__file__)
    trajectoryDirectory = os.path.join(DIRNAME, '..', '..', 'data',
                                       'evaluateHierarchyPlanning', 'trajectories')
    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)

    # Fixed parameters identifying the saved trajectory files.
    NNNumSimulations = 250
    maxRunningSteps = 52
    softParameterInPlanningForSheep = 2.5
    softParameterInPlanning = 2.5
    hierarchy = 0
    trajectoryFixedParameters = {'sheepPolicySoft': softParameterInPlanningForSheep,
                                 'wolfPolicySoft': softParameterInPlanning,
                                 'maxRunningSteps': maxRunningSteps,
                                 'hierarchy': hierarchy,
                                 'NNNumSimulations': NNNumSimulations}
    trajectoryExtension = '.pickle'
    getTrajectorySavePath = GetSavePath(trajectoryDirectory, trajectoryExtension,
                                        trajectoryFixedParameters)

    # Load the trajectories for this experimental condition.
    loadTrajectories = LoadTrajectories(getTrajectorySavePath, loadFromPickle)
    numWolves = 2
    numSheep = 2
    valuePriorEndTime = -100
    valuePriorSoftMaxBeta = 0.0
    trajectoryParameters = {'numWolves': numWolves,
                            'numSheep': numSheep,
                            'valuePriorEndTime': valuePriorEndTime,
                            'valuePriorSoftMaxBeta': valuePriorSoftMaxBeta}
    trajectories = loadTrajectories(trajectoryParameters)

    # --- generate demo image ---
    screenWidth = 600
    screenHeight = 600
    screen = pg.display.set_mode((screenWidth, screenHeight))
    screenColor = THECOLORS['black']
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    lineColor = THECOLORS['white']
    lineWidth = 4
    drawBackground = DrawBackground(screen, screenColor, xBoundary, yBoundary,
                                    lineColor, lineWidth)
    FPS = 32
    # Sheep drawn grey, wolves white (recolored per-frame by the posterior).
    circleColorSpace = [[100, 100, 100]] * numSheep + [[255, 255, 255]] * numWolves
    circleSize = 10
    positionIndex = [0, 1]
    agentIdsToDraw = list(range(numSheep + numWolves))
    saveImage = True
    imageSavePath = os.path.join(trajectoryDirectory, 'picMovingSheep')
    if not os.path.exists(imageSavePath):
        os.makedirs(imageSavePath)
    imageFolderName = str('forDemo')
    saveImageDir = os.path.join(os.path.join(imageSavePath, imageFolderName))
    if not os.path.exists(saveImageDir):
        os.makedirs(saveImageDir)

    # Recolor wolves by the softened posterior over which sheep they pursue.
    goalSpace = list(range(numSheep))
    imaginedWeIdsForInferenceSubject = list(range(numSheep, numWolves + numSheep))
    softParameter = 1
    softFunction = SoftDistribution(softParameter)
    updateColorSpaceByPosterior = lambda colorSpace, posterior: updateColorSpace(
        colorSpace,
        [softFunction(individualPosterior) for individualPosterior in posterior],
        goalSpace, imaginedWeIdsForInferenceSubject)

    outsideCircleAgentIds = imaginedWeIdsForInferenceSubject
    outsideCircleColor = np.array([[255, 0, 0]] * numWolves)
    outsideCircleSize = 15
    drawCircleOutside = DrawCircleOutside(screen, outsideCircleAgentIds,
                                          positionIndex, outsideCircleColor,
                                          outsideCircleSize)
    drawState = DrawState(FPS, screen, circleColorSpace, circleSize,
                          agentIdsToDraw, positionIndex, saveImage, saveImageDir,
                          drawBackground, updateColorSpaceByPosterior,
                          drawCircleOutside)

    # --- MDP env (used only to interpolate frames between timesteps) ---
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)
    transit = InterpolateOneFrame(stayInBoundaryByReflectVelocity)
    numFramesToInterpolate = 7
    interpolateState = InterpolateState(numFramesToInterpolate, transit)

    stateIndexInTimeStep = 0
    actionIndexInTimeStep = 1
    posteriorIndexInTimeStep = 4
    chaseTrial = ChaseTrialWithTraj(stateIndexInTimeStep, drawState,
                                    interpolateState, actionIndexInTimeStep,
                                    posteriorIndexInTimeStep)

    print(len(trajectories))
    lens = [len(trajectory) for trajectory in trajectories]
    index = np.argsort(-np.array(lens))  # longest trajectories first
    print(index)
    print(trajectories[0][1])
    [chaseTrial(trajectory) for trajectory in np.array(trajectories)[index[0:10]]]
    print([len(trajectory) for trajectory in np.array(trajectories)[index[:]]])
def main():
    """Load compete-detection trajectories and replay the tenth-longest one.

    Reads pickled trajectories saved under data/evaluateCompeteDetection,
    recolors agents from the inference posterior stored at timestep index 4,
    and renders with pygame. Relies on module-level helpers `loadFromPickle`
    and `updateColorSpace`.
    """
    DIRNAME = os.path.dirname(__file__)
    trajectoryDirectory = os.path.join(DIRNAME, '..', '..', 'data',
                                       'evaluateCompeteDetection', 'trajectories')
    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)

    # Fixed parameters identifying the saved trajectory files.
    NNNumSimulations = 250
    maxRunningSteps = 61
    softParameterInPlanningForSheep = 2.0
    softParameterInPlanning = 2.0
    trajectoryFixedParameters = {
        'sheepPolicySoft': softParameterInPlanningForSheep,
        'wolfPolicySoft': softParameterInPlanning,
        'maxRunningSteps': maxRunningSteps,
        'NNNumSimulations': NNNumSimulations
    }
    trajectoryExtension = '.pickle'
    getTrajectorySavePath = GetSavePath(trajectoryDirectory, trajectoryExtension,
                                        trajectoryFixedParameters)

    loadTrajectories = LoadTrajectories(getTrajectorySavePath, loadFromPickle)
    numWolves = 2
    numSheep = 1
    competePolicy = 'heatseeking'
    heatseekingPrecesion = 1.83
    otherCompeteRate = 1.0
    competeDetectionRate = 0.5
    inferenceSoft = 0.05
    # NOTE: 'heatseekingPrecesion' is misspelled in the saved file names;
    # the key must stay as-is or the lookup fails.
    trajectoryParameters = {
        'heatseekingPrecesion': heatseekingPrecesion,
        'inferenceSoft': inferenceSoft,
        'numWolves': numWolves,
        'numSheep': numSheep,
        'competePolicy': competePolicy,
        'otherCompeteRate': otherCompeteRate,
        'competeDetectionRate': competeDetectionRate
    }
    trajectories = loadTrajectories(trajectoryParameters)

    # --- generate demo image ---
    screenWidth = 600
    screenHeight = 600
    screen = pg.display.set_mode((screenWidth, screenHeight))
    screenColor = THECOLORS['black']
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    lineColor = THECOLORS['white']
    lineWidth = 4
    drawBackground = DrawBackground(screen, screenColor, xBoundary, yBoundary,
                                    lineColor, lineWidth)
    FPS = 24
    circleColorSpace = [[0, 255, 0]] * numSheep + [[255, 0, 0]] * numWolves
    circleSize = 10
    positionIndex = [0, 1]
    agentIdsToDraw = list(range(numSheep + numWolves))
    saveImage = True
    imageSavePath = os.path.join(trajectoryDirectory, 'picMovingSheep')
    if not os.path.exists(imageSavePath):
        os.makedirs(imageSavePath)
    imageFolderName = str('forDemo')
    saveImageDir = os.path.join(os.path.join(imageSavePath, imageFolderName))
    if not os.path.exists(saveImageDir):
        os.makedirs(saveImageDir)

    # Recolor by the posterior over compete vs. cooperate intentions.
    imaginedWeIdsForInferenceSubject = list(range(numSheep, numWolves + numSheep))
    softParameter = 1.1
    softFunction = SoftDistribution(softParameter)
    selfPosteriorIndex = 0
    concernedAgentId = 2
    competeIntention = (0, ())
    cooperateIntention = (0, tuple(range(numSheep, numSheep + numWolves)))
    updateColorSpaceByPosterior = lambda colorSpace, posterior: updateColorSpace(
        colorSpace,
        [softFunction(individualPosterior) for individualPosterior in posterior],
        selfPosteriorIndex, concernedAgentId, competeIntention, cooperateIntention)

    outsideCircleAgentIds = imaginedWeIdsForInferenceSubject
    outsideCircleColor = np.array([[0, 0, 0]] * numWolves)
    outsideCircleSize = 15
    drawCircleOutside = DrawCircleOutside(screen, outsideCircleAgentIds,
                                          positionIndex, outsideCircleColor,
                                          outsideCircleSize)
    drawState = DrawState(FPS, screen, circleColorSpace, circleSize,
                          agentIdsToDraw, positionIndex, saveImage, saveImageDir,
                          drawBackground, updateColorSpaceByPosterior,
                          drawCircleOutside)

    # --- MDP env (frame interpolation only) ---
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
        xBoundary, yBoundary)
    transit = InterpolateOneFrame(stayInBoundaryByReflectVelocity)
    numFramesToInterpolate = 5
    interpolateState = InterpolateState(numFramesToInterpolate, transit)

    stateIndexInTimeStep = 0
    actionIndexInTimeStep = 1
    posteriorIndexInTimeStep = 4
    chaseTrial = ChaseTrialWithTraj(stateIndexInTimeStep, drawState,
                                    interpolateState, actionIndexInTimeStep,
                                    posteriorIndexInTimeStep)

    print(len(trajectories))
    lens = [len(trajectory) for trajectory in trajectories]
    index = np.argsort(-np.array(lens))  # longest trajectories first
    print(index)
    print(trajectories[0][1])
    # Replay only the 10th-longest trajectory.
    [chaseTrial(trajectory) for trajectory in np.array(trajectories)[index[9:10]]]
def main():
    """Sample trajectories for each experimental condition and visualize one.

    Builds the cross product of the manipulated variables (here only
    numOfAgent = [2]), samples `numTrajectories` trajectories per condition via
    the module-level `composeFowardOneTimeStepWithRandomSubtlety`, then renders
    the first condition's trajectories with pygame.
    """
    manipulatedVariables = OrderedDict()
    manipulatedVariables['numOfAgent'] = [2]

    # Cross product of all variable settings -> one parameter dict per condition.
    productedValues = it.product(*[[(key, value) for value in values]
                                   for key, values in manipulatedVariables.items()])
    parametersAllCondtion = [dict(list(specificValueParameter))
                             for specificValueParameter in productedValues]

    numTrajectories = 3
    sampleTrajectoriesForConditions = SampleTrajectoriesForCoditions(
        numTrajectories, composeFowardOneTimeStepWithRandomSubtlety)
    trajectoriesMultipleConditions = [sampleTrajectoriesForConditions(para)
                                      for para in parametersAllCondtion]

    visualConditionIndex = 0
    trajectoriesToVisualize = trajectoriesMultipleConditions[visualConditionIndex]

    visualize = True
    if visualize:
        screenWidth = 640
        screenHeight = 480
        screen = pg.display.set_mode((screenWidth, screenHeight))
        screenColor = THECOLORS['black']
        xBoundary = [0, 640]
        yBoundary = [0, 480]
        lineColor = THECOLORS['white']
        lineWidth = 4
        drawBackground = DrawBackground(screen, screenColor, xBoundary,
                                        yBoundary, lineColor, lineWidth)

        numOfAgent = 2
        numDistractors = numOfAgent - 2  # sheep + wolf are the first two agents
        circleColorSpace = [[0, 255, 0], [255, 0, 0]] + \
            [[255, 255, 255]] * numDistractors
        circleSize = 10
        positionIndex = [0, 1]
        agentIdsToDraw = list(range(numOfAgent))
        saveImage = False
        dirPYFile = os.path.dirname(__file__)
        imageSavePath = os.path.join(dirPYFile, '..', 'data', 'forDemo')
        if not os.path.exists(imageSavePath):
            os.makedirs(imageSavePath)
        FPS = 30
        drawState = DrawState(FPS, screen, circleColorSpace, circleSize,
                              agentIdsToDraw, positionIndex, saveImage,
                              imageSavePath, drawBackground)

        # --- MDP env ---
        xBoundary = [0, 640]
        yBoundary = [0, 480]
        stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(
            xBoundary, yBoundary)
        distanceToVisualDegreeRatio = 20
        killzoneRadius = 2.5 * distanceToVisualDegreeRatio
        sheepId = 0
        wolfId = 1
        isTerminal = IsTerminal(sheepId, wolfId, killzoneRadius)

        numMDPTimeStepPerSecond = 5  # change direction every 200 ms
        # Interpolate each MDP timestep to multiple frames; terminal is
        # checked for each frame.
        numFramesToInterpolate = int(FPS / numMDPTimeStepPerSecond - 1)
        interpolateStateForVisualization = InterpolateStateForVisualization(
            numFramesToInterpolate, stayInBoundaryByReflectVelocity, isTerminal)

        stateIndexInTimeStep = 0
        actionIndexInTimeStep = 1
        nextStateIndexInTimeStep = 2
        visualizeTraj = VisualizeTraj(stateIndexInTimeStep,
                                      actionIndexInTimeStep,
                                      nextStateIndexInTimeStep, drawState,
                                      interpolateStateForVisualization)
        [visualizeTraj(trajectory) for trajectory in trajectoriesToVisualize]