def setUp(self):
    # Env param
    bound_low = 0
    bound_high = 7
    self.transition = TransitionFunction(bound_low, bound_high)
    self.action_space = [-1, 1]
    self.num_action_space = len(self.action_space)
    self.actionPrior_func = GetActionPrior(self.action_space)
    step_penalty = -1
    catch_reward = 1
    self.target_state = bound_high
    self.isTerminal = Terminal(self.target_state)

    self.c_init = 0
    self.c_base = 1
    self.calculateScore = CalculateScore(self.c_init, self.c_base)
    self.selectChild = SelectChild(self.calculateScore)

    init_state = 3
    level1_0_state = self.transition(init_state, action=0)
    level1_1_state = self.transition(init_state, action=1)
    self.default_actionPrior = 0.5

    self.root = Node(id={1: init_state}, numVisited=1, sumValue=0,
                     actionPrior=self.default_actionPrior, isExpanded=True)
    self.level1_0 = Node(parent=self.root, id={0: level1_0_state}, numVisited=2, sumValue=5,
                         actionPrior=self.default_actionPrior, isExpanded=False)
    self.level1_1 = Node(parent=self.root, id={1: level1_1_state}, numVisited=3, sumValue=10,
                         actionPrior=self.default_actionPrior, isExpanded=False)

    self.initializeChildren = InitializeChildren(self.action_space, self.transition,
                                                 self.actionPrior_func)
    self.expand = Expand(self.isTerminal, self.initializeChildren)
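# A minimal sketch of a test that could sit alongside this fixture; the call
# conventions (selectChild takes a node and returns one of its children, Expand marks
# the returned node as expanded) are assumptions inferred from the names above, not
# confirmed project API.
def testSelectChildReturnsOneOfRootsChildren(self):
    selectedChild = self.selectChild(self.root)
    self.assertIn(selectedChild, [self.level1_0, self.level1_1])

def testExpandMarksLeafAsExpanded(self):
    expandedNode = self.expand(self.level1_0)
    self.assertTrue(expandedNode.isExpanded)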
def main():
    # experiment conditions
    maxRunningSteps = 15
    numTrials = 50
    manipulatedVariables = OrderedDict()
    manipulatedVariables['qPosInit'] = [(0.3, 0, -0.3, 0), (9.75, 0, 9.15, 0),
                                        (9.75, 9.75, 9.3, 9.3)]
    manipulatedVariables['sheepPolicyName'] = ['mcts', 'random']
    manipulatedVariables['numSimulations'] = [5, 25, 50, 100, 250, 400]
    levelNames = list(manipulatedVariables.keys())
    levelValues = list(manipulatedVariables.values())
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)

    # reset function
    envModelName = 'twoAgents'
    qVelInit = [0, 0, 0, 0]
    numAgents = 2
    qPosInitNoise = 0
    qVelInitNoise = 0

    # isTerminal
    killzoneRadius = 0.5
    isTerminal = IsTerminal(killzoneRadius)

    # transit is the multi-agent transition; sheepTransit is used inside the sheep's MCTS
    # simulation (it only takes the sheep's action as input)
    renderOn = False
    numSimulationFrames = 20
    transit = TransitionFunction(envModelName, isTerminal, renderOn, numSimulationFrames)
    sheepTransit = lambda state, action: transit(state, [action, stationaryWolfPolicy(state)])

    # reward function
    aliveBonus = 0.05
    deathPenalty = -1
    rewardFunction = reward.RewardFunctionCompete(aliveBonus, deathPenalty, isTerminal)

    # select child
    cInit = 1
    cBase = 100
    calculateScore = CalculateScore(cInit, cBase)
    selectChild = SelectChild(calculateScore)

    # prior
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7)]
    getActionPrior = GetActionPrior(actionSpace)

    # initialize children; expand
    initializeChildren = InitializeChildren(actionSpace, sheepTransit, getActionPrior)
    expand = Expand(isTerminal, initializeChildren)

    # random rollout policy
    numActionSpace = len(actionSpace)
    rolloutPolicy = lambda state: actionSpace[np.random.choice(range(numActionSpace))]

    # select next action
    selectNextAction = SelectNextAction(sheepTransit)

    sheepId = 0
    wolfId = 1
    xPosIndex = 2
    numXPosEachAgent = 2
    getSheepXPos = GetAgentPos(sheepId, xPosIndex, numXPosEachAgent)
    getWolfXPos = GetAgentPos(wolfId, xPosIndex, numXPosEachAgent)

    # rollout
    rolloutHeuristicWeight = 0
    maxRolloutSteps = 10
    rolloutHeuristic = HeuristicDistanceToTarget(rolloutHeuristicWeight, getWolfXPos,
                                                 getSheepXPos)
    rollout = RollOut(rolloutPolicy, maxRolloutSteps, sheepTransit, rewardFunction,
                      isTerminal, rolloutHeuristic)

    # wrapper function
    getReset = lambda qPosInit: Reset(envModelName, qPosInit, qVelInit, numAgents,
                                      qPosInitNoise, qVelInitNoise)

    # all agents' policies
    getMCTS = lambda numSimulations: MCTS(numSimulations, selectChild, expand, rollout,
                                          backup, selectNextAction)
    sheepPolicyRandom = lambda state: actionSpace[np.random.choice(range(numActionSpace))]
    getSheepPolicy = GetSheepPolicy(getMCTS, sheepPolicyRandom)

    wolfActionMagnitude = 5
    wolfPolicyDirectlyTowardsSheep = WolfPolicyForceDirectlyTowardsSheep(
        getSheepXPos, getWolfXPos, wolfActionMagnitude)
    prepareAllAgentsPolicy = PrepareAllAgentsPolicy(getSheepPolicy,
                                                    wolfPolicyDirectlyTowardsSheep)

    # sample trajectory
    getSampleTrajectory = lambda reset: SampleTrajectory(maxRunningSteps, transit,
                                                         isTerminal, reset)

    # function to generate and save trajectories
    dataDirectory = '../data/testMCTSvsRandomInSheepEscape/trajectories'
    if not os.path.exists(dataDirectory):
        os.makedirs(dataDirectory)
    getSaveFileName = GetSaveFileName(dataDirectory, tupleToString)
    generateTrajectoriesAndComputeStatistics = GenerateTrajectoriesAndComputeStatistics(
        getReset, getSampleTrajectory, prepareAllAgentsPolicy, numTrials, getSaveFileName)

    # run all trials and save trajectories
    statisticsDf = toSplitFrame.groupby(levelNames).apply(
        generateTrajectoriesAndComputeStatistics)

    # plot the statistics
    fig = plt.figure()
    plotRowNum = 1
    plotColNum = 3
    plotCounter = 1
    for (key, dataDf) in statisticsDf.groupby('qPosInit'):
        dataDf.index = dataDf.index.droplevel('qPosInit')
        axForDraw = fig.add_subplot(plotRowNum, plotColNum, plotCounter)
        drawPerformanceLine(dataDf, axForDraw, str(key))
        plotCounter += 1

    plt.legend(loc='best')
    plt.show()
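# The script above references stationaryWolfPolicy and GetSheepPolicy without defining
# them; they are imported from elsewhere in the project. Hedged sketches of what they
# could look like are given below. The zero-force wolf action and the dispatch on the
# manipulated variable 'sheepPolicyName' are assumptions, not confirmed implementations.
stationaryWolfPolicy = lambda state: (0, 0)  # wolf exerts no force inside the sheep's MCTS simulation


class GetSheepPolicy:
    def __init__(self, getMCTS, randomPolicy):
        self.getMCTS = getMCTS
        self.randomPolicy = randomPolicy

    def __call__(self, policyName, numSimulations):
        # dispatch on the manipulated 'sheepPolicyName' condition
        if policyName == 'mcts':
            return self.getMCTS(numSimulations)
        return self.randomPolicy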
def evaluate(cInit, cBase):
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7)]
    numActionSpace = len(actionSpace)
    getActionPrior = GetActionPrior(actionSpace)
    numStateSpace = 4

    initSheepPosition = np.array([90, 90])
    initWolfPosition = np.array([90, 90])
    initSheepVelocity = np.array([0, 0])
    initWolfVelocity = np.array([0, 0])
    initSheepPositionNoise = np.array([40, 60])
    initWolfPositionNoise = np.array([0, 20])
    sheepPositionReset = ag.SheepPositionReset(initSheepPosition, initSheepPositionNoise)
    wolfPositionReset = ag.WolfPositionReset(initWolfPosition, initWolfPositionNoise)

    numOneAgentState = 2
    positionIndex = [0, 1]
    xBoundary = [0, 180]
    yBoundary = [0, 180]
    checkBoundaryAndAdjust = ag.CheckBoundaryAndAdjust(xBoundary, yBoundary)
    sheepPositionTransition = ag.SheepPositionTransition(numOneAgentState, positionIndex,
                                                         checkBoundaryAndAdjust)
    wolfSpeed = 7
    wolfPositionTransition = ag.WolfPositionTransition(numOneAgentState, positionIndex,
                                                       checkBoundaryAndAdjust, wolfSpeed)

    numAgent = 2
    sheepId = 0
    wolfId = 1
    transition = env.TransitionFunction(sheepId, wolfId, sheepPositionReset, wolfPositionReset,
                                        sheepPositionTransition, wolfPositionTransition)
    minDistance = 10
    isTerminal = env.IsTerminal(sheepId, wolfId, numOneAgentState, positionIndex, minDistance)

    screen = pg.display.set_mode([xBoundary[1], yBoundary[1]])
    screenColor = [255, 255, 255]
    circleColorList = [[50, 255, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50],
                       [50, 50, 50], [50, 50, 50], [50, 50, 50], [50, 50, 50]]
    circleSize = 8
    saveImage = True
    saveImageFile = 'image'
    render = env.Render(numAgent, numOneAgentState, positionIndex, screen, screenColor,
                        circleColorList, circleSize, saveImage, saveImageFile)

    aliveBonus = 0.05
    deathPenalty = -1
    rewardFunction = reward.RewardFunctionTerminalPenalty(sheepId, wolfId, numOneAgentState,
                                                          positionIndex, aliveBonus,
                                                          deathPenalty, isTerminal)

    # hyper-parameters
    numSimulations = 600
    maxRunningSteps = 70

    # MCTS algorithm
    # select child
    calculateScore = CalculateScore(cInit, cBase)
    selectChild = SelectChild(calculateScore)

    # expand
    initializeChildren = InitializeChildren(actionSpace, transition, getActionPrior)
    expand = Expand(transition, isTerminal, initializeChildren)

    # rollout
    rolloutPolicy = lambda state: actionSpace[np.random.choice(range(numActionSpace))]
    maxRollOutSteps = 50
    rollout = RollOut(rolloutPolicy, maxRollOutSteps, transition, rewardFunction, isTerminal)

    mcts = MCTS(numSimulations, selectChild, expand, rollout, backup, selectNextRoot)
    runMCTS = RunMCTS(mcts, maxRunningSteps, isTerminal, render)

    rootAction = actionSpace[np.random.choice(range(numActionSpace))]
    numTestingIterations = 70
    episodeLengths = []
    import datetime
    for step in range(numTestingIterations):
        print(datetime.datetime.now())
        state, action = None, None
        initState = transition(state, action)
        # optimal = math.ceil((np.sqrt(np.sum(np.power(initState[0:2] - initState[2:4], 2))) - minDistance) / 10)
        rootNode = Node(id={rootAction: initState}, num_visited=0, sum_value=0,
                        is_expanded=True)
        episodeLength = runMCTS(rootNode)
        episodeLengths.append(episodeLength)

    meanEpisodeLength = np.mean(episodeLengths)
    print("mean episode length is", meanEpisodeLength)
    return [meanEpisodeLength]
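# A hedged sketch of how evaluate(cInit, cBase) might be swept over exploration
# constants; the grid values below are illustrative assumptions, not values used
# in the project.
if __name__ == '__main__':
    cInitValues = [0.1, 1, 10]
    cBaseValues = [10, 100, 1000]
    results = {(cInit, cBase): evaluate(cInit, cBase)
               for cInit in cInitValues for cBase in cBaseValues}
    for condition, meanEpisodeLength in results.items():
        print(condition, meanEpisodeLength)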
def main():
    startTime = datetime.now()

    # experiment conditions
    maxRunningSteps = 1
    numTrials = 200
    numSimulations = 200
    manipulatedVariables = OrderedDict()
    manipulatedVariables['yCoordinate'] = [-9.75, -5, 0, 5, 9.75]
    manipulatedVariables['trialIndex'] = list(range(numTrials))  # not actually manipulated
    levelNames = list(manipulatedVariables.keys())
    levelValues = list(manipulatedVariables.values())
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)

    # reset function
    envModelName = 'twoAgents'
    qVelInit = [0, 0, 0, 0]
    numAgents = 2
    qPosInitNoise = 0
    qVelInitNoise = 0
    getQPosFromYCoordinate = lambda yCoordinate: [-9.75, yCoordinate, -9.15, yCoordinate]

    # isTerminal
    killzoneRadius = 0.5
    isTerminal = IsTerminal(killzoneRadius)

    # transit is the multi-agent transition
    # sheepTransit is used inside the sheep's MCTS simulation (only takes the sheep's action)
    renderOn = False
    numSimulationFrames = 20
    transit = TransitionFunction(envModelName, isTerminal, renderOn, numSimulationFrames)
    sheepTransit = lambda state, action: transit(state, [action, stationaryWolfPolicy(state)])

    # reward function
    aliveBonus = 0.05
    deathPenalty = -1
    rewardFunction = reward.RewardFunctionCompete(aliveBonus, deathPenalty, isTerminal)

    # select child
    cInit = 1
    cBase = 100
    calculateScore = CalculateScore(cInit, cBase)
    selectChild = SelectChild(calculateScore)

    # prior
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7)]
    getActionPrior = GetActionPrior(actionSpace)

    # initialize children; expand
    initializeChildren = InitializeChildren(actionSpace, sheepTransit, getActionPrior)
    expand = Expand(isTerminal, initializeChildren)

    # random rollout policy
    numActionSpace = len(actionSpace)
    rolloutPolicy = lambda state: actionSpace[np.random.choice(range(numActionSpace))]

    # select next action
    selectNextAction = SelectNextAction(sheepTransit)

    sheepId = 0
    wolfId = 1
    xPosIndex = 2
    numXPosEachAgent = 2
    getSheepXPos = GetAgentPos(sheepId, xPosIndex, numXPosEachAgent)
    getWolfXPos = GetAgentPos(wolfId, xPosIndex, numXPosEachAgent)

    # rollout
    rolloutHeuristicWeight = 0
    maxRolloutSteps = 10
    rolloutHeuristic = HeuristicDistanceToTarget(rolloutHeuristicWeight, getWolfXPos,
                                                 getSheepXPos)
    rollout = RollOut(rolloutPolicy, maxRolloutSteps, sheepTransit, rewardFunction,
                      isTerminal, rolloutHeuristic)

    # wrapper function
    getReset = lambda yCoordinate: Reset(envModelName, getQPosFromYCoordinate(yCoordinate),
                                         qVelInit, numAgents, qPosInitNoise, qVelInitNoise)

    # all agents' policies
    sheepPolicyMCTS = MCTS(numSimulations, selectChild, expand, rollout, backup,
                           selectNextAction)
    wolfActionMagnitude = 5
    wolfPolicyDirectlyTowardsSheep = WolfPolicyForceDirectlyTowardsSheep(
        getSheepXPos, getWolfXPos, wolfActionMagnitude)
    allAgentsPolicies = [sheepPolicyMCTS, wolfPolicyDirectlyTowardsSheep]
    policy = lambda state: [agentPolicy(state) for agentPolicy in allAgentsPolicies]

    # sample trajectory
    getSampleTrajectory = lambda reset: SampleTrajectory(maxRunningSteps, transit,
                                                         isTerminal, reset)
    runTrial = RunTrial(getReset, getSampleTrajectory, policy)

    # run all trials and generate trajectories
    # TODO: rename this "save trajectory" (it should also take the number of trials per condition)
    trialDataDf = toSplitFrame.groupby(levelNames).apply(runTrial)

    # TODO: save as csv instead of pickle; save each condition in a separate file and keep appending
    pickle_out = open('trajectories/testSheepFirstStepEscape.pickle', 'wb')
    pickle.dump(trialDataDf, pickle_out)
    pickle_out.close()

    print('trialDataDf')
    print(trialDataDf)

    # measurement function
    timeStep = 0
    getSheepActionFromAllAgentsActions = lambda allAgentActions: allAgentActions[sheepId]
    getFirstTrajectoryFromDf = lambda trajectoryDf: trajectoryDf.values[0][0]
    getAllAgentActionsFromTrajectory = lambda trajectory, timeStep: trajectory[timeStep][1]
    getSheepFirstActionFromTrajectoryDf = GetAgentActionFromTrajectoryDf(
        getFirstTrajectoryFromDf, timeStep, getSheepActionFromAllAgentsActions,
        getAllAgentActionsFromTrajectory)

    # make measurements on individual trials
    measurementDf = trialDataDf.groupby(levelNames).apply(getSheepFirstActionFromTrajectoryDf)
    pickle_out = open('measurements/testQEffectOnMCTSMujoco.pickle', 'wb')
    pickle.dump(measurementDf, pickle_out)
    pickle_out.close()
    print('measurementDf')
    print(measurementDf)

    # compute statistics on the measurements (mean, standard error)
    dataStatisticsDf = measurementDf.groupby('yCoordinate')
    # pickle_out = open('statistics/testSheepFirstStepEscape.pickle', 'wb')
    # pickle.dump(dataStatisticsDf, pickle_out)
    # pickle_out.close()

    # plot the statistics
    fig = plt.figure()
    plotRowNum = 5
    plotColNum = 1
    plotCounter = 1
    for (key, dataDf) in dataStatisticsDf:
        axForDraw = fig.add_subplot(plotRowNum, plotColNum, plotCounter)
        drawHistogram(dataDf, axForDraw)
        plotCounter += 1

    plt.legend(loc='best')
    plt.show()

    endTime = datetime.now()
    print("Time taken: ", endTime - startTime)
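# The measurement step above relies on GetAgentActionFromTrajectoryDf, imported from the
# project's measurement module. A hedged sketch of what such a callable might look like,
# based only on how it is constructed and applied here (the returned Series layout is an
# assumption):
class GetAgentActionFromTrajectoryDf:
    def __init__(self, getTrajectoryFromDf, timeStep, getAgentAction, getAllAgentActions):
        self.getTrajectoryFromDf = getTrajectoryFromDf
        self.timeStep = timeStep
        self.getAgentAction = getAgentAction
        self.getAllAgentActions = getAllAgentActions

    def __call__(self, trajectoryDf):
        # pull the first trajectory for this condition, then the chosen agent's action
        trajectory = self.getTrajectoryFromDf(trajectoryDf)
        allAgentActions = self.getAllAgentActions(trajectory, self.timeStep)
        agentAction = self.getAgentAction(allAgentActions)
        return pd.Series({'action': agentAction})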
def main():
    # MDP env
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    xSwamp = [300, 300]
    ySwamp = [300, 300]
    swamp = [[[300, 300], [300, 300]]]

    noise = [1, 1]
    stayInBoundaryByReflectVelocity = StayInBoundaryByReflectVelocity(xBoundary, yBoundary)
    transitionWithNoise = TransitionWithNoise(noise)

    minDistance = 50
    target = [200, 200]
    isTerminal = IsTerminal(minDistance, target)
    isInSwamp = IsInSwamp(swamp)

    singleAgentTransit = MovingAgentTransitionInSwampWorld(
        transitionWithNoise, stayInBoundaryByReflectVelocity, isTerminal)
    transitionFunctionPack = [singleAgentTransit, static]
    multiAgentTransition = MultiAgentTransitionInGeneral(transitionFunctionPack)
    twoAgentTransit = MultiAgentTransitionInSwampWorld(multiAgentTransition, target)

    numOfAgent = 2
    xBoundaryReset = [500, 600]
    yBoundaryReset = [0, 100]
    resetState = Reset(xBoundaryReset, yBoundaryReset, numOfAgent, target)

    actionSpace = [(100, 0), (-100, 0), (0, 100), (0, -100)]
    actionCost = -1
    swampPenalty = -10
    terminalReward = 100
    rewardFunction = RewardFunction(actionCost, terminalReward, swampPenalty,
                                    isTerminal, isInSwamp)

    maxRunningSteps = 50
    oneStepSampleTrajectory = OneStepSampleTrajectory(twoAgentTransit, rewardFunction)
    sampleTrajectory = SampleTrajectory(maxRunningSteps, isTerminal, resetState,
                                        oneStepSampleTrajectory)
    randomPolicy = RandomPolicy(actionSpace)
    actionDistribution = randomPolicy()

    # MCTS: numSimulation, selectChild, expand, estimateValue, backup, outputDistribution
    numSimulation = 200
    cInit = 0
    cBase = 1
    scoreChild = ScoreChild(cInit, cBase)
    selectChild = SelectChild(scoreChild)

    uniformActionPrior = {action: 1 / 4 for action in actionSpace}
    getActionPrior = lambda state: uniformActionPrior
    initializeChildren = InitializeChildren(actionSpace, twoAgentTransit, getActionPrior)
    expand = Expand(isTerminal, initializeChildren)

    rolloutPolicy = lambda state: random.choice(actionSpace)
    rolloutHeuristic = lambda state: 1
    maxRolloutStep = 10
    estimateValue = RollOut(rolloutPolicy, maxRolloutStep, twoAgentTransit, rewardFunction,
                            isTerminal, rolloutHeuristic)
    mctsSelectAction = MCTS(numSimulation, selectChild, expand, estimateValue, backup,
                            establishPlainActionDist)

    # sampleAction = SampleFromDistribution(actionDictionary)
    def sampleAction(state):
        actionDist = mctsSelectAction(state)
        action = maxFromDistribution(actionDist)
        return action

    trajectories = [sampleTrajectory(sampleAction) for _ in range(1)]
    print(trajectories)

    DIRNAME = os.path.dirname(__file__)
    trajectoryDirectory = os.path.join(DIRNAME, '..', '..', 'data', 'evaluateObstacle',
                                       'trajectories')
    if not os.path.exists(trajectoryDirectory):
        os.makedirs(trajectoryDirectory)

    # generate demo image
    screenWidth = 600
    screenHeight = 600
    screen = pg.display.set_mode((screenWidth, screenHeight))
    screenColor = THECOLORS['black']
    xBoundary = [0, 600]
    yBoundary = [0, 600]
    lineColor = THECOLORS['white']
    lineWidth = 4
    xSwamp = [300, 300]
    ySwamp = [300, 300]
    drawBackground = DrawBackground(screen, screenColor, xBoundary, yBoundary, lineColor,
                                    lineWidth, xSwamp, ySwamp)

    fps = 40
    circleColorSpace = np.array([[0, 0, 255], [0, 255, 255]])
    circleSize = 10
    positionIndex = [0, 1]
    saveImage = True
    imageSavePath = os.path.join(trajectoryDirectory, 'picMovingSheep')
    if not os.path.exists(imageSavePath):
        os.makedirs(imageSavePath)
    trajectoryParameters = 'obstacle'
    imageFolderName = str(trajectoryParameters)
    saveImageDir = os.path.join(imageSavePath, imageFolderName)
    if not os.path.exists(saveImageDir):
        os.makedirs(saveImageDir)

    agentIdsToDraw = list(range(2))
    drawState = DrawState(fps, screen, circleColorSpace, circleSize, agentIdsToDraw,
                          positionIndex, saveImage, saveImageDir, drawBackground)

    numFramesToInterpolate = 3
    interpolateState = InterpolateState(numFramesToInterpolate, twoAgentTransit)

    stateIndexInTimeStep = 0
    actionIndexInTimeStep = 1
    chaseTrial = ChaseTrialWithTraj(stateIndexInTimeStep, drawState, interpolateState,
                                    actionIndexInTimeStep)
    [chaseTrial(trajectory) for trajectory in trajectories]
    pg.quit()
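# establishPlainActionDist and maxFromDistribution are imported from the project's MCTS
# module. Hedged sketches are given below; normalizing children's visit counts into the
# output distribution, and the {action: state} node-id convention, are assumptions based
# on the Node usage elsewhere in this repo, not confirmed implementations.
def establishPlainActionDist(root):
    # convert each child's visit count into a normalized action probability
    visits = {list(child.id.keys())[0]: child.numVisited for child in root.children}
    totalVisits = sum(visits.values())
    return {action: count / totalVisits for action, count in visits.items()}


def maxFromDistribution(actionDist):
    # greedy selection: the action with the highest probability
    return max(actionDist, key=actionDist.get)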
def main():
    startTime = datetime.now()

    # experiment conditions
    numTrials = 2
    manipulatedVariables = OrderedDict()
    manipulatedVariables['numSimulations'] = [5, 6, 7]
    manipulatedVariables['rolloutHeuristicWeight'] = [0.1, 0]
    manipulatedVariables['maxRolloutSteps'] = [10, 0]
    manipulatedVariables['trialIndex'] = list(range(numTrials))
    levelNames = list(manipulatedVariables.keys())
    levelValues = list(manipulatedVariables.values())
    modelIndex = pd.MultiIndex.from_product(levelValues, names=levelNames)
    toSplitFrame = pd.DataFrame(index=modelIndex)

    # keep only the conditions where either the heuristic or the rollout is active:
    # (rolloutHeuristicWeight == 0.1, maxRolloutSteps == 0) or (weight == 0, 10 rollout steps)
    keepCondition = lambda row: (row[1] == 0.1 and row[2] == 0) or (row[1] == 0 and row[2] == 10)
    toSplitFrame = toSplitFrame[[keepCondition(row) for row in toSplitFrame.index]]

    # reset function
    envModelName = 'twoAgents'
    qPosInit = [-9.8, 9.8, 8, 0]
    qVelInit = [0, 0, 0, 0]
    numAgents = 2
    qPosInitNoise = 0
    qVelInitNoise = 0
    reset = Reset(envModelName, qPosInit, qVelInit, numAgents, qPosInitNoise, qVelInitNoise)

    # isTerminal
    killzoneRadius = 0.5
    isTerminal = IsTerminal(killzoneRadius)

    # transit is the multi-agent transition
    # sheepTransit is used inside the sheep's MCTS simulation (only takes the sheep's action)
    renderOn = False
    numSimulationFrames = 20
    transit = TransitionFunction(envModelName, isTerminal, renderOn, numSimulationFrames)
    sheepTransit = lambda state, action: transit(state, [action, stationaryWolfPolicy(state)])

    # reward function (aliveBonus is negative and deathPenalty is positive because the sheep
    # is chasing the wolf in this experiment)
    aliveBonus = -0.05
    deathPenalty = 1
    rewardFunction = reward.RewardFunctionCompete(aliveBonus, deathPenalty, isTerminal)

    # select child
    cInit = 1
    cBase = 100
    calculateScore = CalculateScore(cInit, cBase)
    selectChild = SelectChild(calculateScore)

    # prior
    actionSpace = [(10, 0), (7, 7), (0, 10), (-7, 7), (-10, 0), (-7, -7), (0, -10), (7, -7)]
    getActionPrior = GetActionPrior(actionSpace)

    # initialize children; expand
    initializeChildren = InitializeChildren(actionSpace, sheepTransit, getActionPrior)
    expand = Expand(isTerminal, initializeChildren)

    # random rollout policy
    numActionSpace = len(actionSpace)
    rolloutPolicy = lambda state: actionSpace[np.random.choice(range(numActionSpace))]

    # select next action
    selectNextAction = SelectNextAction(sheepTransit)

    sheepId = 0
    wolfId = 1
    xPosIndex = 2
    numXPosEachAgent = 2
    maxRunningSteps = 5
    getSheepXPos = GetAgentPos(sheepId, xPosIndex, numXPosEachAgent)
    getWolfXPos = GetAgentPos(wolfId, xPosIndex, numXPosEachAgent)

    # wrapper functions (adapt the heuristic, rollout, and MCTS constructors to the
    # manipulated variables)
    getRolloutHeuristic = lambda rolloutHeuristicWeight: HeuristicDistanceToTarget(
        rolloutHeuristicWeight, getWolfXPos, getSheepXPos)
    getRollout = lambda maxRolloutSteps, rolloutHeuristic: RollOut(
        rolloutPolicy, maxRolloutSteps, sheepTransit, rewardFunction, isTerminal,
        rolloutHeuristic)
    getMCTS = lambda numSimulations, rollout: MCTS(
        numSimulations, selectChild, expand, rollout, backup, selectNextAction)

    # sample trajectory
    sampleTrajectory = SampleTrajectory(maxRunningSteps, transit, isTerminal, reset)
    runTrial = RunTrial(getRolloutHeuristic, getRollout, getMCTS, sampleTrajectory)

    # run all trials and generate trajectories
    trialDataDf = toSplitFrame.groupby(levelNames).apply(runTrial)
    print(trialDataDf.values[0][0])
    pickle_out = open('trajectories/testQEffectOnMCTSMujoco.pickle', 'wb')
    pickle.dump(trialDataDf, pickle_out)
    pickle_out.close()

    # measurement function
    initState = reset()
    allAgentsOptimalActions = [(10, 0), (0, 0)]  # sheep moves directly towards the wolf
    optimalNextState = transit(initState, allAgentsOptimalActions)
    optimalNextPosition = getSheepXPos(optimalNextState)
    nextPositionIndex = 1  # we are interested in the state at the 2nd time step
    stateIndexInTuple = 0  # a time step is (state, action); state is the first element
    firstTrialIndex = 0
    getSheepXPosAtNextStepFromTrajectory = GetPosFromTrajectory(
        nextPositionIndex, stateIndexInTuple, sheepId, xPosIndex, numXPosEachAgent)
    getFirstTrajectoryFromDf = GetTrialTrajectoryFromDf(firstTrialIndex)
    measurementFunction = DistanceBetweenActualAndOptimalNextPosition(
        optimalNextPosition, getSheepXPosAtNextStepFromTrajectory, getFirstTrajectoryFromDf)

    # make measurements on individual trials
    measurementDf = trialDataDf.groupby(levelNames).apply(measurementFunction)
    pickle_out = open('measurements/testQEffectOnMCTSMujoco.pickle', 'wb')
    pickle.dump(measurementDf, pickle_out)
    pickle_out.close()

    # compute statistics on the measurements (mean, standard deviation)
    manipulatedVariablesExceptTrialIndex = copy.deepcopy(levelNames)
    manipulatedVariablesExceptTrialIndex.remove('trialIndex')
    dataStatisticsDf = measurementDf.groupby(
        manipulatedVariablesExceptTrialIndex).distance.agg(['mean', 'std'])
    print(dataStatisticsDf)
    pickle_out = open('statistics/testQEffectOnMCTSMujoco.pickle', 'wb')
    pickle.dump(dataStatisticsDf, pickle_out)
    pickle_out.close()

    # plot the statistics
    fig = plt.figure()
    plotRowNum = 1
    plotColNum = 1
    plotCounter = 1
    legendLabels = {(0, 10): 'Rollout', (0.1, 0): 'No Rollout'}
    axForDraw = fig.add_subplot(plotRowNum, plotColNum, plotCounter)
    drawPerformanceLine(dataStatisticsDf, axForDraw, 'numSimulations', legendLabels)

    plt.legend(loc='best')
    plt.show()

    endTime = datetime.now()
    print("Time taken: ", endTime - startTime)
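# DistanceBetweenActualAndOptimalNextPosition is imported from the project's measurement
# module. A hedged sketch of its likely behaviour, based only on how it is constructed and
# applied above; the Euclidean distance and the 'distance' column name (which matches the
# aggregation step) are assumptions about the internals.
class DistanceBetweenActualAndOptimalNextPosition:
    def __init__(self, optimalNextPosition, getPosAtNextStepFromTrajectory, getTrajectoryFromDf):
        self.optimalNextPosition = optimalNextPosition
        self.getPosAtNextStepFromTrajectory = getPosAtNextStepFromTrajectory
        self.getTrajectoryFromDf = getTrajectoryFromDf

    def __call__(self, trialDf):
        # take the trial's trajectory, read the sheep's position at the 2nd time step,
        # and measure how far it is from the position reached under the optimal action
        trajectory = self.getTrajectoryFromDf(trialDf)
        actualNextPosition = self.getPosAtNextStepFromTrajectory(trajectory)
        distance = np.linalg.norm(np.asarray(actualNextPosition) -
                                  np.asarray(self.optimalNextPosition))
        return pd.Series({'distance': distance})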