def __init__(self, real, simulator, expParams, searchParams):
    """Bind the real environment, the simulator and the parameter
    bundles, then prime the fast-UCB lookup table."""
    self.Real = real
    self.Simulator = simulator
    self.ExpParams = expParams
    self.SearchParams = searchParams
    # Auto-exploration overrides whatever constant the caller supplied.
    if self.ExpParams.AutoExploration:
        self.SearchParams.ExplorationConstant = 1
    MCTS.InitFastUCB(self.SearchParams.ExplorationConstant)
def __init__(self, real, simulator, outputFile, expParams, searchParams):
    """Bind environment, simulator, output sink and parameter bundles,
    create the results accumulator and prime the fast-UCB table."""
    self.Real = real
    self.Simulator = simulator
    self.OutputFile = outputFile
    self.ExpParams = expParams
    self.SearchParams = searchParams
    self.Results = RESULTS.RESULTS()
    if self.ExpParams.AutoExploration:
        # RAVE already supplies an exploration bias, so the UCB bonus is
        # zeroed; otherwise scale exploration to the reward range.
        self.SearchParams.ExplorationConstant = (
            0 if self.SearchParams.UseRave else simulator.GetRewardRange())
    MCTS.InitFastUCB(self.SearchParams.ExplorationConstant)
def RunOne(self, tries): mcts = MCTS.MCTS(self.Simulator, self.SearchParams) for x in range(tries): self.RunSim(mcts) state = self.Real.CreateStartState() ei = state.experimentInstance steps = 0 while not state.terminal_or_allReach(): steps = steps + 1 action = mcts.GreedyUCB(state, False) state, terminal, observation, reward = self.Real.RealStep( state, action) ei = state.experimentInstance print "Terminated" precision, recall = ei.calc_precision_recall() print "end", repr(ei) print precision, recall, steps
def RunMultiple(self):
    """Run a single episode driven by online MCTS action selection.

    Returns a (precision, recall, steps) tuple computed from the final
    experiment instance.
    """
    mcts = MCTS.MCTS(self.Simulator, self.SearchParams)
    state = self.Real.CreateStartState()  # STATE*
    steps = 0
    terminal = False
    while not (terminal or state.terminal_or_allReach()):
        action = mcts.SelectAction()
        state, terminal, observation, reward = self.Real.RealStep(
            state, action)
        mcts.Update(state)
        steps += 1
    ei = state.experimentInstance
    precision, recall = ei.calc_precision_recall()
    return precision, recall, steps
def Run(self): #boost::timer timer mcts = MCTS.MCTS(self.Simulator, self.SearchParams) undiscountedReturn = 0.0 discountedReturn = 0.0 discount = 1.0 terminal = False outOfParticles = False t = 0 state = self.Real.CreateStartState() #STATE* for t in range(int(self.ExpParams.NumSteps)): action = mcts.SelectAction() # print "action", action, state.experimentInstance.initial_tests, state.getMaxProb(), state.experimentInstance.error terminal, observation, reward = self.Real.Step(state, action) # print "state.getMaxProb", state.getMaxProb() self.Results.Reward.Add(reward) undiscountedReturn += reward discountedReturn += reward * discount discount *= self.Real.GetDiscount() if (terminal): print "Terminated", state.getMaxProb(), len( state.experimentInstance.initial_tests ) #, str(mcts.StatTreeDepth) , str(mcts.StatRolloutDepth) , str(mcts.StatTotalReward) break outOfParticles = not mcts.Update(action, observation, reward) if (outOfParticles): break # if (timer.elapsed() > self.ExpParams.TimeOut): # print "Timed out after ", t , " steps in ", self.results.Time.GetTotal() , "seconds" # break if (outOfParticles): print "Out of particles, finishing episode with SelectRandom" history = mcts.GetHistory() #HISTORY while (++t < self.ExpParams.NumSteps): observation = 0 reward = 0.0 # This passes real state into simulator! 
# SelectRandom must only use fully observable state # to avoid "cheating" action = self.Simulator.SelectRandom(state, history, mcts.GetStatus()) terminal, observation, reward = self.Real.Step(state, action) self.Results.Reward.Add(reward) undiscountedReturn += reward discountedReturn += reward * discount discount *= self.Real.GetDiscount() if (terminal): print "Terminated" #, str(mcts.StatTreeDepth) , str(mcts.StatRolloutDepth) , str(mcts.StatTotalReward) break history.Add(action, observation) #self.results.Time.Add(timer.elapsed()) self.Results.Time.Add(-1) self.Results.UndiscountedReturn.Add(undiscountedReturn) self.Results.DiscountedReturn.Add(discountedReturn)