Ejemplo n.º 1
0
 def __init__(self, real, simulator, expParams, searchParams):
     """Wire up the real environment, the simulator and both parameter sets.

     When AutoExploration is requested, the exploration constant is forced
     to 1 before the fast-UCB table is initialised.
     """
     self.Real = real
     self.Simulator = simulator
     self.ExpParams = expParams
     self.SearchParams = searchParams
     if self.ExpParams.AutoExploration:
         self.SearchParams.ExplorationConstant = 1
     # Pre-compute the UCB lookup table with the final exploration constant.
     MCTS.InitFastUCB(self.SearchParams.ExplorationConstant)
Ejemplo n.º 2
0
 def __init__(self, real, simulator, outputFile, expParams, searchParams):
     """Wire up the experiment and prepare a RESULTS accumulator.

     With AutoExploration enabled, the exploration constant is chosen
     automatically: 0 under RAVE, otherwise the simulator's reward range.
     """
     self.Real = real
     self.Simulator = simulator
     self.OutputFile = outputFile
     self.ExpParams = expParams
     self.SearchParams = searchParams
     self.Results = RESULTS.RESULTS()
     if self.ExpParams.AutoExploration:
         self.SearchParams.ExplorationConstant = (
             0 if self.SearchParams.UseRave else simulator.GetRewardRange())
     # Pre-compute the UCB lookup table with the final exploration constant.
     MCTS.InitFastUCB(self.SearchParams.ExplorationConstant)
Ejemplo n.º 3
0
    def RunOne(self, tries):
        mcts = MCTS.MCTS(self.Simulator, self.SearchParams)
        for x in range(tries):
            self.RunSim(mcts)
        state = self.Real.CreateStartState()
        ei = state.experimentInstance
        steps = 0
        while not state.terminal_or_allReach():
            steps = steps + 1
            action = mcts.GreedyUCB(state, False)
            state, terminal, observation, reward = self.Real.RealStep(
                state, action)
            ei = state.experimentInstance

        print "Terminated"

        precision, recall = ei.calc_precision_recall()
        print "end", repr(ei)
        print precision, recall, steps
Ejemplo n.º 4
0
    def RunMultiple(self):
        """Play one episode with MCTS-selected actions in the real environment.

        Returns:
            (precision, recall, steps) computed from the final state's
            experiment instance.
        """
        mcts = MCTS.MCTS(self.Simulator, self.SearchParams)

        state = self.Real.CreateStartState()  # STATE*
        terminal = False
        steps = 0
        # Loop until either the state itself says it is done or the real
        # environment reported a terminal transition.
        while not state.terminal_or_allReach() and not terminal:
            action = mcts.SelectAction()
            ei = state.experimentInstance
            state, terminal, observation, reward = self.Real.RealStep(
                state, action)
            mcts.Update(state)
            steps += 1

        ei = state.experimentInstance
        precision, recall = ei.calc_precision_recall()
        return precision, recall, steps
Ejemplo n.º 5
0
    def Run(self):
        #boost::timer timer
        mcts = MCTS.MCTS(self.Simulator, self.SearchParams)

        undiscountedReturn = 0.0
        discountedReturn = 0.0
        discount = 1.0
        terminal = False
        outOfParticles = False
        t = 0

        state = self.Real.CreateStartState()  #STATE*

        for t in range(int(self.ExpParams.NumSteps)):
            action = mcts.SelectAction()
            # print "action", action, state.experimentInstance.initial_tests, state.getMaxProb(), state.experimentInstance.error
            terminal, observation, reward = self.Real.Step(state, action)
            # print "state.getMaxProb",  state.getMaxProb()

            self.Results.Reward.Add(reward)
            undiscountedReturn += reward
            discountedReturn += reward * discount
            discount *= self.Real.GetDiscount()

            if (terminal):
                print "Terminated", state.getMaxProb(), len(
                    state.experimentInstance.initial_tests
                )  #, str(mcts.StatTreeDepth) , str(mcts.StatRolloutDepth) , str(mcts.StatTotalReward)
                break

            outOfParticles = not mcts.Update(action, observation, reward)
            if (outOfParticles):
                break

        # if (timer.elapsed() > self.ExpParams.TimeOut):

        #    print  "Timed out after ", t , " steps in ", self.results.Time.GetTotal() , "seconds"
        #   break

        if (outOfParticles):

            print "Out of particles, finishing episode with SelectRandom"
            history = mcts.GetHistory()  #HISTORY
            while (++t < self.ExpParams.NumSteps):

                observation = 0
                reward = 0.0

                # This passes real state into simulator!
                # SelectRandom must only use fully observable state
                # to avoid "cheating"
                action = self.Simulator.SelectRandom(state, history,
                                                     mcts.GetStatus())
                terminal, observation, reward = self.Real.Step(state, action)

                self.Results.Reward.Add(reward)
                undiscountedReturn += reward
                discountedReturn += reward * discount
                discount *= self.Real.GetDiscount()

                if (terminal):

                    print "Terminated"  #, str(mcts.StatTreeDepth) , str(mcts.StatRolloutDepth) , str(mcts.StatTotalReward)
                    break

                history.Add(action, observation)

        #self.results.Time.Add(timer.elapsed())
        self.Results.Time.Add(-1)
        self.Results.UndiscountedReturn.Add(undiscountedReturn)
        self.Results.DiscountedReturn.Add(discountedReturn)