def main():
    """Run one discounted-return experiment on a hard-coded planning file.

    Creates default MCTS and experiment parameter objects, reads a planning
    instance from a fixed Windows path, wraps it in two DIAGNOSER objects
    (one built from the instance, one from ``ei.Copy()``), attaches a
    KNOWLEDGE object to the second, and calls
    ``EXPERIMENT.DiscountedReturn()``.

    Returns:
        0, unconditionally.

    NOTE(review): a later ``def main(ei)`` in this source rebinds the name
    ``main``; confirm which entry point is actually intended to be used.
    """
    searchParams = MCTSPARAMS.MCTSPARAMS()  # MCTS::PARAMS
    expParams = EXPERIMENTPARAMS.EXPERIMENTPARAMS()  # EXPERIMENT::PARAMS
    knowledge = KNOWLEDGE.KNOWLEDGE()  # SIMULATOR::KNOWLEDGE
    outputfile = ""

    # Raw string: same value as the original literal, which escaped only the
    # backslash before "40" (an unescaped "\40" would be an octal escape).
    planning_path = r"C:\projs\40_weka_randomForest9.txt"

    # NOTE(review): the original expression started with a stray leading dot
    # (".sfl.Diagnoser..."), which is not valid Python. The dot is dropped
    # here; the correct top-level package name should be confirmed.
    ei = sfl.Diagnoser.diagnoserUtils.readPlanningFile(planning_path)

    real = DIAGNOSER.DIAGNOSER(ei, 0.7)
    simulator = DIAGNOSER.DIAGNOSER(ei.Copy(), 0.7)
    simulator.SetKnowledge(knowledge)

    experiment = EXPERIMENT.EXPERIMENT(
        real, simulator, outputfile, expParams, searchParams)
    experiment.DiscountedReturn()
    return 0
def UnitTestSearch(depth):
    """Check that UCT search converges on the test simulator's optimal value.

    Builds ``TEST_SIMULATOR(3, 2, depth)``, runs ``UCTSearch`` with
    ``MaxDepth = depth + 1`` and ``NumSimulations = 10 ** (depth + 1)``, and
    asserts that the root value is within 0.1 of ``OptimalValue()``.

    Args:
        depth: third argument passed to TEST_SIMULATOR; also drives the
            search depth and the simulation budget.
    """
    sim = TEST_SIMULATOR.TEST_SIMULATOR(3, 2, depth)

    search_params = MCTSPARAMS.MCTSPARAMS()
    search_params.MaxDepth = depth + 1
    search_params.NumSimulations = 10 ** (depth + 1)

    searcher = MCTS(sim, search_params)
    searcher.UCTSearch()

    estimated = searcher.Root.Value.GetValue()
    expected = sim.OptimalValue()
    error = abs(expected - estimated)
    assert error < 0.1
def main(ei): searchParams= MCTSPARAMS.MCTSPARAMS()#MCTS::PARAMS expParams= EXPERIMENTPARAMS.EXPERIMENTPARAMS()# EXPERIMENT::PARAMS print "start", ei.calc_precision_recall() real = DIAGNOSER.DIAGNOSER(ei, 0.6)# simulator = DIAGNOSER.DIAGNOSER(ei.Copy(), 0.6)# experiment = EXPERIMENT.EXPERIMENT(real, simulator, expParams, searchParams)#EXPERIMENT print "running" return experiment.RunMultiple()#
def UnitTestRollout():
    """Check that the average rollout return matches the simulator's mean.

    Runs ``NumSimulations`` (1000) rollouts from fresh start states on
    ``TEST_SIMULATOR(2, 2, 0)``, resetting ``TreeDepth`` to 0 before each
    one, and asserts that the average return is within 0.1 of
    ``MeanValue()``.
    """
    sim = TEST_SIMULATOR.TEST_SIMULATOR(2, 2, 0)

    search_params = MCTSPARAMS.MCTSPARAMS()
    search_params.NumSimulations = 1000
    search_params.MaxDepth = 10
    searcher = MCTS(sim, search_params)

    # Float accumulator so the division below is a true division.
    accumulated = 0.0
    for _ in range(searcher.Params.NumSimulations):
        start = sim.CreateStartState()
        searcher.TreeDepth = 0
        accumulated = accumulated + searcher.Rollout(start)

    average = accumulated / searcher.Params.NumSimulations
    expected = sim.MeanValue()
    error = abs(expected - average)
    assert error < 0.1
def UnitTestGreedy(): testSimulator = TEST_SIMULATOR.TEST_SIMULATOR(5, 5, 0) params = MCTSPARAMS.MCTSPARAMS() mcts = MCTS(testSimulator, params) numAct = testSimulator.GetNumActions() numObs = testSimulator.GetNumObservations() vnode = mcts.ExpandNode(testSimulator.CreateStartState()) vnode.Value.Set(1, 0) vnode.Child(0).Value.Set(0, 1) for action in range(1, numAct): vnode.Child(action).Value.Set(0, 0) x = mcts.GreedyUCB(vnode, False) print "x:", x assert (mcts.GreedyUCB(vnode, False) == 0)
def UnitTestUCB():
    """Check GreedyUCB(..., True) on four hand-seeded root nodes.

    Each scenario expands a fresh root from the start state of
    ``TEST_SIMULATOR(5, 5, 0)``, seeds every child through ``Value.Set`` and
    asserts that action 3 is selected. The scenario descriptions are carried
    over from the original comments; they imply ``Value.Set(count, value)``
    -- TODO confirm against the node's value class.
    """
    sim = TEST_SIMULATOR.TEST_SIMULATOR(5, 5, 0)
    search_params = MCTSPARAMS.MCTSPARAMS()
    searcher = MCTS(sim, search_params)
    action_count = sim.GetNumActions()
    observation_count = sim.GetNumObservations()
    target = 3

    def fresh_root():
        # New root expanded from a new start state, seeded with Set(1, 0).
        node = searcher.ExpandNode(sim.CreateStartState())
        node.Value.Set(1, 0)
        return node

    # With equal value, the action with the lowest count is selected.
    root = fresh_root()
    for action in range(action_count):
        first = 99 if action == target else 100 + action
        root.Child(action).Value.Set(first, 0)
    assert searcher.GreedyUCB(root, True) == target

    # With high counts, the action with the highest value is selected.
    root = fresh_root()
    for action in range(action_count):
        if action == target:
            seed = (99 + observation_count, 1)
        else:
            seed = (100 + action_count - action, 0)
        root.Child(action).Value.Set(*seed)
    assert searcher.GreedyUCB(root, True) == target

    # An action with low value and low count beats actions with high counts.
    root = fresh_root()
    for action in range(action_count):
        first = 1 if action == target else 100 + action
        root.Child(action).Value.Set(first, 1)
    assert searcher.GreedyUCB(root, True) == target

    # An action with zero count is always selected.
    root = fresh_root()
    for action in range(action_count):
        seed = (0, 0) if action == target else (1, 1)
        root.Child(action).Value.Set(*seed)
    assert searcher.GreedyUCB(root, True) == target