def qlearn3(resume=True):
    # Tabular Q-learning: table2 maps a state index to a list holding one
    # Q-value per possible method (action).
    root_state = State(ACTORS, PLACES, ITEMS)
    root_node = TreeNode(state=root_state, parent_edge=None, possible_methods=True)
    from tree import POSSIBLE_METHODS
    num_methods = len(POSSIBLE_METHODS)
    table2 = {}
    eps = 0.2
    if resume:
        with open("table2.pickle", "rb") as table2file:
            table2 = pickle.load(table2file)
    current_node = root_node
    edge = None
    depth = 0
    counter = 0
    prob_dist = initialize_prob_dist()
    while True:
        # Episode is capped at 20 actions: reset to a fresh root state and
        # periodically checkpoint the Q-table.
        if depth >= 20:
            depth = 0
            prob_dist = initialize_prob_dist()
            counter += 1
            edge = None
            #print()
            if counter % 100 == 0:
                print("Counter - " + str(counter) + " - Dumping To File")
                with open("table2.pickle", "wb") as table2file:
                    pickle.dump(table2, table2file, protocol=pickle.HIGHEST_PROTOCOL)
            root_state = State(ACTORS, PLACES, ITEMS)
            root_node = TreeNode(state=root_state, parent_edge=None, possible_methods=True)
            current_node = root_node
            continue
        next_edge = expand_heuristic_edge(current_node, prob_dist)
        expand_all_believable_edges(node=current_node, debug=True)
        best_edge = choose_max_q_edge(node=current_node)
        if edge is not None:
            reward = percent_goals_satisfied(current_node, GOALS)
            # Make sure both the previous and the current state have a row in
            # the Q-table before updating.
            idx = state_index_number_2(edge.prev_node.state)
            if idx not in table2:
                table2[idx] = [0.1] * num_methods
            idxc = state_index_number_2(current_node.state)
            if idxc not in table2:
                table2[idxc] = [0.1] * num_methods
            #print(idxc)
            #print(idx)
            #print(len(POSSIBLE_METHODS))
            # Q(s,a) <- Q(s,a) + 0.1 * (reward + 0.9 * max_a' Q(s',a') - Q(s,a))
            bestqval = table2[idxc][find_edge_index(best_edge)]
            qval = table2[idx][find_edge_index(edge)]
            table2[idx][find_edge_index(edge)] = qval + 0.1 * (reward + 0.9 * bestqval - qval)
            #print("{} {} {}".format(edge.method.sentence, reward, edge.qval))
        edge = next_edge
        depth += 1
        current_node = edge.next_node


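# Illustrative sketch (not part of the original code): the tabular Q-learning
# update that qlearn3 performs, shown in isolation with a plain dict-of-lists
# table. The function name q_update and the alpha/gamma/init_q parameters are
# hypothetical. Note that qlearn3 takes its max only over the edges that are
# actually believable from the next state, whereas this sketch maxes over the
# whole row.
def q_update(table, state_idx, action_idx, reward, next_state_idx,
             num_actions, alpha=0.1, gamma=0.9, init_q=0.1):
    """Q(s,a) <- Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))."""
    # Lazily create rows for any state we have not seen yet.
    for idx in (state_idx, next_state_idx):
        if idx not in table:
            table[idx] = [init_q] * num_actions
    best_next = max(table[next_state_idx])
    qval = table[state_idx][action_idx]
    table[state_idx][action_idx] = qval + alpha * (reward + gamma * best_next - qval)
    return table[state_idx][action_idx]

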
def rollout_story_2(node, max_simlength):
    root = TreeNode(node.state)
    curr_node = root
    numsims = 0
    while numsims < max_simlength and not goals_satisfied(curr_node, GOALS):
        expand_rand_edge(curr_node)
        curr_node = curr_node.edges[-1].next_node
        if curr_node.believability == 0:
            # Unbelievable action: back up to the parent and resample without
            # counting this step against the simulation budget.
            curr_node = curr_node.parent_edge.prev_node
            continue
        numsims += 1
    print(Story(curr_node))
    return rollout_value(curr_node.believability,
                         percent_goals_satisfied(curr_node, GOALS))


def run_once(debug=True):
    # Randomly assigns actors, places, and items for story
    root_state = random_state(3, 3)
    # Initialize Root Node - Possible Methods boolean MUST BE TRUE
    root_node = TreeNode(root_state, parent_edge=None, possible_methods=True)
    # Total methods in story
    num_methods = len(root_node.possible_methods)
    """
    Note: max_numsim = max_expansion * thres

    max_iter      : Number of sentences in story
                    = number of story nodes - 1 = number of story edges
    max_expansion : Number of expansions in search
    max_simlength : Maximum length of rollout
    C             : Exploration constant for selection
    thres         : Minimum MCTS visits for node expansion
    """
    # Perform Monte Carlo - returns final node and whole story
    max_expansion = 200
    if max_expansion < len(root_node.possible_methods):
        raise ValueError(
            "Max expansion ({}) should be at least the number of methods ({})".format(
                max_expansion, len(root_node.possible_methods)))
    max_iter = 15
    max_simlength = 15
    C = 1
    thres = 20
    print("Max iteration: {}\nMax Expansion: {}\nMax simulation length: {}\nC: {}\nThreshold: {}"
          .format(max_iter, max_expansion, max_simlength, C, thres))
    n, s = mcts(root_node, max_iter, max_expansion, max_simlength, C, thres, debug=debug)

    # Print out results
    if debug:
        print(s)
        print(n.believability)
        print(n.value)
        print(percent_goals_satisfied(n, GOALS))
    return (n, s)


def rollout_story(node, max_simlength):
    root = TreeNode(node.state)
    curr_node = root
    numsims = 0
    while numsims < max_simlength and not goals_satisfied(curr_node, GOALS):
        expand_rand_edge(curr_node)
        curr_node = curr_node.edges[-1].next_node
        if curr_node.believability == 0:
            # Unbelievable action: penalize the rollout by scaling the
            # parent's believability by how far the rollout got, then stop.
            p_believability = curr_node.parent_edge.prev_node.believability
            curr_node.believability = p_believability * (numsims + 1) / max_simlength
            break
        numsims += 1
    return rollout_value(curr_node.believability,
                         percent_goals_satisfied(curr_node, GOALS))


def rollout_story_3(node, max_simlength):
    # Create a new tree
    root = TreeNode(node.state)
    curr_node = root
    numsims = 0
    # Have probability distribution for each edge
    prob_dist = initialize_prob_dist()
    # Keep rolling out until max_simlength (goal check currently disabled)
    while numsims < max_simlength:  # and not goals_satisfied(curr_node, GOALS):
        # Choose edge based on prob_dist
        expand_heuristic_edge(curr_node, prob_dist)
        # Reassign current node to current node's child
        curr_node = curr_node.edges[-1].next_node
        # Update the simulation depth
        numsims += 1
    return rollout_value(curr_node.believability,
                         percent_goals_satisfied(curr_node, GOALS))


def qlearn(resume=True):
    # Q-learning where Q-values are stored directly on the tree's edges and
    # the whole tree is checkpointed to tree.pickle.
    root_state = State(ACTORS, PLACES, ITEMS)
    root_node = TreeNode(state=root_state, parent_edge=None, possible_methods=True)
    if resume:
        with open("tree.pickle", "rb") as treefile:
            root_node = pickle.load(treefile)
    current_node = root_node
    edge = None
    depth = 0
    counter = 0
    while True:
        # Episode is capped at 5 actions: restart from the root and
        # periodically checkpoint the tree.
        if depth >= 5:
            depth = 0
            current_node = root_node
            print(current_node.state.actors["DAPHNE"]["place"])
            counter += 1
            print()
            if counter % 100 == 0:
                print("Counter - " + str(counter) + " - Dumping To File")
                with open("tree.pickle", "wb") as treefile:
                    pickle.dump(root_node, treefile, protocol=pickle.HIGHEST_PROTOCOL)
            continue
        if not current_node.edges:
            expand_all_believable_edges(node=current_node, debug=True)
        # Epsilon-greedy action selection; best_edge supplies the update target.
        next_edge = choose_q_edge(node=current_node, epsilon=0.2)
        best_edge = choose_max_q_edge(node=current_node)
        if edge is not None:
            # Q(s,a) <- Q(s,a) + 0.1 * (reward + 0.9 * max_a' Q(s',a') - Q(s,a))
            reward = percent_goals_satisfied(current_node, GOALS)
            edge.qval = edge.qval + 0.1 * (reward + 0.9 * best_edge.qval - edge.qval)
            print("{} {} {}".format(edge.method.sentence, reward, edge.qval))
        edge = next_edge
        depth += 1
        current_node = edge.next_node


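# Illustrative sketch (not part of the original code): what the edge-selection
# helpers used by qlearn are assumed to do. The real choose_q_edge /
# choose_max_q_edge are defined elsewhere in the project; the names below are
# hypothetical stand-ins that only assume each node has an edges list and each
# edge has a qval attribute.
import random


def _sketch_choose_max_q_edge(node):
    # Greedy choice: the outgoing edge with the highest current Q-value.
    return max(node.edges, key=lambda e: e.qval)


def _sketch_choose_q_edge(node, epsilon=0.2):
    # Epsilon-greedy: explore a random edge with probability epsilon,
    # otherwise exploit the current best edge.
    if random.random() < epsilon:
        return random.choice(node.edges)
    return _sketch_choose_max_q_edge(node)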