def qlearn3(resume=True):
    """Tabular Q-learning over story states, keyed by state index.

    Repeatedly runs 20-step episodes from a fresh root state, choosing
    edges from a heuristic probability distribution and applying the
    Q-update rule (alpha=0.1, gamma=0.9) to a table indexed by
    state_index_number_2. Every 100 episodes the table is pickled to
    "table2.pickle". Loops forever; interrupt to stop.

    Args:
        resume: If True, load an existing Q-table from "table2.pickle"
            before training.
    """
    root_state = State(ACTORS, PLACES, ITEMS)
    root_node = TreeNode(state=root_state,
                         parent_edge=None,
                         possible_methods=True)
    from tree import POSSIBLE_METHODS
    num_methods = len(POSSIBLE_METHODS)
    table2 = {}
    if resume:
        with open("table2.pickle", "rb") as table2file:
            table2 = pickle.load(table2file)
    current_node = root_node
    edge = None
    depth = 0
    counter = 0
    prob_dist = initialize_prob_dist()
    while True:
        if depth >= 20:
            # Episode finished: reset the rollout state and build a
            # brand-new tree for the next episode.
            depth = 0
            prob_dist = initialize_prob_dist()
            counter += 1
            edge = None
            if counter % 100 == 0:
                print("Counter - " + str(counter) + " - Dumping To File")
                with open("table2.pickle", "wb") as table2file:
                    pickle.dump(table2,
                                table2file,
                                protocol=pickle.HIGHEST_PROTOCOL)
            root_state = State(ACTORS, PLACES, ITEMS)
            root_node = TreeNode(state=root_state,
                                 parent_edge=None,
                                 possible_methods=True)
            current_node = root_node
            continue
        next_edge = expand_heuristic_edge(current_node, prob_dist)
        expand_all_believable_edges(node=current_node, debug=True)
        best_edge = choose_max_q_edge(node=current_node)
        if edge is not None:
            # Q(s, a) += alpha * (reward + gamma * max_a' Q(s', a') - Q(s, a))
            reward = percent_goals_satisfied(current_node, GOALS)
            idx = state_index_number_2(edge.prev_node.state)
            if idx not in table2:
                # Optimistic initialization for unseen states.
                table2[idx] = [0.1] * num_methods
            idxc = state_index_number_2(current_node.state)
            if idxc not in table2:
                table2[idxc] = [0.1] * num_methods
            bestqval = table2[idxc][find_edge_index(best_edge)]
            edge_idx = find_edge_index(edge)  # computed once, used twice
            qval = table2[idx][edge_idx]
            table2[idx][edge_idx] = qval + 0.1 * (reward + 0.9 * bestqval -
                                                  qval)
        edge = next_edge
        depth += 1
        current_node = edge.next_node
Ejemplo n.º 2
0
def rollout_story_2(node, max_simlength):
    """Randomly simulate a story from node's state and score the endpoint.

    Expands random edges until either max_simlength believable steps have
    been taken or the goals are satisfied. Unbelievable children are
    discarded (the step is retried) without consuming a simulation step.
    Prints the simulated Story before returning its rollout value.
    """
    sim_node = TreeNode(node.state)
    steps = 0
    while steps < max_simlength and not goals_satisfied(sim_node, GOALS):
        expand_rand_edge(sim_node)
        child = sim_node.edges[-1].next_node
        if child.believability == 0:
            # Dead end: stay on the current node and try another edge.
            continue
        sim_node = child
        steps += 1
    print(Story(sim_node))
    return rollout_value(sim_node.believability,
                         percent_goals_satisfied(sim_node, GOALS))
Ejemplo n.º 3
0
def run_once(debug=True):
    """Build a random story state and run a single MCTS search over it.

    Args:
        debug: If True, print the resulting story, its believability,
            node value, and percent of goals satisfied. Also forwarded
            to mcts() for its internal diagnostics.

    Returns:
        Tuple (n, s) of the final tree node and the generated story.

    Raises:
        ValueError: If max_expansion is smaller than the number of
            possible methods at the root node.
    """
    # Randomly assigns actors, places, and items for story
    root_state = random_state(3, 3)

    # Initialize root node - possible_methods MUST be True at the root
    root_node = TreeNode(root_state, parent_edge=None, possible_methods=True)

    # Total methods available at the root of the story
    num_methods = len(root_node.possible_methods)

    # Search parameters:
    #   max_iter      : number of sentences in story
    #                   (= number of story nodes - 1 = number of story edges)
    #   max_expansion : number of expansions in search
    #   max_simlength : maximum length of a rollout
    #   C             : exploration constant for selection
    #   thres         : minimum MCTS visits for node expansion
    max_expansion = 200
    if max_expansion < num_methods:
        raise ValueError(
            "Max exp ({}) should be greater than num methods({})".format(
                max_expansion, num_methods))

    max_iter = 15
    max_simlength = 15
    C = 1
    thres = 20
    print(
        "Max iteration: {}\nMax Expansion: {}\nMax simulation length: {}\nC: {}\nThreshold: {}"
        .format(max_iter, max_expansion, max_simlength, C, thres))
    n, s = mcts(root_node,
                max_iter,
                max_expansion,
                max_simlength,
                C,
                thres,
                debug=debug)  # honor the caller's flag (was hard-coded True)

    # Print out results
    if debug:
        print(s)
        print(n.believability)
        print(n.value)
        print(percent_goals_satisfied(n, GOALS))

    return (n, s)
Ejemplo n.º 4
0
def rollout_story(node, max_simlength):
    """Randomly simulate a story from node's state and score the endpoint.

    Takes random steps until max_simlength is reached or the goals are
    satisfied. If an unbelievable node is produced, the rollout ends
    early with a believability penalty proportional to how far through
    the simulation it got.
    """
    sim_node = TreeNode(node.state)
    step = 0
    while step < max_simlength and not goals_satisfied(sim_node, GOALS):
        expand_rand_edge(sim_node)
        sim_node = sim_node.edges[-1].next_node
        if sim_node.believability == 0:
            # Early termination: inherit a fraction of the parent's
            # believability scaled by simulation progress.
            parent = sim_node.parent_edge.prev_node
            sim_node.believability = (parent.believability * (step + 1) /
                                      max_simlength)
            break
        step += 1
    return rollout_value(sim_node.believability,
                         percent_goals_satisfied(sim_node, GOALS))
Ejemplo n.º 5
0
def rollout_story_3(node, max_simlength):
    """Heuristic rollout: take exactly max_simlength guided steps.

    Builds a fresh tree from node's state and repeatedly expands one
    edge chosen from a heuristic probability distribution, then scores
    the final node. (Unlike the other rollouts, this one never stops
    early on goal satisfaction.)
    """
    sim_node = TreeNode(node.state)

    # One probability distribution shared across the whole rollout.
    prob_dist = initialize_prob_dist()

    for _ in range(max_simlength):
        # Pick an edge according to prob_dist and descend into its child.
        expand_heuristic_edge(sim_node, prob_dist)
        sim_node = sim_node.edges[-1].next_node

    return rollout_value(sim_node.believability,
                         percent_goals_satisfied(sim_node, GOALS))
def qlearn(resume=True):
    """Q-learning with Q-values stored directly on tree edges.

    Runs 5-step episodes over a persistent tree rooted at a fixed
    initial state, choosing edges epsilon-greedily (epsilon=0.2) and
    applying the Q-update rule (alpha=0.1, gamma=0.9) to edge.qval.
    Every 100 episodes the whole tree is pickled to "tree.pickle".
    Loops forever; interrupt to stop.

    Args:
        resume: If True, load the tree from "tree.pickle" instead of
            starting from a fresh root.
    """
    root_state = State(ACTORS, PLACES, ITEMS)
    root_node = TreeNode(state=root_state,
                         parent_edge=None,
                         possible_methods=True)
    if resume:
        with open("tree.pickle", "rb") as treefile:
            root_node = pickle.load(treefile)
    current_node = root_node
    edge = None
    depth = 0
    counter = 0
    while True:
        if depth >= 5:
            # Episode finished: restart from the (persistent) root.
            depth = 0
            current_node = root_node
            print(current_node.state.actors["DAPHNE"]["place"])
            counter += 1
            print()
            if counter % 100 == 0:
                print("Counter - " + str(counter) + " - Dumping To File")
                with open("tree.pickle", "wb") as treefile:
                    pickle.dump(root_node,
                                treefile,
                                protocol=pickle.HIGHEST_PROTOCOL)
            continue
        # Lazily expand children the first time a node is visited.
        if not current_node.edges:
            expand_all_believable_edges(node=current_node, debug=True)
        next_edge = choose_q_edge(node=current_node, epsilon=0.2)
        best_edge = choose_max_q_edge(node=current_node)
        if edge is not None:
            # Q(s, a) += alpha * (reward + gamma * max_a' Q(s', a') - Q(s, a))
            reward = percent_goals_satisfied(current_node, GOALS)
            edge.qval = edge.qval + 0.1 * (reward + 0.9 *
                                           (best_edge.qval) - edge.qval)
            print("{} {} {}".format(edge.method.sentence, reward, edge.qval))
        edge = next_edge
        depth += 1
        current_node = edge.next_node