예제 #1
0
파일: plan.py 프로젝트: shiyani21/stacking
def plan(timeout, blocks, problem, model):
    """Grow a search tree for ``timeout`` iterations and return it.

    On every iteration the tree is asked which node to expand
    (``get_exp_best_node_expand``), candidate nodes are sampled from that
    node with ``problem.sample_actions`` and each candidate is attached to
    the tree as a child of the chosen node.

    Args:
        timeout: Number of iterations to run (passed to ``range``).
        blocks: Forwarded to the ``Tree`` constructor.
        problem: Object providing ``sample_actions(node, model)``.
        model: Forwarded unchanged to ``problem.sample_actions``.

    Returns:
        The ``Tree`` instance after all expansions.
    """
    search_tree = Tree(blocks)
    for step in range(timeout):
        expand_from = search_tree.get_exp_best_node_expand()

        # Trailing "\r" rewrites the same console line on every iteration.
        progress = "Search progress: %i   \r" % (step)
        sys.stdout.write(progress)
        sys.stdout.flush()

        source_node = search_tree.nodes[expand_from]
        candidates = problem.sample_actions(source_node, model)
        for candidate in candidates:
            search_tree.expand(expand_from, candidate)
    return search_tree
예제 #2
0
파일: plan.py 프로젝트: shiyani21/stacking
def plan_mcts(logger, timeout, blocks, problem, model, c=1., discrete=True):
    """Run a tree search for ``timeout`` iterations and record statistics.

    Each iteration selects a node with ``tree.traverse(c)``, samples new
    nodes from it with ``problem.sample_actions`` and, for every new node,
    expands the tree, performs a rollout and backpropagates the rollout
    value. Running statistics are collected along the way.

    Args:
        logger: Not used by this function; kept so the signature stays
            compatible with existing callers.
        timeout: Number of search iterations (passed to ``range``).
        blocks: Forwarded to the ``Tree`` constructor.
        problem: Object providing ``max_height`` and ``sample_actions``;
            it is also handed to ``tree.rollout``.
        model: Forwarded to ``problem.sample_actions`` and ``tree.rollout``.
        c: Forwarded to ``tree.traverse``
            (presumably an exploration constant -- TODO confirm).
        discrete: Forwarded to ``problem.sample_actions``.

    Returns:
        A 6-tuple ``(tree, tallest_tower, highest_exp_height, highest_value,
        tower_stats, node_values)``:

        * ``tree``: the ``Tree`` after the search.
        * ``tallest_tower``: list of ``timeout + 1`` entries starting at 0;
          running maximum of ``len(node['tower'])`` over all new nodes.
        * ``highest_exp_height``: same, for ``node['exp_reward']``.
        * ``highest_value``: same, for ``node['value']`` as read right
          after the node's own rollout was backpropagated.
        * ``tower_stats``: array of shape ``(problem.max_height, timeout)``;
          column ``t`` holds the cumulative count of new nodes per tower
          height (row ``height - 1``) up to and including iteration ``t``.
        * ``node_values``: for every height ``0..problem.max_height`` a
          dict with the lists ``'median'``, ``'25'`` and ``'75'``, one entry
          per iteration, computed over the ``'value'`` of all tree nodes
          with that tower height (0 when no such node exists).
    """
    tree = Tree(blocks)
    heights = range(problem.max_height + 1)

    tallest_tower = [0]
    highest_exp_height = [0]
    highest_value = [0]
    tower_stats = np.zeros((problem.max_height, timeout))
    node_values = {k: {'median': [], '25': [], '75': []} for k in heights}

    for t in range(timeout):
        # Carry the cumulative counts forward. At t == 0 there is no previous
        # column; the unguarded ``t - 1`` would wrap around to the last one.
        if t > 0:
            tower_stats[:, t] = tower_stats[:, t - 1]

        # Trailing "\r" rewrites the same console line on every iteration.
        sys.stdout.write("Search progress: %i   \r" % (t))
        sys.stdout.flush()

        parent_node_id = tree.traverse(c)
        new_nodes = problem.sample_actions(tree.nodes[parent_node_id],
                                           model,
                                           discrete=discrete)

        tallest_tower_t = tallest_tower[-1]
        highest_exp_height_t = highest_exp_height[-1]
        highest_value_t = highest_value[-1]
        for new_node in new_nodes:
            new_node_id = tree.expand(parent_node_id, new_node)
            rollout_value = tree.rollout(new_node_id, problem, model)
            tree.backpropagate(new_node_id, rollout_value)

            tower_height = len(new_node['tower'])
            # NOTE(review): a height of 0 would index row -1 (the last row);
            # presumably sampled nodes always hold at least one block --
            # confirm against problem.sample_actions.
            tower_stats[tower_height - 1, t] += 1

            if tower_height > tallest_tower_t:
                tallest_tower_t = tower_height
            if new_node['exp_reward'] > highest_exp_height_t:
                highest_exp_height_t = new_node['exp_reward']
            if new_node['value'] > highest_value_t:
                highest_value_t = new_node['value']

        tallest_tower.append(tallest_tower_t)
        highest_exp_height.append(highest_exp_height_t)
        highest_value.append(highest_value_t)

        # Node values change with every backpropagation, so the per-height
        # distribution is rebuilt from the whole tree on each iteration.
        values_by_height = {k: [] for k in heights}
        for node in tree.nodes:
            node_data = tree.nodes[node]
            values_by_height[len(node_data['tower'])].append(
                node_data['value'])

        for height in heights:
            values = values_by_height[height]
            stats = node_values[height]
            if not values:
                stats['median'].append(0)
                stats['25'].append(0)
                stats['75'].append(0)
            else:
                stats['median'].append(np.median(values))
                stats['25'].append(np.quantile(values, .25))
                stats['75'].append(np.quantile(values, .75))

    return (tree, tallest_tower, highest_exp_height, highest_value,
            tower_stats, node_values)