def train(base_class_name, demo_numbers, program_generation_step_size, num_programs, num_dts, max_num_particles):
    """Learn a PLP policy from demonstrations and return it.

    Pipeline, as visible in this function:
      1. Fetch the candidate programs and their prior log-probabilities.
      2. Run every program on the demonstrations to obtain ``X, y``.
      3. Learn PLPs and their priors via ``learn_plps``.
      4. Score every PLP as ``prior + likelihood`` on the demonstrations.
      5. Keep at most ``max_num_particles`` via ``select_particles``,
         normalise their log-scores with ``logsumexp`` and exponentiate
         them into the weights given to ``PLPPolicy``.

    Args:
        base_class_name: Forwarded to the program/demonstration helpers.
        demo_numbers: Demonstration identifiers forwarded to the helpers.
        program_generation_step_size: Forwarded to ``learn_plps``.
        num_programs: Number of candidate programs to request.
        num_dts: Forwarded to ``learn_plps``.
        max_num_particles: Upper bound passed to ``select_particles``.

    Returns:
        A ``PLPPolicy`` over the selected particles, or a single-particle
        policy wrapping ``StateActionProgram("False")`` when
        ``select_particles`` returns nothing.
    """
    programs, program_prior_log_probs = get_program_set(base_class_name, num_programs)

    X, y = run_all_programs_on_demonstrations(base_class_name, num_programs, demo_numbers)
    plps, plp_priors = learn_plps(X, y, programs, program_prior_log_probs, num_dts=num_dts,
        program_generation_step_size=program_generation_step_size)

    demonstrations = get_demonstrations(base_class_name, demo_numbers=demo_numbers)
    likelihoods = compute_likelihood_plps(plps, demonstrations)

    particles = []
    particle_log_probs = []

    # zip() truncates to the shortest input, so both lists stay aligned.
    for plp, prior, likelihood in zip(plps, plp_priors, likelihoods):
        particles.append(plp)
        particle_log_probs.append(prior + likelihood)

    print("\nDone!")
    # np.argmax raises ValueError on an empty sequence; skip the MAP report
    # in that case so the fallback policy below can still be returned.
    if particles:
        map_idx = int(np.argmax(particle_log_probs))
        print("MAP program ({}):".format(particle_log_probs[map_idx]))
        print(particles[map_idx])

    top_particles, top_particle_log_probs = select_particles(particles, particle_log_probs, max_num_particles)
    if len(top_particle_log_probs) > 0:
        # Subtracting logsumexp normalises in log space, so the
        # exponentiated weights sum to one.
        top_particle_log_probs = np.array(top_particle_log_probs) - logsumexp(top_particle_log_probs)
        top_particle_probs = np.exp(top_particle_log_probs)
        print("top_particle_probs:", top_particle_probs)
        policy = PLPPolicy(top_particles, top_particle_probs)
    else:
        print("no nontrivial particles found")
        policy = PLPPolicy([StateActionProgram("False")], [1.0])

    return policy
# Example #2
# 0
def extract_plp_from_dt(estimator, features, feature_log_probs):
    """Turn a decision tree into a disjunction of conjunctive programs.

    The tree is walked depth-first from node 0. Every leaf whose index-1
    entry in ``tree_.value`` exceeds its index-0 entry is treated as a
    "true" leaf; the root-to-leaf path of each such leaf becomes one
    conjunctive program, and all of them are OR-ed together.

    Args:
        estimator: Object exposing a ``tree_`` attribute with ``node_count``,
            ``children_left``, ``children_right``, ``feature``, ``threshold``
            and ``value`` (looks like a fitted scikit-learn decision tree --
            confirm against the caller).
        features: Passed through to ``get_conjunctive_program``.
        feature_log_probs: Passed through to ``get_conjunctive_program``.

    Returns:
        Tuple ``(program, log_prob)`` where ``program`` is a
        ``StateActionProgram`` and ``log_prob`` is the sum of the ``log_p``
        values returned for each conjunctive program.
    """
    tree = estimator.tree_
    n_nodes = tree.node_count
    children_left = tree.children_left
    children_right = tree.children_right
    node_to_features = tree.feature
    threshold = tree.threshold
    value = tree.value.squeeze()
    if n_nodes == 1:
        # squeeze() also drops the node axis for a single-node tree;
        # restore it so value[node_id][k] indexing works below.
        value = value.reshape(1, -1)

    stack = [0]
    parents = {0: None}
    true_leaves = []

    total_pos_leaf = 0
    total_neg_leaf = 0
    while stack:
        node_id = stack.pop()
        left = children_left[node_id]
        right = children_right[node_id]

        if left != right:
            # Internal node: differing child ids mean there is a split.
            # NOTE(review): the assert presumably encodes that features are
            # 0/1-valued -- confirm.
            assert 0 < threshold[node_id] < 1
            stack.append(left)
            parents[left] = (node_id, 'left')
            stack.append(right)
            parents[right] = (node_id, 'right')
        elif value[node_id][1] > value[node_id][0]:
            # Record each positive leaf exactly once; recording it twice
            # duplicates its clause and double-counts its log-probability.
            true_leaves.append(node_id)
            total_pos_leaf = value[node_id][1] + total_pos_leaf
            total_neg_leaf = value[node_id][0] + total_neg_leaf

    print("Sum of total positive leaves {}".format(total_pos_leaf))
    print("Sum of total negative leaves {}".format(total_neg_leaf))
    total_leaf = total_pos_leaf + total_neg_leaf
    if total_leaf > 0:
        print("Likelihood? {}".format(np.log(total_pos_leaf / total_leaf)))
    else:
        # No positive leaves were found, so the ratio is undefined.
        print("Likelihood {}".format("Nan"))

    paths_to_true_leaves = [get_path_to_leaf(leaf, parents) for leaf in true_leaves]

    conjunctive_programs = []
    program_log_prob = 0.

    for path in paths_to_true_leaves:
        and_program, log_p = get_conjunctive_program(path, node_to_features, features, feature_log_probs)
        conjunctive_programs.append(and_program)
        program_log_prob += log_p

    disjunctive_program = get_disjunctive_program(conjunctive_programs)

    if not isinstance(disjunctive_program, StateActionProgram):
        disjunctive_program = StateActionProgram(disjunctive_program)

    return disjunctive_program, program_log_prob
def generate_programs(grammar, start_symbol=0, num_iterations=100000000):
    """Lazily enumerate complete programs derivable from ``grammar``.

    A min-heap of partial programs is expanded one entry per iteration,
    lowest accumulated priority first. Each child produced by
    ``get_child_programs`` is either yielded (when complete) or pushed back
    onto the heap for later expansion.

    Args:
        grammar: Passed through to ``get_child_programs``.
        start_symbol: Symbol the initial partial program ``[start_symbol]``
            is seeded with.
        num_iterations: Maximum number of heap entries to expand.

    Yields:
        Tuples ``(StateActionProgram, log_prob)`` where ``log_prob`` is the
        negated accumulated negative log-probability of the parent plus
        ``np.log`` of the child's production probability.
    """
    queue = []
    # Unique, increasing tie-breaker: tuple comparison in the heap is always
    # decided before it reaches the (list-valued) program element.
    counter = itertools.count()

    hq.heappush(queue, (0, 0, next(counter), [start_symbol]))

    for _ in range(num_iterations):
        if not queue:
            # Every partial program has been expanded; finish cleanly
            # instead of letting heappop raise IndexError.
            return

        priority, production_neg_log_prob, _, program = hq.heappop(queue)

        for child_program, child_production_prob, child_priority in get_child_programs(program, grammar):
            child_log_prob = np.log(child_production_prob)
            if program_is_complete(child_program):
                yield StateActionProgram(stringify(child_program)), -production_neg_log_prob + child_log_prob
            else:
                hq.heappush(queue, (priority + child_priority, production_neg_log_prob - child_log_prob,
                                    next(counter), child_program))
# Example #4
# 0
def extract_plp_from_dt(estimator, features, feature_log_probs):
    """Turn a decision tree into a disjunction of conjunctive programs.

    The tree is walked depth-first from node 0. Every leaf whose index-1
    entry in ``tree_.value`` exceeds its index-0 entry is treated as a
    "true" leaf; the root-to-leaf path of each such leaf becomes one
    conjunctive program, and all of them are OR-ed together.

    Args:
        estimator: Object exposing a ``tree_`` attribute with ``node_count``,
            ``children_left``, ``children_right``, ``feature``, ``threshold``
            and ``value`` (looks like a fitted scikit-learn decision tree --
            confirm against the caller).
        features: Passed through to ``get_conjunctive_program``.
        feature_log_probs: Passed through to ``get_conjunctive_program``.

    Returns:
        Tuple ``(program, log_prob)`` where ``program`` is a
        ``StateActionProgram`` and ``log_prob`` is the sum of the ``log_p``
        values returned for each conjunctive program.
    """
    tree = estimator.tree_
    n_nodes = tree.node_count
    children_left = tree.children_left
    children_right = tree.children_right
    node_to_features = tree.feature
    threshold = tree.threshold
    value = tree.value.squeeze()
    if n_nodes == 1:
        # squeeze() also drops the node axis for a single-node tree. Restore
        # it explicitly rather than catching IndexError during traversal,
        # which could mask unrelated indexing errors.
        value = value.reshape(1, -1)

    stack = [0]
    parents = {0: None}
    true_leaves = []

    while stack:
        node_id = stack.pop()
        left = children_left[node_id]
        right = children_right[node_id]

        if left != right:
            # Internal node: differing child ids mean there is a split.
            # NOTE(review): the assert presumably encodes that features are
            # 0/1-valued -- confirm.
            assert 0 < threshold[node_id] < 1
            stack.append(left)
            parents[left] = (node_id, 'left')
            stack.append(right)
            parents[right] = (node_id, 'right')
        elif value[node_id][1] > value[node_id][0]:
            true_leaves.append(node_id)

    paths_to_true_leaves = [
        get_path_to_leaf(leaf, parents) for leaf in true_leaves
    ]

    conjunctive_programs = []
    program_log_prob = 0.

    for path in paths_to_true_leaves:
        and_program, log_p = get_conjunctive_program(path, node_to_features,
                                                     features,
                                                     feature_log_probs)
        conjunctive_programs.append(and_program)
        program_log_prob += log_p

    disjunctive_program = get_disjunctive_program(conjunctive_programs)

    if not isinstance(disjunctive_program, StateActionProgram):
        disjunctive_program = StateActionProgram(disjunctive_program)

    return disjunctive_program, program_log_prob