Python StateActionProgram Examples

Programming Language: Python

Namespace/Package Name: policy

Examples at hotexamples.com: 4

Python StateActionProgram - 4 examples found. These are the top-rated real-world Python examples of policy.StateActionProgram, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

StateActionProgram(4)

Frequently Used Methods

StateActionProgram (4)

Example #1

Show file

File: pipeline.py Project: yichao-l/policies_logic_programs

def train(base_class_name, demo_numbers, program_generation_step_size, num_programs, num_dts, max_num_particles):
    """Learn a PLPPolicy from demonstrations of ``base_class_name``.

    Pipeline, as visible in this function:
      1. fetch a program set and its prior log-probabilities,
      2. run the programs on the demonstrations to obtain ``X, y``,
      3. learn candidate PLPs and their priors from ``X, y``,
      4. score each PLP as ``prior + likelihood`` on the demonstrations,
      5. keep the best particles (at most ``max_num_particles`` is
         presumably enforced by ``select_particles`` -- confirm there),
         normalize their log-scores into probabilities and wrap them in a
         ``PLPPolicy``.

    If no particle survives (or none was learned at all), the returned
    policy consists of the single program ``StateActionProgram("False")``
    with probability 1.0.

    Returns:
        The resulting ``PLPPolicy``.
    """
    programs, program_prior_log_probs = get_program_set(base_class_name, num_programs)

    X, y = run_all_programs_on_demonstrations(base_class_name, num_programs, demo_numbers)
    plps, plp_priors = learn_plps(X, y, programs, program_prior_log_probs, num_dts=num_dts,
        program_generation_step_size=program_generation_step_size)

    demonstrations = get_demonstrations(base_class_name, demo_numbers=demo_numbers)
    likelihoods = compute_likelihood_plps(plps, demonstrations)

    particles = []
    particle_log_probs = []

    # zip() stops at the shortest input, so only fully scored PLPs are kept.
    for plp, prior, likelihood in zip(plps, plp_priors, likelihoods):
        particles.append(plp)
        particle_log_probs.append(prior + likelihood)

    print("\nDone!")

    if particles:
        map_idx = int(np.argmax(particle_log_probs))
        print("MAP program ({}):".format(particle_log_probs[map_idx]))
        print(particles[map_idx])
        top_particles, top_particle_log_probs = select_particles(particles, particle_log_probs, max_num_particles)
    else:
        # np.argmax raises ValueError on an empty sequence; with nothing
        # learned there is no MAP program to report and nothing to select,
        # so fall straight through to the trivial policy below.
        top_particles, top_particle_log_probs = [], []

    if len(top_particle_log_probs) > 0:
        # Normalize in log space before exponentiating.
        top_particle_log_probs = np.array(top_particle_log_probs) - logsumexp(top_particle_log_probs)
        top_particle_probs = np.exp(top_particle_log_probs)
        print("top_particle_probs:", top_particle_probs)
        policy = PLPPolicy(top_particles, top_particle_probs)
    else:
        print("no nontrivial particles found")
        policy = PLPPolicy([StateActionProgram("False")], [1.0])

    return policy

Example #2

Show file

def extract_plp_from_dt(estimator, features, feature_log_probs):
    """Convert a fitted decision tree into a disjunction-of-conjunctions program.

    The tree is walked from the root (node 0). Every leaf whose second value
    entry exceeds its first (``value[node][1] > value[node][0]``; presumably
    "positive class outweighs negative class" -- confirm against how the
    estimator was trained) is a "true" leaf. The root-to-leaf path of each
    true leaf becomes one conjunctive program, and all conjunctions are
    joined by ``get_disjunctive_program``.

    Args:
        estimator: fitted tree exposing ``tree_.children_left``,
            ``tree_.children_right``, ``tree_.feature``, ``tree_.threshold``
            and ``tree_.value``.
        features: passed through to ``get_conjunctive_program``.
        feature_log_probs: passed through to ``get_conjunctive_program``.

    Returns:
        ``(program, log_prob)`` where ``program`` is a ``StateActionProgram``
        and ``log_prob`` is the sum of the per-conjunction log-probabilities.

    Raises:
        AssertionError: if an internal node's threshold is not strictly
            between 0 and 1.
    """
    children_left = estimator.tree_.children_left
    children_right = estimator.tree_.children_right
    node_to_features = estimator.tree_.feature
    threshold = estimator.tree_.threshold
    value = estimator.tree_.value.squeeze()

    stack = [0]
    parents = {0 : None}
    true_leaves = []

    total_pos_leaf = 0
    total_neg_leaf = 0
    while len(stack) > 0:
        node_id = stack.pop()

        # A node whose two child ids differ is an internal (split) node.
        if (children_left[node_id] != children_right[node_id]):
            assert 0 < threshold[node_id] < 1
            stack.append(children_left[node_id])
            parents[children_left[node_id]] = (node_id, 'left')
            stack.append(children_right[node_id])
            parents[children_right[node_id]] = (node_id, 'right')
        elif value[node_id][1] > value[node_id][0]:
            # Record each true leaf exactly once: a duplicate entry would
            # repeat its clause in the disjunction and double-count its
            # log-probability in the returned total.
            true_leaves.append(node_id)
            total_pos_leaf = value[node_id][1] + total_pos_leaf
            total_neg_leaf = value[node_id][0] + total_neg_leaf

    print("Sum of total positive leaves {}".format(total_pos_leaf))
    print("Sum of total negative leaves {}".format(total_neg_leaf))
    total_leaf = total_pos_leaf + total_neg_leaf
    if total_leaf > 0:
        print("Likelihood? {}".format(np.log(total_pos_leaf/total_leaf)))
    else:
        # No true leaf was found, so the ratio is undefined.
        print("Likelihood {}".format("Nan"))

    paths_to_true_leaves = [get_path_to_leaf(leaf, parents) for leaf in true_leaves]

    conjunctive_programs = []
    program_log_prob = 0.

    for path in paths_to_true_leaves:
        and_program, log_p = get_conjunctive_program(path, node_to_features, features, feature_log_probs)
        conjunctive_programs.append(and_program)
        program_log_prob += log_p

    disjunctive_program = get_disjunctive_program(conjunctive_programs)

    if not isinstance(disjunctive_program, StateActionProgram):
        disjunctive_program = StateActionProgram(disjunctive_program)

    return disjunctive_program, program_log_prob

Example #3

Show file

File: grammar_utils.py Project: AlbertG99/lpp_meta_learning

def generate_programs(grammar, start_symbol=0, num_iterations=100000000):
    """Lazily enumerate complete programs derivable from ``grammar``.

    A min-heap of partial programs is expanded best-first by accumulated
    priority. Each heap entry is
    ``(priority, negative log production probability, tie-breaker, program)``;
    the tie-breaker is a unique increasing counter, so tuple comparison is
    always decided before it would reach the program lists themselves.

    Args:
        grammar: passed through to ``get_child_programs``.
        start_symbol: symbol the initial partial program consists of.
        num_iterations: maximum number of partial programs to expand.

    Yields:
        ``(StateActionProgram, log_prob)`` for every complete child program,
        where ``log_prob`` is the accumulated log production probability.

    The generator finishes after ``num_iterations`` expansions or as soon as
    there are no partial programs left to expand, whichever comes first.
    """
    queue = []
    counter = itertools.count()

    hq.heappush(queue, (0, 0, next(counter), [start_symbol]))

    for _iteration in range(num_iterations):
        if not queue:
            # Every derivation has been expanded; popping from an empty heap
            # would raise IndexError, so end the generator instead.
            return

        priority, production_neg_log_prob, _, program = hq.heappop(queue)

        for child_program, child_production_prob, child_priority in get_child_programs(program, grammar):
            if program_is_complete(child_program):
                yield StateActionProgram(stringify(child_program)), -production_neg_log_prob + np.log(child_production_prob)
            else:
                hq.heappush(queue, (priority + child_priority, production_neg_log_prob - np.log(child_production_prob),
                                    next(counter), child_program))

Example #4

Show file

def extract_plp_from_dt(estimator, features, feature_log_probs):
    """Convert a fitted decision tree into a disjunction-of-conjunctions program.

    The tree is walked from the root (node 0). Every leaf whose second value
    entry exceeds its first is a "true" leaf (presumably "positive class
    outweighs negative class" -- confirm against how the estimator was
    trained). The root-to-leaf path of each true leaf becomes one conjunctive
    program, and all conjunctions are joined by ``get_disjunctive_program``.

    Args:
        estimator: fitted tree exposing ``tree_.children_left``,
            ``tree_.children_right``, ``tree_.feature``, ``tree_.threshold``
            and ``tree_.value``.
        features: passed through to ``get_conjunctive_program``.
        feature_log_probs: passed through to ``get_conjunctive_program``.

    Returns:
        ``(program, log_prob)`` where ``program`` is a ``StateActionProgram``
        and ``log_prob`` is the sum of the per-conjunction log-probabilities.

    Raises:
        AssertionError: if an internal node's threshold is not strictly
            between 0 and 1.
    """
    children_left = estimator.tree_.children_left
    children_right = estimator.tree_.children_right
    node_to_features = estimator.tree_.feature
    threshold = estimator.tree_.threshold
    value = estimator.tree_.value.squeeze()

    # squeeze() can leave a 1-D array (presumably when the tree is a single
    # node -- confirm). In that case the array itself is the row of counts.
    # Checking the rank up front replaces a try/except IndexError that
    # wrapped the whole node handling and could mask unrelated errors.
    value_is_single_row = value.ndim == 1

    stack = [0]
    parents = {0: None}
    true_leaves = []

    while len(stack) > 0:
        node_id = stack.pop()

        # A node whose two child ids differ is an internal (split) node.
        if (children_left[node_id] != children_right[node_id]):
            assert 0 < threshold[node_id] < 1
            stack.append(children_left[node_id])
            parents[children_left[node_id]] = (node_id, 'left')
            stack.append(children_right[node_id])
            parents[children_right[node_id]] = (node_id, 'right')
            continue

        leaf_counts = value if value_is_single_row else value[node_id]
        if leaf_counts[1] > leaf_counts[0]:
            true_leaves.append(node_id)

    paths_to_true_leaves = [
        get_path_to_leaf(leaf, parents) for leaf in true_leaves
    ]

    conjunctive_programs = []
    program_log_prob = 0.

    for path in paths_to_true_leaves:
        and_program, log_p = get_conjunctive_program(path, node_to_features,
                                                     features,
                                                     feature_log_probs)
        conjunctive_programs.append(and_program)
        program_log_prob += log_p

    disjunctive_program = get_disjunctive_program(conjunctive_programs)

    if not isinstance(disjunctive_program, StateActionProgram):
        disjunctive_program = StateActionProgram(disjunctive_program)

    return disjunctive_program, program_log_prob