import itertools
import heapq as hq

import numpy as np
from scipy.special import logsumexp

# Helper functions (get_program_set, run_all_programs_on_demonstrations, learn_plps,
# get_demonstrations, compute_likelihood_plps, select_particles, get_child_programs,
# program_is_complete, stringify, get_path_to_leaf, get_conjunctive_program,
# get_disjunctive_program) and the PLPPolicy / StateActionProgram classes are assumed
# to be defined or imported elsewhere in the repository.


def train(base_class_name, demo_numbers, program_generation_step_size, num_programs, num_dts, max_num_particles):
    # Enumerate candidate feature programs and their prior log probabilities.
    programs, program_prior_log_probs = get_program_set(base_class_name, num_programs)

    # Evaluate every program on the demonstration data, then learn candidate PLPs.
    X, y = run_all_programs_on_demonstrations(base_class_name, num_programs, demo_numbers)
    plps, plp_priors = learn_plps(X, y, programs, program_prior_log_probs, num_dts=num_dts,
                                  program_generation_step_size=program_generation_step_size)

    # Score each candidate PLP against the demonstrations.
    demonstrations = get_demonstrations(base_class_name, demo_numbers=demo_numbers)
    likelihoods = compute_likelihood_plps(plps, demonstrations)

    # Unnormalized log posterior for each particle: prior + likelihood.
    particles = []
    particle_log_probs = []
    for plp, prior, likelihood in zip(plps, plp_priors, likelihoods):
        particles.append(plp)
        particle_log_probs.append(prior + likelihood)

    print("\nDone!")
    map_idx = np.argmax(particle_log_probs).squeeze()
    print("MAP program ({}):".format(particle_log_probs[map_idx]))
    print(particles[map_idx])

    # Keep the best particles and renormalize their weights in log space.
    top_particles, top_particle_log_probs = select_particles(particles, particle_log_probs, max_num_particles)
    if len(top_particle_log_probs) > 0:
        top_particle_log_probs = np.array(top_particle_log_probs) - logsumexp(top_particle_log_probs)
        top_particle_probs = np.exp(top_particle_log_probs)
        print("top_particle_probs:", top_particle_probs)
        policy = PLPPolicy(top_particles, top_particle_probs)
    else:
        print("no nontrivial particles found")
        policy = PLPPolicy([StateActionProgram("False")], [1.0])

    return policy
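# --- Illustration (assumption: not part of the original pipeline) ---------------------
# A minimal, self-contained sketch of the normalization step used in train() above:
# unnormalized log posteriors (prior + likelihood) are shifted by logsumexp and
# exponentiated, which is the numerically stable softmax over log probabilities.
# The log-probability values below are made up purely for illustration.
def _demo_normalize_particle_log_probs():
    example_log_probs = np.array([-10.2, -11.0, -14.7])
    example_probs = np.exp(example_log_probs - logsumexp(example_log_probs))
    # example_probs is a proper distribution: non-negative entries that sum to 1.
    return example_probs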
def extract_plp_from_dt(estimator, features, feature_log_probs):
    """Convert a fitted decision tree into a disjunction of conjunctive programs."""
    n_nodes = estimator.tree_.node_count
    children_left = estimator.tree_.children_left
    children_right = estimator.tree_.children_right
    node_to_features = estimator.tree_.feature
    threshold = estimator.tree_.threshold
    value = estimator.tree_.value.squeeze()

    stack = [0]
    parents = {0: None}
    true_leaves = []
    total_pos_leaf = 0
    total_neg_leaf = 0

    while len(stack) > 0:
        node_id = stack.pop()

        if children_left[node_id] != children_right[node_id]:
            # Internal node: record the parent edge and descend into both children.
            assert 0 < threshold[node_id] < 1
            stack.append(children_left[node_id])
            parents[children_left[node_id]] = (node_id, 'left')
            stack.append(children_right[node_id])
            parents[children_right[node_id]] = (node_id, 'right')
        elif value[node_id][1] > value[node_id][0]:
            # Leaf that predicts the positive class; accumulate its class counts.
            true_leaves.append(node_id)
            total_pos_leaf += value[node_id][1]
            total_neg_leaf += value[node_id][0]

    print("Sum of total positive leaves {}".format(total_pos_leaf))
    print("Sum of total negative leaves {}".format(total_neg_leaf))
    try:
        print("Likelihood? {}".format(np.log(total_pos_leaf / (total_pos_leaf + total_neg_leaf))))
    except ZeroDivisionError:
        print("Likelihood {}".format("Nan"))

    # Each positive leaf yields one conjunctive program; OR them all together.
    paths_to_true_leaves = [get_path_to_leaf(leaf, parents) for leaf in true_leaves]

    conjunctive_programs = []
    program_log_prob = 0.
    for path in paths_to_true_leaves:
        and_program, log_p = get_conjunctive_program(path, node_to_features, features, feature_log_probs)
        conjunctive_programs.append(and_program)
        program_log_prob += log_p

    disjunctive_program = get_disjunctive_program(conjunctive_programs)
    if not isinstance(disjunctive_program, StateActionProgram):
        disjunctive_program = StateActionProgram(disjunctive_program)

    return disjunctive_program, program_log_prob
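# --- Illustration (assumption: not part of the original pipeline) ---------------------
# A minimal sketch of the scikit-learn internals that extract_plp_from_dt() walks:
# after fitting, tree_.children_left / children_right / feature / threshold / value
# describe every node, and a leaf is any node whose left and right child ids are equal.
# The toy data below is made up purely for illustration.
def _demo_walk_decision_tree():
    from sklearn.tree import DecisionTreeClassifier

    X_toy = [[0, 0], [0, 1], [1, 0], [1, 1]]
    y_toy = [0, 0, 0, 1]  # positive only when both binary features are 1
    clf = DecisionTreeClassifier(random_state=0).fit(X_toy, y_toy)

    tree = clf.tree_
    leaves = []
    stack = [0]
    while stack:
        node_id = stack.pop()
        if tree.children_left[node_id] != tree.children_right[node_id]:
            stack.extend([tree.children_left[node_id], tree.children_right[node_id]])
        else:
            leaves.append((node_id, tree.value[node_id].squeeze()))
    return leaves  # [(leaf_id, per-class values at that leaf), ...]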
def generate_programs(grammar, start_symbol=0, num_iterations=100000000):
    """Enumerate complete programs from the grammar in order of decreasing probability."""
    queue = []
    counter = itertools.count()  # tie-breaker so the heap never compares program lists
    hq.heappush(queue, (0, 0, next(counter), [start_symbol]))

    for iteration in range(num_iterations):
        priority, production_neg_log_prob, _, program = hq.heappop(queue)

        for child_program, child_production_prob, child_priority in get_child_programs(program, grammar):
            if program_is_complete(child_program):
                # Yield the finished program along with its total production log probability.
                yield (StateActionProgram(stringify(child_program)),
                       -production_neg_log_prob + np.log(child_production_prob))
            else:
                hq.heappush(queue, (priority + child_priority,
                                    production_neg_log_prob - np.log(child_production_prob),
                                    next(counter),
                                    child_program))
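# --- Illustration (assumption: not part of the original pipeline) ---------------------
# A minimal sketch of the best-first enumeration pattern used by generate_programs():
# a heap keyed by cumulative negative log probability pops the most probable partial
# item first, and an itertools counter breaks ties so the heap never compares the
# payloads themselves. The toy "grammar" of weighted expansions is made up for
# illustration; only the enumeration pattern mirrors the function above.
def _demo_best_first_enumeration(max_items=5):
    toy_expansions = {"S": [("a", 0.6), ("b S", 0.4)]}  # hypothetical weighted rules
    counter = itertools.count()
    queue = [(0.0, next(counter), ["S"])]
    results = []
    while queue and len(results) < max_items:
        neg_log_prob, _, tokens = hq.heappop(queue)
        nonterminal_idx = next((i for i, t in enumerate(tokens) if t == "S"), None)
        if nonterminal_idx is None:
            results.append((" ".join(tokens), -neg_log_prob))  # complete: record log prob
            continue
        for expansion, prob in toy_expansions["S"]:
            child = tokens[:nonterminal_idx] + expansion.split() + tokens[nonterminal_idx + 1:]
            hq.heappush(queue, (neg_log_prob - np.log(prob), next(counter), child))
    return results  # most probable sentences first, e.g. ("a", log 0.6), ("b a", log 0.24), ...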
def extract_plp_from_dt(estimator, features, feature_log_probs):
    # Variant of extract_plp_from_dt that also handles a degenerate single-node tree,
    # where value.squeeze() collapses to shape (2,) and per-node indexing raises IndexError.
    n_nodes = estimator.tree_.node_count
    children_left = estimator.tree_.children_left
    children_right = estimator.tree_.children_right
    node_to_features = estimator.tree_.feature
    threshold = estimator.tree_.threshold
    value = estimator.tree_.value.squeeze()

    stack = [0]
    parents = {0: None}
    true_leaves = []

    while len(stack) > 0:
        node_id = stack.pop()
        try:
            if children_left[node_id] != children_right[node_id]:
                # Internal node: record the parent edge and descend into both children.
                assert 0 < threshold[node_id] < 1
                stack.append(children_left[node_id])
                parents[children_left[node_id]] = (node_id, 'left')
                stack.append(children_right[node_id])
                parents[children_right[node_id]] = (node_id, 'right')
            elif value[node_id][1] > value[node_id][0]:
                # Leaf that predicts the positive class.
                true_leaves.append(node_id)
        except IndexError:
            # Single-node tree: value is a flat [negative, positive] pair.
            if value[1] > value[0]:
                true_leaves.append(node_id)

    # Each positive leaf yields one conjunctive program; OR them all together.
    paths_to_true_leaves = [get_path_to_leaf(leaf, parents) for leaf in true_leaves]

    conjunctive_programs = []
    program_log_prob = 0.
    for path in paths_to_true_leaves:
        and_program, log_p = get_conjunctive_program(path, node_to_features, features, feature_log_probs)
        conjunctive_programs.append(and_program)
        program_log_prob += log_p

    disjunctive_program = get_disjunctive_program(conjunctive_programs)
    if not isinstance(disjunctive_program, StateActionProgram):
        disjunctive_program = StateActionProgram(disjunctive_program)

    return disjunctive_program, program_log_prob
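# --- Illustration (assumption: not part of the original pipeline) ---------------------
# A minimal sketch of how a decision-tree path can be read as a logical program, in the
# spirit of get_conjunctive_program / get_disjunctive_program (whose real implementations
# live elsewhere in the repository). Each root-to-leaf path becomes a conjunction, here
# assuming a right branch keeps the node's feature and a left branch negates it, and the
# positive leaves are OR-ed together. The feature names are hypothetical.
def _demo_paths_to_program():
    paths = [
        [("is_red(s, a)", "right"), ("is_small(s, a)", "right")],
        [("is_red(s, a)", "left"), ("is_blue(s, a)", "right")],
    ]
    conjunctions = []
    for path in paths:
        literals = [feat if side == "right" else "not ({})".format(feat) for feat, side in path]
        conjunctions.append("(" + " and ".join(literals) + ")")
    return " or ".join(conjunctions)
    # -> "(is_red(s, a) and is_small(s, a)) or (not (is_red(s, a)) and is_blue(s, a))"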