Example #1
import numpy as np
from scipy.special import logsumexp

# Project-specific helpers (get_program_set, run_all_programs_on_demonstrations,
# learn_plps, get_demonstrations, compute_likelihood_plps, select_particles,
# PLPPolicy, StateActionProgram) are assumed to be defined elsewhere in the module.

def train(base_class_name, demo_numbers, program_generation_step_size, num_programs, num_dts, max_num_particles):
    # Enumerate candidate programs together with their prior log probabilities.
    programs, program_prior_log_probs = get_program_set(base_class_name, num_programs)

    # Build the classification dataset: X[i, j] is the result of program j on
    # demonstration item i, and y[i] labels positive vs. negative examples.
    X, y = run_all_programs_on_demonstrations(base_class_name, num_programs, demo_numbers)
    # Learn candidate PLPs (and their prior log probabilities) from the program features.
    plps, plp_priors = learn_plps(X, y, programs, program_prior_log_probs, num_dts=num_dts,
        program_generation_step_size=program_generation_step_size)

    demonstrations = get_demonstrations(base_class_name, demo_numbers=demo_numbers)
    # Score each candidate PLP by its log likelihood of the demonstrated actions.
    likelihoods = compute_likelihood_plps(plps, demonstrations)

    particles = []
    particle_log_probs = []

    # Each candidate PLP becomes a particle weighted by its unnormalized log posterior
    # (log prior + log likelihood).
    for plp, prior, likelihood in zip(plps, plp_priors, likelihoods):
        particles.append(plp)
        particle_log_probs.append(prior + likelihood)

    print("\nDone!")
    map_idx = np.argmax(particle_log_probs)
    print("MAP program ({}):".format(particle_log_probs[map_idx]))
    print(particles[map_idx])

    # Keep the highest-weight particles and renormalize their weights in log space.
    top_particles, top_particle_log_probs = select_particles(particles, particle_log_probs, max_num_particles)
    if len(top_particle_log_probs) > 0:
        top_particle_log_probs = np.array(top_particle_log_probs) - logsumexp(top_particle_log_probs)
        top_particle_probs = np.exp(top_particle_log_probs)
        print("top_particle_probs:", top_particle_probs)
        policy = PLPPolicy(top_particles, top_particle_probs)
    else:
        print("no nontrivial particles found")
        policy = PLPPolicy([StateActionProgram("False")], [1.0])

    return policy
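
For reference, a minimal sketch of how train might be invoked; the environment name and every hyperparameter value below are illustrative assumptions, not values prescribed by the example above.

# Hypothetical invocation of train(); all argument values are assumptions chosen
# for illustration only.
if __name__ == "__main__":
    policy = train(
        base_class_name="TwoPileNim",      # assumed environment/base class name
        demo_numbers=(0, 1, 2),            # which demonstrations to learn from
        program_generation_step_size=10,   # assumed step size for program generation
        num_programs=1000,                 # number of candidate programs to enumerate
        num_dts=5,                         # assumed number of decision trees for learn_plps
        max_num_particles=25,              # cap on particles kept in the returned PLPPolicy
    )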
Example #2
import multiprocessing
from functools import partial

from scipy.sparse import lil_matrix

# Project-specific helpers (get_program_set, get_demonstrations,
# extract_examples_from_demonstration, apply_programs) are likewise assumed to be
# defined elsewhere in the module.

def run_all_programs_on_single_demonstration(base_class_name, num_programs, demo_number, program_interval=1000):
    """
    Run the first num_programs candidate programs on a single demonstration.

    Expensive in general because programs can be slow and numerous, so caching can be very helpful.

    Parallelization is designed to save time in the regime of many programs.

    Care is taken to avoid memory issues, which are a serious problem when num_programs exceeds 50,000.

    Returns classification dataset X, y.

    Parameters
    ----------
    base_class_name : str
    num_programs : int
    demo_number : int
    program_interval : int
        Number of programs per batch; batches are dispatched to the worker pool
        one at a time to limit memory usage.

    Returns
    -------
    X : csr_matrix
        X.shape = (num_demo_items, num_programs)
    y : list of int
        Binary labels (1 for positive examples, 0 for negative examples);
        len(y) = num_demo_items.
    """

    print("Running all programs on {}, {}".format(base_class_name, demo_number))

    programs, _ = get_program_set(base_class_name, num_programs)

    demonstration = get_demonstrations(base_class_name, demo_numbers=(demo_number,))
    positive_examples, negative_examples = extract_examples_from_demonstration(demonstration)
    y = [1] * len(positive_examples) + [0] * len(negative_examples)

    num_data = len(y)
    num_programs = len(programs)

    # lil_matrix supports efficient incremental assignment; it is converted to CSR at the end.
    X = lil_matrix((num_data, num_programs), dtype=bool)

    # Process the programs in batches of program_interval to avoid memory issues
    for i in range(0, num_programs, program_interval):
        end = min(i+program_interval, num_programs)
        print('Iteration {} of {}'.format(i, num_programs), end='\r')

        num_workers = multiprocessing.cpu_count()
        pool = multiprocessing.Pool(num_workers)

        fn = partial(apply_programs, programs[i:end])
        fn_inputs = positive_examples + negative_examples
        
        results = pool.map(fn, fn_inputs)
        pool.close()

        # Row X_idx is one demonstration item; columns i:end hold this batch of program outputs.
        for X_idx, x in enumerate(results):
            X[X_idx, i:end] = x

    X = X.tocsr()

    print()
    return X, y
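
The loop above dispatches an apply_programs helper through functools.partial and pool.map, but the helper itself is not shown. A minimal sketch of what it might look like, assuming each program is simply callable on one demonstration item and returns a boolean:

# Hypothetical apply_programs helper matching the pool.map call above: it applies
# one batch of programs to a single demonstration item and returns one result per
# program. The way a program is invoked here is an assumption.
def apply_programs(programs, fn_input):
    return [program(fn_input) for program in programs]

Likewise, the train example calls run_all_programs_on_demonstrations, which is not shown either; a plausible sketch, assuming it simply stacks the per-demonstration outputs of run_all_programs_on_single_demonstration:

from scipy.sparse import vstack

# Hypothetical aggregator over multiple demonstrations: stack the sparse feature
# matrices row-wise and concatenate the label lists. The real implementation may
# differ (for example by caching results per demonstration).
def run_all_programs_on_demonstrations(base_class_name, num_programs, demo_numbers):
    X_blocks, y = [], []
    for demo_number in demo_numbers:
        X_demo, y_demo = run_all_programs_on_single_demonstration(base_class_name, num_programs, demo_number)
        X_blocks.append(X_demo)
        y.extend(y_demo)
    return vstack(X_blocks).tocsr(), y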