Example No. 1
def main(args):
    # create an importance sampler (the prior is used as the proposal distribution)
    importance = Importance(model, guide=None, num_samples=args.num_samples)
    # get posterior samples of mu (which is the return value of model)
    # from the raw execution traces provided by the importance sampler.
    print("doing importance sampling...")
    emp_marginal = EmpiricalMarginal(importance.run(observed_data))

    # calculate statistics over posterior samples
    posterior_mean = emp_marginal.mean
    posterior_std_dev = emp_marginal.variance.sqrt()

    # report results
    inferred_mu = posterior_mean.item()
    inferred_mu_uncertainty = posterior_std_dev.item()
    print("the coefficient of friction inferred by pyro is %.3f +- %.3f" %
          (inferred_mu, inferred_mu_uncertainty))

    # note that, given the finite step size in the simulator, the simulated descent times will
    # not precisely match the numbers from the analytic result.
    # in particular the first two numbers reported below should match each other pretty closely
    # but will be systematically off from the third number
    print("the mean observed descent time in the dataset is: %.4f seconds" %
          observed_mean)
    print(
        "the (forward) simulated descent time for the inferred (mean) mu is: %.4f seconds"
        % simulate(posterior_mean).item())
    print((
        "disregarding measurement noise, elementary calculus gives the descent time\n"
        + "for the inferred (mean) mu as: %.4f seconds") %
          analytic_T(posterior_mean.item()))
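
The snippet above assumes a generative `model` (together with helpers such as `simulate`, `analytic_T`, and the `observed_data`/`observed_mean` globals) defined elsewhere in the script. A minimal sketch of what such a Pyro model could look like is given below; the prior, the closed-form stand-in for the simulator, and the noise scale are illustrative assumptions, not the script's actual definitions.

import torch
import pyro
import pyro.distributions as dist

def simulate(mu, phi=torch.tensor(0.5), length=2.0):
    # hypothetical closed-form stand-in for a step-by-step simulator:
    # time to slide down an inclined plane with inclination phi, length
    # `length`, and coefficient of friction mu
    a = 9.8 * (torch.sin(phi) - mu * torch.cos(phi))
    return torch.sqrt(2.0 * length / a)

def model(observed_data):
    # latent coefficient of friction; the prior is kept narrow enough that
    # the block always slides (a > 0)
    mu = pyro.sample("mu", dist.Uniform(0.0, 0.5))
    # each observed descent time is a noisy measurement of the simulated time
    with pyro.plate("data", len(observed_data)):
        pyro.sample("obs", dist.Normal(simulate(mu), 0.02), obs=observed_data)
    return mu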
    """
Example No. 2
def main(args):
    # create an importance sampler (the prior is used as the proposal distribution)
    importance = Importance(model, guide=None, num_samples=args.num_samples)
    # get posterior samples of mu (which is the return value of model)
    # from the raw execution traces provided by the importance sampler.
    print("doing importance sampling...")
    emp_marginal = EmpiricalMarginal(importance.run(observed_data))

    # calculate statistics over posterior samples
    posterior_mean = emp_marginal.mean
    posterior_std_dev = emp_marginal.variance.sqrt()

    # report results
    inferred_mu = posterior_mean.item()
    inferred_mu_uncertainty = posterior_std_dev.item()
    print("the coefficient of friction inferred by pyro is %.3f +- %.3f" %
          (inferred_mu, inferred_mu_uncertainty))

    # note that, given the finite step size in the simulator, the simulated descent times will
    # not precisely match the numbers from the analytic result.
    # in particular the first two numbers reported below should match each other pretty closely
    # but will be systematically off from the third number
    print("the mean observed descent time in the dataset is: %.4f seconds" % observed_mean)
    print("the (forward) simulated descent time for the inferred (mean) mu is: %.4f seconds" %
          simulate(posterior_mean).item())
    print(("disregarding measurement noise, elementary calculus gives the descent time\n" +
           "for the inferred (mean) mu as: %.4f seconds") % analytic_T(posterior_mean.item()))

    """
Example No. 3
    def get_log_marginal_density(loader):
        model.eval()
        meter = AverageMeter()
        pbar = tqdm(total=len(loader))

        with torch.no_grad():
            for _, response, _, mask in loader:
                mb = response.size(0)
                response = response.to(device)
                mask = mask.long().to(device)

                posterior = Importance(
                    model.model,
                    guide=model.guide,
                    num_samples=args.num_posterior_samples,
                )
                posterior = posterior.run(response, mask)
                log_weights = torch.stack(posterior.log_weights)
                marginal = torch.logsumexp(log_weights, 0) - math.log(
                    log_weights.size(0))
                meter.update(marginal.item(), mb)

                pbar.update()
                pbar.set_postfix({'Marginal': meter.avg})

        pbar.close()
        print('====> Marginal: {:.4f}'.format(meter.avg))

        return meter.avg
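
The `logsumexp` line above is the usual importance-sampling estimate of the log marginal likelihood: with $S$ posterior samples and log importance weights $\log w_i$ collected by `Importance`, the estimate is $\log \hat{p}(x) = \operatorname{logsumexp}_i(\log w_i) - \log S$, i.e. the log of the average importance weight, computed in a numerically stable way.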
Example No. 4
def policy_control_as_inference_like(env,
                                     *,
                                     trajectory_model,
                                     agent_model,
                                     log=False):
    """policy_control_as_inference_like

    Implements a control-as-inference-like policy which "maximizes"
    $\\Pr(A_0 \\mid S_0, G \\text{ high})$.

    This is not standard control as inference, because we do not actually
    condition on G; rather, we use $\\alpha G$ as a likelihood factor on
    sampled traces.

    :param env: OpenAI Gym environment
    :param trajectory_model: trajectory probabilistic program
    :param agent_model: agent's probabilistic program
    :param log: boolean; if True, print log info
    """
    inference = Importance(trajectory_model, num_samples=args.num_samples)
    posterior = inference.run(env, agent_model=agent_model, factor_G=True)
    marginal = EmpiricalMarginal(posterior, 'A_0')

    if log:
        samples = marginal.sample((args.num_samples, ))
        counts = Counter(samples.tolist())
        hist = [
            counts[i] / args.num_samples for i in range(env.action_space.n)
        ]
        print('policy:')
        print(tabulate([hist], headers=env.actions, tablefmt='fancy_grid'))

    return marginal.sample()
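
The call to `inference.run(env, agent_model=agent_model, factor_G=True)` assumes a `trajectory_model` that exposes the first action as a sample site named 'A_0' and, when `factor_G` is set, adds $\alpha G$ to the trace log-joint. A minimal sketch under those assumptions follows; the horizon, the `agent_model` signature, and the gym step handling are hypothetical, not taken from the original code.

import copy

import pyro
import pyro.distributions as dist
import torch

def trajectory_model(env, *, agent_model, factor_G=False):
    # Hypothetical sketch: roll a copy of the environment forward and record
    # the return G; only the structure matters here, not the exact dynamics.
    env = copy.deepcopy(env)
    # the first action is an explicit sample site so it can be marginalized as 'A_0'
    action = pyro.sample('A_0', dist.Categorical(torch.ones(env.action_space.n)))
    G = 0.0
    for t in range(args.horizon):  # args.horizon is an assumed name
        _, reward, done, _ = env.step(action.item())
        G = G + reward
        if done:
            break
        action = agent_model(t + 1, env)  # assumed agent_model signature
    G = pyro.deterministic('G', torch.tensor(float(G)))
    if factor_G:
        # soft "conditioning": traces with a higher return receive exponentially
        # larger importance weight
        pyro.factor('factor_G', args.alpha * G)
    return G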
Example No. 5
def softmax_agent_model(env):
    """softmax_agent_model

    Softmax agent model; performs inference to estimate $Q^\\pi(s, a)$, then
    uses pyro.factor to modify the trace log-likelihood.

    :param env: OpenAI Gym environment
    """
    policy_probs = torch.ones(env.state_space.n, env.action_space.n)
    policy_vector = pyro.sample('policy_vector', Categorical(policy_probs))

    inference = Importance(trajectory_model, num_samples=args.num_samples)
    posterior = inference.run(env, lambda state: policy_vector[state])
    Q = EmpiricalMarginal(posterior, 'G').mean

    pyro.factor('factor_Q', args.alpha * Q)

    return policy_vector
Example No. 6
def softmax_agent_model(t, env, *, trajectory_model):
    """softmax_agent_model

    Softmax agent model; performs inference to estimate $Q^\\pi(s, a)$, then
    uses pyro.factor to modify the trace log-likelihood.

    :param t: time-step
    :param env: OpenAI Gym environment
    :param trajectory_model: trajectory probabilistic program
    """
    action_probs = torch.ones(env.action_space.n)
    action = pyro.sample(f'A_{t}', Categorical(action_probs))

    inference = Importance(trajectory_model, num_samples=args.num_samples)
    posterior = inference.run(t, env, action)
    Q = EmpiricalMarginal(posterior, f'G_{t}').mean

    pyro.factor(f'softmax_{t}', args.alpha * Q)

    return action
Example No. 7
def policy(t, env, *, trajectory_model, log=False):
    """policy

    :param t: time-step
    :param env: OpenAI Gym environment
    :param trajectory_model: trajectory probabilistic program
    :param log: boolean; if True, print log info
    """
    inference = Importance(softmax_agent_model, num_samples=args.num_samples)
    posterior = inference.run(t, env, trajectory_model=trajectory_model)
    marginal = EmpiricalMarginal(posterior, f'A_{t}')

    if log:
        samples = marginal.sample((args.num_samples, ))
        counts = Counter(samples.tolist())
        hist = [
            counts[i] / args.num_samples for i in range(env.action_space.n)
        ]
        print('policy:')
        print(tabulate([hist], headers=env.actions, tablefmt='fancy_grid'))

    return marginal.sample()
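
A hedged usage sketch for the `policy` above (the rollout loop, `args.max_steps`, and the reward handling are assumptions, not part of the original example):

# hypothetical rollout: query the inferred policy at every time step
env.reset()
for t in range(args.max_steps):  # args.max_steps is an assumed name
    action = policy(t, env, trajectory_model=trajectory_model, log=True)
    _, reward, done, _ = env.step(action.item())
    if done:
        break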
Example No. 8
def policy(env, log=False):
    """policy

    :param env: OpenAI Gym environment
    :param log: boolean; if True, print log info
    """
    inference = Importance(softmax_agent_model, num_samples=args.num_samples)
    posterior = inference.run(env)
    marginal = EmpiricalMarginal(posterior, 'policy_vector')

    if log:
        policy_samples = marginal.sample((args.num_samples, ))
        action_samples = policy_samples[:, env.state]
        counts = Counter(action_samples.tolist())
        hist = [
            counts[i] / args.num_samples for i in range(env.action_space.n)
        ]
        print('policy:')
        print(tabulate([hist], headers=env.actions, tablefmt='fancy_grid'))

    policy_vector = marginal.sample()
    return policy_vector[env.state]
Example No. 9
def expected_reward(Q_function, action, env, i):
    def get_posterior_mean(posterior, n_samples=30):
        """
        Calculate posterior mean
        """
        # Sample
        marginal_dist = EmpiricalMarginal(posterior).sample(
            (n_samples, 1)).float()
        # assumed to be all the same
        return torch.mean(marginal_dist)

    # The use of the param store is an optimization
    param_name = 'posterior_reward_state{}_{}'.format(env.s, i)
    if param_name in list(pyro.get_param_store().keys()):
        posterior_mean = pyro.get_param_store().get_param(param_name)
        return posterior_mean
    else:
        # this gets slower as we increase num_samples
        inference = Importance(Q_function, num_samples=30)
        posterior = inference.run(action, env, i)
        posterior_mean = get_posterior_mean(posterior, 30)
        pyro.param(param_name, posterior_mean)
        return posterior_mean
Example No. 10
def main():
    """
    This main routine runs the agent using Pyro's EmpiricalMarginal and Importance methods, as opposed to
    main.py, which uses a custom loop to step the agent. This routine runs, but is less flexible when it comes
    to visualising because the samples in the posterior are private. It was also found that passing the
    posterior back as the prior did not yield clear results, although it did step the model forward in the
    expected direction. This should be looked into further.

    :return:
    """
    posterior = None
    sensor = build_observations()
    agent = Agent(x=0., y=500.)
    agent.pick_destination(doors=environment.doors)

    # This is the inference algorithm. As far as is understood, it simply samples the stochastic
    # function and builds a distribution from the resulting traces.
    infer = Importance(model=agent.step, num_samples=1000)

    for step in range(n_steps):
        # Assimilate observation and update the prior with the posterior distribution.
        if (step % 10) == 0:  # For every n steps, calibrate.
            obs = sensor.aggregate_obs(step)
            print('\nAssimilating_Observation at Step {}'.format(step))
            sensor.print_detail(step)
        else:
            # Otherwise, do not make an observation and simply run the model forward using the agent's internal position.
            obs = None

        # The resulting marginal exposes attributes such as the mean and std of the posterior, which can
        # then be passed back as the prior on the next iteration.
        posterior = pyro.infer.EmpiricalMarginal(infer.run(posterior=posterior,
                                                           obs=obs),
                                                 sites=['xy'])
        print_agent_loc(agent, step)
        print_posterior(posterior)