Example #1
def run_one_simulation(eco_network=network):
    # Run the ecosystem network for `horizon` steps and extract the
    # 'cumulative_reward' field from the final network value.
    tf_runtime = runtime.TFRuntime(network=eco_network)
    final_value = tf_runtime.execute(num_steps=horizon)
    _, final_reward = network_lib.find_unique_field(
        final_value, field_name='cumulative_reward')
    # Return the mean reward and its square (useful for variance estimates).
    r = tf.reduce_mean(final_reward)
    return r, r * r
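Below is a minimal usage sketch (not part of the original snippet): it assumes `network` and `horizon` are defined in the enclosing scope, as above, and uses repeated calls to estimate the mean and variance of the cumulative reward.

# Sketch only: Monte Carlo estimate of the mean and variance of the final
# cumulative reward. Assumes `network`, `horizon`, and the modules used in
# Example #1 (tf, runtime, network_lib) are already available.
num_runs = 10
rewards, squared_rewards = [], []
for _ in range(num_runs):
    r, r_sq = run_one_simulation(eco_network=network)
    rewards.append(r)
    squared_rewards.append(r_sq)
mean_reward = tf.reduce_mean(tf.stack(rewards))
reward_variance = tf.reduce_mean(tf.stack(squared_rewards)) - mean_reward**2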
Example #2
    def test_disaggregated_log_prob(self):
        z, o, _, _ = self.chained_rv_test_network()
        # Per-variable log-prob variables computed from the observation `o` of
        # `z`, plus per-variable accumulators and a total accumulator.
        log_prob_vars = log_probability.log_prob_variables_from_observation(
            [z], [o])
        aggregators = log_probability.log_prob_accumulator_variables(
            log_prob_vars)
        aggregators.append(
            log_probability.total_log_prob_accumulator_variable(log_prob_vars))
        tf_runtime = runtime.TFRuntime(network=network_lib.Network(
            variables=[o] + log_prob_vars + aggregators))
        lptraj = tf_runtime.trajectory(4)
        self.assertSetEqual(
            set(lptraj.keys()),
            set([
                "o", "z_log_prob", "z_log_prob_accum", "total_log_prob_accum"
            ]))
        total_lp = lptraj["total_log_prob_accum"]
        z_lp = lptraj["z_log_prob"]
        z_cum_lp = lptraj["z_log_prob_accum"]
        # Accumulate the per-field log-probs by hand and compare them against
        # both the accumulator variables and the reference log_probability().
        a0_lp = 0.0
        a1_lp = 0.0
        for i in range(4):
            ref = log_probability.log_probability(variables=[z],
                                                  observation=[o],
                                                  num_steps=i)
            self.assertAllClose(total_lp.get("accum")[i], ref)
            self.assertAllClose(
                z_cum_lp.get("a0")[i] + z_cum_lp.get("a1")[i], ref)
            a0_lp += z_lp.get("a0")[i]
            a1_lp += z_lp.get("a1")[i]
            self.assertAllClose(z_cum_lp.get("a0")[i], a0_lp)
            self.assertAllClose(z_cum_lp.get("a1")[i], a1_lp)
Example #3
def log_probability(variables, observation, num_steps, graph_compile=True):
    """Returns the joint log probability of an observation given a network.

    Please note that the correctness of the result requires that all of the
    value functions of all the `Variable`s create `ed.RandomVariable` objects
    in a stable order. In other words, if a value function is invoked twice,
    it will create logically corresponding `ed.RandomVariable` objects in the
    same order.

    Args:
      variables: A sequence of `Variable`s defining a dynamic Bayesian network
        (DBN).
      observation: A sequence of `Variable`s that corresponds one-to-one with
        `variables` and which defines an observation of the DBN.
      num_steps: The number of time steps over which to measure the
        probability.
      graph_compile: Boolean indicating whether the computation should be run
        in graph mode.

    Returns:
      A Tensor like that returned from
      `tfp.distributions.Distribution.log_prob`.
    """
    log_prob_vars = log_prob_variables_from_observation(variables, observation)
    accumulator = total_log_prob_accumulator_variable(log_prob_vars)
    tf_runtime = runtime.TFRuntime(
        network=Network(
            variables=list(observation) + list(log_prob_vars) + [accumulator]),
        graph_compile=graph_compile)
    return tf_runtime.execute(num_steps)["total_log_prob_accum"].get("accum")
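A rough usage sketch for this function (illustrative, not library source): it scores a recorded trajectory of a variable `z` against the model. The `a0`/`a1` field specs and the `data` helpers are borrowed from Example #5 below; `z` and a trajectory dict `traj` (with key "z") are assumed to exist.

# Sketch only: wrap a recorded trajectory of `z` (traj["z"]) in a data
# variable and compute its joint log probability.
o = data.data_variable(
    name="o",
    spec=ValueSpec(a0=FieldSpec(), a1=FieldSpec()),
    data_sequence=data.SlicedValue(value=traj["z"]))
lp = log_probability.log_probability(
    variables=[z], observation=[o], num_steps=3)
print(lp)  # A scalar Tensor, analogous to Distribution.log_prob.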
Example #4
def make_runtime(variables):
    """Makes a simulation + policy log-prob runtime."""
    variables = list(variables)
    slate_var = [var for var in variables if var.name == 'slate docs']
    log_prob_var = log_probability.log_prob_variables_from_direct_output(
        slate_var)
    accumulator = log_probability.log_prob_accumulator_variables(log_prob_var)
    tf_runtime = runtime.TFRuntime(network=network_lib.Network(
        variables=list(variables) + list(log_prob_var) + list(accumulator)),
                                   graph_compile=False)
    return tf_runtime
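A possible way to drive this helper (illustrative only): the trajectory length is arbitrary, `variables` is assumed to contain a variable named 'slate docs' as the helper expects, and the 'slate docs_log_prob' key follows the naming convention visible in Example #7.

# Sketch only: build the runtime and sample a short trajectory that also
# carries the per-step log-probabilities of the 'slate docs' variable.
tf_runtime = make_runtime(variables)
traj = tf_runtime.trajectory(length=5)
print(traj['slate docs_log_prob'])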
Example #5
def test_log_probs_from_direct_output(self):
    z, _, _, _ = self.chained_rv_test_network()
    # Log-prob variables computed online, directly from the sampled values.
    online_lp_vars = log_probability.log_prob_variables_from_direct_output(
        [z])
    tf_runtime = runtime.TFRuntime(network=network_lib.Network(
        variables=[z] + online_lp_vars))
    online_lp_traj = tf_runtime.trajectory(4)
    self.assertSetEqual(set(online_lp_traj.keys()),
                        set(["z", "z_log_prob"]))
    # Replay the sampled trajectory of `z` as an observation and recompute the
    # log-probs offline; both computations must agree field by field.
    o = data.data_variable(
        name="o",
        spec=ValueSpec(a0=FieldSpec(), a1=FieldSpec()),
        data_sequence=data.SlicedValue(value=online_lp_traj["z"]))
    offline_lp_vars = log_probability.log_prob_variables_from_observation(
        [z], [o])
    tf_runtime = runtime.TFRuntime(network=network_lib.Network(
        variables=[o] + offline_lp_vars))
    offline_lp_traj = tf_runtime.trajectory(4)
    self.assertAllClose(online_lp_traj["z_log_prob"].get("a0"),
                        offline_lp_traj["z_log_prob"].get("a0"))
    self.assertAllClose(online_lp_traj["z_log_prob"].get("a1"),
                        offline_lp_traj["z_log_prob"].get("a1"))
Example #6
def main(argv):
    del argv
    running_times = []
    population_sizes = [10, 100, 1000]
    steps = 10
    for population_size in population_sizes:
        print("building simulation for steps={}, population_size={}".format(
            steps, population_size))
        tf_runtime = runtime.TFRuntime(network=network_lib.Network(
            variables=model(population_size)))
        print("starting simulation for steps={}, population_size={}".format(
            steps, population_size))
        sim_start = time.time()
        final_value = tf_runtime.execute(num_steps=steps)
        # Print every field of every variable in the final network value.
        for var, value in final_value.items():
            for name, val in value.as_dict.items():
                print("{}.{} =".format(var, name))
                tf.print(val)
        elapsed_time = time.time() - sim_start
        print("simulation for steps={}, population_size={} took {} sec".format(
            steps, population_size, elapsed_time))
        running_times.append(elapsed_time)
    print(running_times)
Example #7
def test_log_probability(self, graph_compile):
    tf.random.set_seed(0)
    horizon = 2
    variables, _ = (
        simulation_config.create_interest_evolution_simulation_network(
            num_users=5, num_topics=5, num_docs=5, freeze_corpus=False))
    network = network_lib.Network(variables=variables)
    filepath = os.path.join(os.path.dirname(__file__), 'trajectory.pickle')
    traj = util.pickle_to_network_value_trajectory(filepath, network)
    variables = tuple(variables)
    observations = log_probability.replay_variables(variables, traj)
    lp_vars = log_probability.log_prob_variables_from_observation(
        variables, observations)
    # Filtering out slate docs because their probability is parameterized by
    # the outputs of the scoring model, which gets initialized randomly.
    lp_vars = [v for v in lp_vars if v.name != 'slate docs_log_prob']
    accumulator = log_probability.total_log_prob_accumulator_variable(lp_vars)
    tf_runtime = runtime.TFRuntime(
        network=network_lib.Network(
            variables=list(observations) + list(lp_vars) + [accumulator]),
        graph_compile=graph_compile)
    log_prob_no_slate = tf_runtime.execute(
        horizon - 1)['total_log_prob_accum'].get('accum')
    self.assertAllClose(log_prob_no_slate, -100.38593292236328)
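For reference, when no per-variable filtering is needed, the same trajectory can be scored in one call with `log_probability.log_probability_from_value_trajectory` (its signature is visible in Example #8 below); a sketch reusing `variables`, `traj`, and `horizon` from the test above:

# Sketch only: score the pickled trajectory in a single call. Unlike the test
# above, this keeps the 'slate docs' log-prob term, so the result will differ
# from log_prob_no_slate.
full_log_prob = log_probability.log_probability_from_value_trajectory(
    variables=variables, value_trajectory=traj, num_steps=horizon - 1)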
Example #8
def main(argv):
    del argv
    horizon = 6
    num_users = 5
    num_topics = 3
    slate_size = 4
    num_iters = 100
    # Set sensitivity to 0.8 for all users to generate trajectories.
    variables = simulation_config.create_latent_variable_model_network(
        num_users=num_users, num_topics=num_topics, slate_size=slate_size)
    data_generation_network = network_lib.Network(variables=variables)
    tf_runtime = runtime.TFRuntime(network=data_generation_network)
    traj = dict(tf_runtime.trajectory(length=horizon))
    print('===============GROUND TRUTH LIKELIHOOD================')
    print(
        log_probability.log_probability_from_value_trajectory(
            variables=variables, value_trajectory=traj, num_steps=horizon - 1))
    print('======================================================')

    t_begin = time.time()
    # Try to recover the sensitivity.
    sensitivity_var = tf.Variable(
        tf.linspace(0., 1., num=num_users),
        dtype=tf.float32,
        constraint=lambda x: tf.clip_by_value(x, 0.0, 1.0))
    story = lambda: simulation_config.create_latent_variable_model_network(  # pylint: disable=g-long-lambda
        num_users=num_users,
        num_topics=num_topics,
        slate_size=slate_size,
        satisfaction_sensitivity=sensitivity_var)
    trainable_vars = entity.story_with_trainable_variables(
        story)[1]['ModelLearningDemoUser']

    def unnormalized_log_prob_train(intent):
        # Hold out the user intent in the trajectories.
        intent_traj = tf.expand_dims(intent, axis=0) + tf.zeros(
            (horizon, num_users, num_topics))
        user_state_dict = dict(traj['user state'].as_dict)
        user_state_dict['intent'] = intent_traj
        traj['user state'] = Value(**user_state_dict)
        return log_probability.log_probability_from_value_trajectory(
            variables=story(), value_trajectory=traj, num_steps=horizon - 1)

    # Initialize the HMC transition kernel.
    num_results = int(1e3)
    num_burnin_steps = int(5e2)
    adaptive_hmc = tfp.mcmc.SimpleStepSizeAdaptation(
        tfp.mcmc.HamiltonianMonteCarlo(
            target_log_prob_fn=unnormalized_log_prob_train,
            num_leapfrog_steps=5,
            step_size=1e-4),
        num_adaptation_steps=int(num_burnin_steps * 0.8))

    # Run the chain (with burn-in).
    @tf.function
    def run_chain():
        samples, is_accepted = tfp.mcmc.sample_chain(
            num_results=num_results,
            num_burnin_steps=num_burnin_steps,
            current_state=tfd.Normal(loc=tf.ones(
                (num_users, num_topics)) / num_users,
                                     scale=1.0).sample(),
            kernel=adaptive_hmc,
            trace_fn=lambda _, pkr: pkr.inner_results.is_accepted)

        sample_mean = tf.reduce_mean(samples)
        sample_stddev = tf.math.reduce_std(samples)
        is_accepted = tf.reduce_mean(tf.cast(is_accepted, dtype=tf.float32))
        return samples, sample_mean, sample_stddev, is_accepted

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.02)
    for i in range(num_iters):
        posterior_samples, sample_mean, sample_stddev, is_accepted = (
            run_chain())
        print('mean:{:.4f}  stddev:{:.4f}  acceptance:{:.4f}'.format(
            sample_mean.numpy(), sample_stddev.numpy(), is_accepted.numpy()))
        log_probs = []
        with tf.GradientTape() as tape:
            log_probs = tf.vectorized_map(
                unnormalized_log_prob_train,
                posterior_samples[num_burnin_steps:, ])
            log_prob = -tf.reduce_mean(log_probs)
        grads = tape.gradient(log_prob, trainable_vars)
        optimizer.apply_gradients(zip(grads, trainable_vars))
        print(i, trainable_vars[0].numpy(), tf.reduce_mean(log_probs).numpy())
    print('Elapsed time: %.3f seconds' % (time.time() - t_begin))