def log_likelihood_components(self, s, u, v, w, data, *args, **kwargs):
    """Return the log likelihood without summing along axes.

    Arguments:
        s {tf.Tensor} -- Samples of s
        u {tf.Tensor} -- Samples of u
        v {tf.Tensor} -- Samples of v
        w {tf.Tensor} -- Samples of w
        data {dict} -- Batch containing the count matrix under self.count_key

    Returns:
        dict -- per-entry log likelihood in broadcasted shape, plus the
            Poisson rate it was evaluated at
    """
    theta_u = self.encode(data[self.count_key], u, s)
    phi = self.intercept_matrix(w, s)
    B = self.decoding_matrix(v)
    theta_beta = tf.matmul(theta_u, B)
    theta_beta = self.decoder_function(theta_beta)
    rate = theta_beta + phi
    rv_poisson = tfd.Poisson(rate=rate)
    return {
        'log_likelihood': rv_poisson.log_prob(
            tf.cast(data[self.count_key], self.dtype)),
        'rate': rate
    }
def fn(key1, key2, seed=None):
    # Guard: the per-draw seed offsets below would fail on seed=None.
    if seed is None:
        seed = 0
    return [
        tfd.Normal(0., 1.).sample([3, 2], seed=seed),
        {
            key1: tfd.Poisson([1., 2., 3., 4.]).sample(seed=seed + 1),
            key2: tfd.LogNormal(0., 1.).sample(seed=seed + 2),
        },
    ]
def __call__(self):
    """Get the distribution object from the backend."""
    if get_backend() == 'pytorch':
        import torch.distributions as tod
        return tod.poisson.Poisson(self.rate)
    else:
        from tensorflow_probability import distributions as tfd
        return tfd.Poisson(self.rate)
def lossf(pars, data):
    thetasb, thetab = pars

    # Signal+background model: independent Poissons with Gaussian
    # constraints on the nuisance parameters.
    nuis_sb = [tfd.Normal(loc=thetasb[i], scale=1) for i in range(Npars)]
    poises_sb = [tfd.Poisson(rate=s + b + thetasb[i]) for i in range(Npars)]
    joint_sb = tfd.JointDistributionSequential(poises_sb + nuis_sb)

    # Background-only model.
    nuis_b = [tfd.Normal(loc=thetab[i], scale=1) for i in range(Npars)]
    poises_b = [tfd.Poisson(rate=b + thetab[i]) for i in range(Npars)]
    joint_b = tfd.JointDistributionSequential(poises_b + nuis_b)

    # The log_prob broadcasting works like this:
    # 1. Define n = len(batch_shape) + len(event_shape). (For scalar
    #    distributions, len(event_shape) = 0.)
    # 2. If the input tensor t has fewer than n dimensions, pad its shape by
    #    adding dimensions of size 1 on the left until it has exactly n
    #    dimensions. Call the resulting tensor t'.
    # 3. Broadcast the n rightmost dimensions of t' against the
    #    [batch_shape, event_shape] of the distribution you're computing a
    #    log_prob for. In more detail: for the dimensions where t' already
    #    matches the distribution, do nothing; for the dimensions where t'
    #    has a singleton, replicate that singleton the appropriate number of
    #    times. Any other situation is an error. (For scalar distributions,
    #    we only broadcast against batch_shape, since event_shape = [].)
    # 4. Now we can compute the log_prob. The resulting tensor has shape
    #    [sample_shape, batch_shape], where sample_shape is defined to be
    #    any dimensions of t or t' to the left of the n rightmost
    #    dimensions: sample_shape = shape(t)[:-n].
    #
    # Here, e.g.
    #   joint_sb.batch_shape: (10000, 5)
    #   joint_sb.event_shape: ()
    # and we want to compute (10000, 5) log probabilities, broadcasting
    # 10000 samples over the "5" dimension. By the rules above, the input
    # sample tensor shape should then be (10000, 1), and the resulting
    # log-probability tensor has shape (10000, 5).
    qsb = -2 * joint_sb.log_prob(data)
    qb = -2 * joint_b.log_prob(data)

    total_loss = tf.math.reduce_sum(qsb) + tf.math.reduce_sum(qb)

    # First return: total loss function value.
    # Second return: 'true' parameter values (for convergence calculations).
    # Third return: extra variables whose final values you want to know at
    # the end of the optimisation.
    return total_loss, (thetasb, thetab), (qsb, qb)
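# A standalone sketch (not from the original code) of the broadcasting
# rules spelled out above, using an assumed scalar-event Poisson batch.
import tensorflow as tf
from tensorflow_probability import distributions as tfd

dist = tfd.Poisson(rate=tf.ones([5]))   # batch_shape=[5], event_shape=[]
t = tf.ones([10000, 1])                 # singleton broadcasts against batch_shape
lp = dist.log_prob(t)                   # shape [10000, 5]: sample_shape=[10000]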
def _init_distribution(conditions, **kwargs):
    return tfd.Mixture(
        cat=tfd.Categorical(
            probs=[1.0 - conditions["psi"], conditions["psi"]]),
        components=[
            tfd.Deterministic(loc=tf.zeros_like(conditions["theta"])),
            tfd.Poisson(rate=conditions["theta"]),
        ],
        **kwargs,
    )
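# A minimal usage sketch (assumed values, not from the original source):
# with scalar conditions, the mixture above is a zero-inflated Poisson
# where psi is the weight of the Poisson component.
import tensorflow as tf
from tensorflow_probability import distributions as tfd

zip_dist = _init_distribution({"psi": 0.7, "theta": tf.constant(3.0)})
print(zip_dist.sample(5))      # count draws with excess zeros
print(zip_dist.log_prob(0.))   # log(0.3 * 1 + 0.7 * exp(-3))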
def train_HMM(self):
    # Define a variable to represent the unknown log rates.
    # (Assumes the initial state logits and transition probabilities were
    # stored on self when the model was configured.)
    self._trainable_log_rates = tf.Variable(
        np.log(np.mean(self.observed_counts)) +
        tf.random.normal([self.num_states]),
        name='log_rates')
    self.hmm = tfd.HiddenMarkovModel(
        initial_distribution=tfd.Categorical(
            logits=self.initial_state_logits),
        transition_distribution=tfd.Categorical(
            probs=self._transition_probs),
        observation_distribution=tfd.Poisson(
            log_rate=self._trainable_log_rates),
        num_steps=len(self.observed_counts))
def _base_dist(self, *args, **kwargs):
    """
    Zero-inflated Poisson base distribution.

    A ZeroInflatedPoisson is a mixture between a deterministic
    distribution and a Poisson distribution.
    """
    mix = kwargs.pop("mix")
    return tfd.Mixture(
        cat=tfd.Categorical(probs=[mix, 1.0 - mix]),
        components=[tfd.Deterministic(0.0), tfd.Poisson(*args, **kwargs)],
        name="ZeroInflatedPoisson",
    )
def empirical_Ey_and_Ey2_tf(a=3, ap=3, bp=1.0, c=3, cp=3, dp=1.0,
                            nsamples_latent=100, nsamples_latent1=1,
                            nsamples_output=10, K=25, N=1, M=1):
    """Returns E_prior[Y] and E_prior[Y^2] for a given set of hyperparameters.

    Parametrization as in: http://jakehofman.com/inprint/poisson_recs.pdf
    """
    if N != 1:
        warnings.warn("N!=1 will be ignored!")
    if M != 1:
        warnings.warn("M!=1 will be ignored!")
    ksi = tfd.Gamma(ap, ap / bp).sample(nsamples_latent)      # NL0
    theta = tfd.Gamma(a, ksi).sample((K, nsamples_latent1))   # K x NL1 x NL0
    eta = tfd.Gamma(cp, cp / dp).sample(nsamples_latent)
    beta = tfd.Gamma(c, eta).sample((K, nsamples_latent1))
    latent = tf.reduce_sum(theta * beta, 0)                   # NL1 x NL0
    latent = tf.reshape(latent, [-1])                         # NL1*NL0
    poisson = tfd.Poisson(rate=latent)
    # Sample outputs without gradients, then weight them by their
    # probabilities (self-normalised) to estimate the conditional moments.
    y_samples = tf.stop_gradient(poisson.sample([nsamples_output]))  # NO x NL1*NL0
    y_probs = tf.exp(poisson.log_prob(y_samples))
    total_prob = tf.reduce_sum(y_probs, 0)
    conditional_expectation = tf.reduce_sum(y_probs * y_samples, 0) / total_prob
    conditional_expectation_squared = tf.reduce_sum(
        y_probs * (y_samples**2), 0) / total_prob
    expectation = tf.reduce_mean(conditional_expectation)
    expectation_squared = tf.reduce_mean(conditional_expectation_squared)
    return expectation, expectation_squared
def get_poisson(self, Input, depth=None):
    """
    :param Input: (T, Dx)
    :param depth: total counts, (T,)
    :return: Poisson observation distribution whose rates sum to depth
    """
    with tf.variable_scope(self.name):  # TF1-style variable scoping
        lambdas = self.transformation.transform(Input)
        lambdas = tf.nn.softplus(lambdas) + 1e-6                         # (T, Dy)
        lambdas = lambdas / tf.reduce_sum(lambdas, axis=-1, keepdims=True)
        lambdas = lambdas * depth[..., None]                             # (bs, T, Dy)
        poisson = tfd.Poisson(rate=lambdas, validate_args=True,
                              allow_nan_stats=False)
        return poisson
def joint_dist():
    alpha = yield tfd.Normal(loc=0.0, scale=1.0)
    home = yield tfd.Normal(loc=0.0, scale=1.0)
    sd_att = yield tfd.HalfNormal(scale=1.0)
    sd_def = yield tfd.HalfNormal(scale=1.0)
    attack = yield tfd.Normal(loc=tf.zeros(nt), scale=sd_att)
    defend = yield tfd.Normal(loc=tf.zeros(nt), scale=sd_def)
    home_log_rate = (
        alpha
        + home
        + tf.gather(attack, home_id, axis=-1)
        - tf.gather(defend, away_id, axis=-1)
    )
    away_log_rate = (
        alpha
        + tf.gather(attack, away_id, axis=-1)
        - tf.gather(defend, home_id, axis=-1)
    )
    yield tfd.Poisson(log_rate=home_log_rate)
    yield tfd.Poisson(log_rate=away_log_rate)
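# A hedged usage sketch (not from the original source; assumes nt,
# home_id and away_id are in scope): a generator like joint_dist is
# typically wrapped in a coroutine joint distribution before sampling
# or scoring.
joint = tfd.JointDistributionCoroutineAutoBatched(joint_dist)
draw = joint.sample(seed=0)   # one draw of all latents plus both scores
lp = joint.log_prob(draw)     # scalar log density of that draw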
def empirical_Ey_and_Ey2_tf_logscore(a=3, ap=3, bp=1.0, c=3, cp=3, dp=1.0,
                                     nsamples_latent=100, nsamples_latent1=1,
                                     nsamples_output=10, K=25, N=1, M=1):
    """Returns E_prior[Y] and E_prior[Y^2] for a given set of hyperparameters.

    Parametrization as in: http://jakehofman.com/inprint/poisson_recs.pdf
    Gradients are obtained with the log-score derivative trick.
    """
    if N != 1:
        warnings.warn("N!=1 will be ignored!")
    if M != 1:
        warnings.warn("M!=1 will be ignored!")
    ksi = tfd.Gamma(ap, ap / bp).sample(nsamples_latent)      # NL0
    theta = tfd.Gamma(a, ksi).sample((K, nsamples_latent1))   # K x NL1 x NL0
    eta = tfd.Gamma(cp, cp / dp).sample(nsamples_latent)
    beta = tfd.Gamma(c, eta).sample((K, nsamples_latent1))
    latent = tf.reduce_sum(theta * beta, 0)                   # NL1 x NL0
    latent = tf.reshape(latent, [-1])                         # NL1*NL0
    poisson = tfd.Poisson(rate=latent)
    y_samples = poisson.sample([nsamples_output])
    conditional_expectation = tfp.monte_carlo.expectation(
        f=lambda x: x,
        samples=y_samples,
        log_prob=poisson.log_prob,
        use_reparameterization=False)
    conditional_expectation_squared = tfp.monte_carlo.expectation(
        f=lambda x: x * x,
        samples=y_samples,
        log_prob=poisson.log_prob,
        use_reparameterization=False)
    expectation = tf.reduce_mean(conditional_expectation)
    expectation_squared = tf.reduce_mean(conditional_expectation_squared)
    return expectation, expectation_squared
def predictive_distribution(self, data, **params):
    neural_networks = self.neural_network_model.assemble_networks(params)
    rates = tf.math.exp(
        neural_networks(
            tf.cast(data['data'], self.neural_network_model.dtype) /
            tf.cast(self.column_norm_factor,
                    self.neural_network_model.dtype)))
    rates = tf.cast(rates, self.dtype)
    rates *= self.column_norm_factor
    rv_poisson = tfd.Poisson(rate=rates)
    log_lik = rv_poisson.log_prob(
        tf.cast(data['data'], self.dtype)[tf.newaxis, ...])
    log_lik = tf.reduce_sum(log_lik, axis=-1)
    log_lik = tf.reduce_sum(log_lik, axis=-1)
    return {"log_likelihood": log_lik, "rates": rates}
def empirical_Ey_and_Ey2_tf_logscore(ct=1.0, rt=1.0, cb=0.1, rb=0.1,
                                     nsamples_latent=100, nsamples_output=3,
                                     N=1, M=1, K=25):
    """Returns E_prior[Y] and E_prior[Y^2] for a given set of hyperparameters.

    The outputs are (tf) differentiable w.r.t. the hyperparameters.
    Gradients are obtained using the log-score derivative trick.
    """
    if N != 1:
        warnings.warn("N!=1 will be ignored!")
    if M != 1:
        warnings.warn("M!=1 will be ignored!")
    theta = tfd.Gamma(ct, rt).sample((K, nsamples_latent))
    beta = tfd.Gamma(cb, rb).sample((K, nsamples_latent))
    latent = tf.reduce_sum(theta * beta, 0)
    poisson = tfd.Poisson(rate=latent)
    y_samples = poisson.sample([nsamples_output])
    conditional_expectation = tfp.monte_carlo.expectation(
        f=lambda x: x,
        samples=y_samples,
        log_prob=poisson.log_prob,
        use_reparameterization=False)
    conditional_expectation_squared = tfp.monte_carlo.expectation(
        f=lambda x: x * x,
        samples=y_samples,
        log_prob=poisson.log_prob,
        use_reparameterization=False)
    expectation = tf.reduce_mean(conditional_expectation)
    expectation_squared = tf.reduce_mean(conditional_expectation_squared)
    return expectation, expectation_squared
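# A standalone sketch (assumed, not from the original code) of the
# score-function estimator that use_reparameterization=False selects:
# gradients flow through log_prob even though Poisson samples are
# discrete and non-reparameterizable.
import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions

rate = tf.Variable(2.0)
with tf.GradientTape() as tape:
    dist = tfd.Poisson(rate=rate)
    x = dist.sample(10000, seed=0)
    mean_est = tfp.monte_carlo.expectation(
        f=lambda x: x, samples=x, log_prob=dist.log_prob,
        use_reparameterization=False)
print(tape.gradient(mean_est, rate))  # approx. 1.0, since E[X] = rate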
def empirical_Ey_and_Ey2_tf(ct=1.0, rt=1.0, cb=0.1, rb=0.1,
                            nsamples_latent=100, nsamples_output=3,
                            N=1, M=1, K=25):
    """Returns E_prior[Y] and E_prior[Y^2] for a given set of hyperparameters.

    The outputs are (tf) differentiable w.r.t. the hyperparameters.
    """
    if N != 1:
        warnings.warn("N!=1 will be ignored!")
    if M != 1:
        warnings.warn("M!=1 will be ignored!")
    theta = tfd.Gamma(ct, rt).sample((K, nsamples_latent))
    beta = tfd.Gamma(cb, rb).sample((K, nsamples_latent))
    latent = tf.reduce_sum(theta * beta, 0)
    poisson = tfd.Poisson(rate=latent)
    # Sample outputs without gradients, then weight them by their
    # probabilities (self-normalised) to estimate the conditional moments.
    y_samples = tf.stop_gradient(poisson.sample([nsamples_output]))  # NO x NL
    y_probs = tf.exp(poisson.log_prob(y_samples))
    total_prob = tf.reduce_sum(y_probs, 0)
    conditional_expectation = tf.reduce_sum(y_probs * y_samples, 0) / total_prob
    conditional_expectation_squared = tf.reduce_sum(
        y_probs * (y_samples**2), 0) / total_prob
    expectation = tf.reduce_mean(conditional_expectation)
    expectation_squared = tf.reduce_mean(conditional_expectation_squared)
    return expectation, expectation_squared
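# A hedged usage sketch (assumed): since both estimators are built from
# tf ops (TFP Gamma sampling is implicitly reparameterized), gradients
# w.r.t. the hyperparameters can be taken directly with a tape.
import tensorflow as tf

ct = tf.Variable(1.0)
with tf.GradientTape() as tape:
    Ey, Ey2 = empirical_Ey_and_Ey2_tf(ct=ct)
print(tape.gradient(Ey, ct))  # Monte Carlo estimate, so noisy but finite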
def _init_distribution(conditions):
    mu = conditions["mu"]
    return tfd.Poisson(rate=mu)
def HMM_on_one_file(log_dir):
    stdout_file, LOG_file, report_csv = get_log_and_std_files(log_dir)
    data_set = load_log_and_qps(LOG_file, report_csv)
    bucket_df = vectorize_by_compaction_output_level(data_set)
    bucket_df["qps"] = data_set.qps_df["interval_qps"]
    _ = bucket_df.plot(subplots=True)

    # memtable filling, flush only, L0 compaction (CPU busy),
    # crowded compaction (disk busy), ...
    num_states = 5
    # Start (nearly) certainly in state 0, the flushing-limitation state.
    # Note: despite the name, this array is passed as (unnormalised) probs.
    initial_state_logits = np.zeros([num_states], dtype=np.float32)
    initial_state_logits[0] = 1.0
    initial_distribution = tfd.Categorical(probs=initial_state_logits)

    daily_change_prob = 0.05
    transition_probs = daily_change_prob / (num_states - 1) * np.ones(
        [num_states, num_states], dtype=np.float32)
    np.fill_diagonal(transition_probs, 1 - daily_change_prob)
    transition_distribution = tfd.Categorical(probs=transition_probs)

    observed_counts = bucket_df["qps"].fillna(0).tolist()
    observed_counts = np.array(observed_counts).astype(np.float32)

    trainable_log_rates = tf.Variable(
        np.log(np.mean(observed_counts)) + tf.random.normal([num_states]),
        name='log_rates')
    hmm = tfd.HiddenMarkovModel(
        initial_distribution=initial_distribution,
        transition_distribution=transition_distribution,
        observation_distribution=tfd.Poisson(log_rate=trainable_log_rates),
        num_steps=len(observed_counts))

    rate_prior = tfd.LogNormal(5, 5)

    def log_prob():
        return (tf.reduce_sum(
            rate_prior.log_prob(tf.math.exp(trainable_log_rates))) +
                hmm.log_prob(observed_counts))

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)

    @tf.function(autograph=False)
    def train_op():
        with tf.GradientTape() as tape:
            neg_log_prob = -log_prob()
        grads = tape.gradient(neg_log_prob, [trainable_log_rates])[0]
        optimizer.apply_gradients([(grads, trainable_log_rates)])
        return neg_log_prob, tf.math.exp(trainable_log_rates)

    for step in range(201):
        loss, rates = [t.numpy() for t in train_op()]
        if step % 20 == 0:
            print("step {}: log prob {} rates {}".format(step, -loss, rates))

    posterior_dists = hmm.posterior_marginals(observed_counts)
    posterior_probs = posterior_dists.probs_parameter().numpy()
    most_probable_states = np.argmax(posterior_probs, axis=1)
    most_probable_rates = rates[most_probable_states]

    fig = plt.figure(figsize=(10, 4))
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(most_probable_rates, c='green', lw=3, label='inferred rate')
    ax.plot(observed_counts, c='black', alpha=0.3, label='observed counts')
    ax.set_ylabel("latent rate")
    ax.set_xlabel("time")
    ax.set_title("Inferred latent rate over time")
    ax.legend(loc=4)
    output_path = "image/" + log_dir.replace("log_files/", "").replace(
        "/", "_")
    mkdir_p(output_path)
    plt.savefig("{}/state_guessing.pdf".format(output_path),
                bbox_inches="tight")
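# A possible follow-up (assumed, not in the original): besides the
# smoothed per-step marginals used above, the fitted HMM can also report
# the single most likely state sequence (Viterbi path):
#   most_likely_path = hmm.posterior_mode(observed_counts)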
# Signal hypotheses (same for all components)
# (we will have the same null hypothesis for all of them, s=0)
s_in = tf.constant([i for i in range(10)], dtype=float)
#s_in = tf.constant([5], dtype=float)
s_in2 = tf.expand_dims(s_in, 0)
s = tf.broadcast_to(s_in2, shape=(N, len(s_in)))
b = tf.expand_dims(tf.constant(50, dtype=float), 0)

# Nuisance parameters (independent Gaussians)
zero = tf.expand_dims(tf.constant(0, dtype=float), 0)
nuis0 = [tfd.Normal(loc=zero, scale=1) for i in range(Npars)]

# Bunch of independent Poisson distributions that we want to combine
poises0 = [tfd.Poisson(rate=b) for i in range(Npars)]
poises0s = [tfd.Poisson(rate=s_in + b) for i in range(Npars)]

# Construct joint distributions
joint0 = tfd.JointDistributionSequential(poises0 + nuis0)
joint0s = tfd.JointDistributionSequential(poises0s + nuis0)

# Generate background-only pseudodata to be fitted
samples0 = joint0.sample(N)
# Generate signal+background pseudodata to be fitted
samples0s = joint0s.sample(N)

# We want the sample shapes to dimensionally match the versions of
# the distributions that have free parameters:
# [sample_shape, batch_shape, event_shape]
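# An illustration (assumed, not from the original code) of that shape
# matching: each component of samples0 has shape (N, 1) because b has
# shape (1,), so evaluating under the signal hypotheses, whose batch
# shape is (len(s_in),), broadcasts to one log-probability per
# hypothesis and per pseudodata realisation:
log_q0s = joint0s.log_prob(samples0)   # shape (N, len(s_in))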
def _init_distribution(conditions):
    rate = conditions["rate"]
    return tfd.Poisson(rate=rate)
"activation function": identity } }, "class": lambda theta: tensorflow_distributions.Bernoulli(logits=theta["logits"] ) }, "poisson": { "parameters": { "log_lambda": { "support": [-10, 10], "activation function": identity } }, "class": lambda theta: tensorflow_distributions.Poisson(rate=tf.exp(theta[ "log_lambda"])) }, "constrained poisson": { "parameters": { "lambda": { "support": [0, 1], "activation function": softmax } }, "class": lambda theta, N: tensorflow_distributions.Poisson(rate=theta["lambda"] * N) }, "lomax": { "parameters": { "log_concentration": {
from tensorflow_probability import distributions as tfd

N = 1000
dists = {"A": {}, "B": {}}
for i in range(N):
    dists["A"][i] = tfd.Poisson(rate=1e-6)
    dists["B"][i] = tfd.Poisson(rate=1e-6)
    #dists += [tfd.Normal(loc=0, scale=1)]

joint = tfd.JointDistributionNamed(dists)
samples = joint.sample(N)
print("joint.log_prob =", joint.log_prob(samples))
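# A hedged aside (assumed): the nested dict passed to
# JointDistributionNamed doubles as the sample structure, so individual
# draws can be read back by the same keys:
print(samples["A"][0].shape)   # (N,) draws for the scalar component A/0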
true_durations = [10, 20, 5, 35]

observed_counts = np.concatenate([
    scipy.stats.poisson(rate).rvs(num_steps)
    for (rate, num_steps) in zip(true_rates, true_durations)
]).astype(np.float32)

plt.plot(observed_counts)
plt.show()

initial_state_logits = np.zeros([num_states],
                                dtype=np.float32)  # uniform distribution

daily_change_prob = 0.05
transition_probs = daily_change_prob / (num_states - 1) * np.ones(
    [num_states, num_states], dtype=np.float32)
np.fill_diagonal(transition_probs, 1 - daily_change_prob)

print("Initial state logits:\n{}".format(initial_state_logits))
print("Transition matrix:\n{}".format(transition_probs))

trainable_log_rates = tf.Variable(
    np.log(np.mean(observed_counts)) + tf.random.normal([num_states]),
    name='log_rates')

hmm = tfd.HiddenMarkovModel(
    initial_distribution=tfd.Categorical(logits=initial_state_logits),
    transition_distribution=tfd.Categorical(probs=transition_probs),
    observation_distribution=tfd.Poisson(log_rate=trainable_log_rates),
    num_steps=len(observed_counts))
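# A minimal training sketch (assumed; mirrors the usual TFP HMM recipe):
# maximise the marginal likelihood of the counts w.r.t. the log rates,
# which the hmm's observation distribution holds as a tf.Variable.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)
for step in range(101):
    with tf.GradientTape() as tape:
        neg_ll = -hmm.log_prob(observed_counts)
    grads = tape.gradient(neg_ll, [trainable_log_rates])
    optimizer.apply_gradients(zip(grads, [trainable_log_rates]))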
def _base_dist(self, mu: TensorLike, *args, **kwargs):
    return tfd.Poisson(rate=mu, *args, **kwargs)
def latent_state_number_changing_curve(fitting_area, output_dir_prefix,
                                       log_dir_prefix, log_dir, fig_name=""):
    max_num_states = 10

    def build_latent_state(num_states, max_num_states,
                           daily_change_prob=0.05):
        # Give probability exp(-100) ~= 0 to states outside of the current
        # model.
        initial_state_logits = -100. * np.ones([max_num_states],
                                               dtype=np.float32)
        initial_state_logits[:num_states] = 0.
        initial_state_logits[0] = 1.
        # Build a transition matrix that transitions only within the
        # current `num_states` states.
        transition_probs = np.eye(max_num_states, dtype=np.float32)
        if num_states > 1:
            transition_probs[:num_states, :num_states] = (
                daily_change_prob / (num_states - 1))
            np.fill_diagonal(transition_probs[:num_states, :num_states],
                             1 - daily_change_prob)
        return initial_state_logits, transition_probs

    # For each candidate model, build the initial state prior and
    # transition matrix.
    batch_initial_state_logits = []
    batch_transition_probs = []
    for num_states in range(1, max_num_states + 1):
        initial_state_logits, transition_probs = build_latent_state(
            num_states=num_states, max_num_states=max_num_states)
        batch_initial_state_logits.append(initial_state_logits)
        batch_transition_probs.append(transition_probs)
    batch_initial_state_logits = np.array(batch_initial_state_logits)
    batch_transition_probs = np.array(batch_transition_probs)

    trainable_log_rates = tf.Variable(
        (np.log(np.mean(fitting_area)) *
         np.ones([batch_initial_state_logits.shape[0], max_num_states]) +
         tf.random.normal([1, max_num_states])),
        name='log_rates')
    hmm = tfd.HiddenMarkovModel(
        initial_distribution=tfd.Categorical(
            logits=batch_initial_state_logits),
        transition_distribution=tfd.Categorical(
            probs=batch_transition_probs),
        observation_distribution=tfd.Poisson(log_rate=trainable_log_rates),
        num_steps=len(fitting_area))

    rate_prior = tfd.LogNormal(5, 5)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)

    def log_prob():
        prior_lps = rate_prior.log_prob(tf.math.exp(trainable_log_rates))
        prior_lp = tf.stack(
            [tf.reduce_sum(prior_lps[i, :i + 1])
             for i in range(max_num_states)])
        return prior_lp + hmm.log_prob(fitting_area)

    @tf.function(autograph=False)
    def train_op():
        with tf.GradientTape() as tape:
            neg_log_prob = -log_prob()
        grads = tape.gradient(neg_log_prob, [trainable_log_rates])[0]
        optimizer.apply_gradients([(grads, trainable_log_rates)])
        return neg_log_prob, tf.math.exp(trainable_log_rates)

    for step in range(201):
        loss, rates = [t.numpy() for t in train_op()]
        if step % 20 == 0:
            print("step {}: loss {}".format(step, loss))

    num_states = np.arange(1, max_num_states + 1)
    fig = plt.figure(figsize=(8, 6))
    plt.plot(num_states, -loss, "b-", label="likelihood")
    plt.ylabel("marginal likelihood $\\tilde{p}(x)$")
    plt.xlabel("number of latent states")
    plt.legend()
    plt.twinx()
    plt.plot(num_states, np.gradient(-loss), "g--", label="gradient")
    plt.ylabel("Gradient of the likelihood")
    plt.title("Model selection on latent states")
    plt.legend()
    output_path = output_dir_prefix + log_dir.replace(
        log_dir_prefix, "").replace("/", "_")
    mkdir_p(output_path)
    plt.savefig("{}/{}_likelihood_curve.pdf".format(output_path, fig_name),
                bbox_inches="tight")
    plt.savefig("{}/{}_likelihood_curve.png".format(output_path, fig_name),
                bbox_inches="tight")
    plt.clf()

    posterior_probs = hmm.posterior_marginals(
        fitting_area).probs_parameter().numpy()
    most_probable_states = np.argmax(posterior_probs, axis=-1)

    fig = plt.figure(figsize=(14, 12))
    for i, learned_model_rates in enumerate(rates):
        ax = fig.add_subplot(4, 3, i + 1)
        ax.plot(learned_model_rates[most_probable_states[i]],
                c='green', lw=3, label='inferred rate')
        ax.plot(fitting_area, c='black', alpha=0.3, label='observed counts')
        ax.set_ylabel("latent rate")
        ax.set_xlabel("time")
        ax.set_title("{}-state model".format(i + 1))
        ax.legend(loc=4)
    plt.tight_layout()
    plt.savefig("{}/{}_model_fitting_test.pdf".format(output_path, fig_name),
                bbox_inches="tight")
    plt.savefig("{}/{}_model_fitting_test.png".format(output_path, fig_name),
                bbox_inches="tight")
    plt.clf()
def main(args):
    print("Loading data...")
    teams, df = load_data()
    train = df[df["split"] == "train"]
    nt = len(teams)

    print("Starting inference...")
    mcmc = run_inference(
        num_chains=args.num_chains,
        num_results=args.num_samples,
        num_burnin_steps=args.num_warmup,
        nt=nt,
    )
    samples = dict(
        zip(
            ["alpha", "home", "sd_att", "sd_def", "attack", "defend"],
            [np.swapaxes(sample, 0, 1) for sample in mcmc],
        )
    )
    fit = az.from_dict(samples)

    print("Analyse posterior...")
    az.plot_forest(
        fit,
        var_names=("alpha", "home", "sd_att", "sd_def"),
        backend="bokeh",
    )
    az.plot_trace(
        fit,
        var_names=("alpha", "home", "sd_att", "sd_def"),
        backend="bokeh",
    )

    # Attack and defence
    quality = teams.copy()
    quality = quality.assign(
        attack=samples["attack"].mean(axis=(0, 1)),
        attacksd=samples["attack"].std(axis=(0, 1)),
        defend=samples["defend"].mean(axis=(0, 1)),
        defendsd=samples["defend"].std(axis=(0, 1)),
    )
    quality = quality.assign(
        attack_low=quality["attack"] - quality["attacksd"],
        attack_high=quality["attack"] + quality["attacksd"],
        defend_low=quality["defend"] - quality["defendsd"],
        defend_high=quality["defend"] + quality["defendsd"],
    )
    plot_quality(quality)

    # Predicted goals and table
    predict = df[df["split"] == "predict"]

    theta1 = (
        samples["alpha"].flatten()[..., np.newaxis]
        + samples["home"].flatten()[..., np.newaxis]
        + tf.gather(
            samples["attack"].reshape(-1, samples["attack"].shape[-1]),
            predict["Home_id"],
            axis=-1,
        )
        - tf.gather(
            samples["defend"].reshape(-1, samples["defend"].shape[-1]),
            predict["Away_id"],
            axis=-1,
        )
    )
    theta2 = (
        samples["alpha"].flatten()[..., np.newaxis]
        + tf.gather(
            samples["attack"].reshape(-1, samples["attack"].shape[-1]),
            predict["Away_id"],
            axis=-1,
        )
        - tf.gather(
            samples["defend"].reshape(-1, samples["defend"].shape[-1]),
            predict["Home_id"],
            axis=-1,
        )
    )

    s1 = np.array(tfd.Poisson(log_rate=theta1).sample())
    s2 = np.array(tfd.Poisson(log_rate=theta2).sample())

    predicted_full = predict.copy()
    predicted_full = predicted_full.assign(
        score1=s1.mean(axis=0).round(),
        score1error=s1.std(axis=0),
        score2=s2.mean(axis=0).round(),
        score2error=s2.std(axis=0),
    )
    predicted_full = train.append(
        predicted_full.drop(columns=["score1error", "score2error"])
    )

    print(score_table(df))
    print(score_table(predicted_full))
def get_list_of_moment_map(fitting_area):
    def build_latent_state(num_states, max_num_states,
                           daily_change_prob=0.05):
        # Give probability exp(-100) ~= 0 to states outside of the current
        # model.
        initial_state_logits = -100. * np.ones([max_num_states],
                                               dtype=np.float32)
        initial_state_logits[:num_states] = 0.
        initial_state_logits[0] = 1.
        # Build a transition matrix that transitions only within the
        # current `num_states` states.
        transition_probs = np.eye(max_num_states, dtype=np.float32)
        if num_states > 1:
            transition_probs[:num_states, :num_states] = (
                daily_change_prob / (num_states - 1))
            np.fill_diagonal(transition_probs[:num_states, :num_states],
                             1 - daily_change_prob)
        return initial_state_logits, transition_probs

    max_num_states = 10
    batch_initial_state_logits = []
    batch_transition_probs = []
    for num_states in range(1, max_num_states + 1):
        initial_state_logits, transition_probs = build_latent_state(
            num_states=num_states, max_num_states=max_num_states)
        batch_initial_state_logits.append(initial_state_logits)
        batch_transition_probs.append(transition_probs)
    batch_initial_state_logits = np.array(batch_initial_state_logits)
    batch_transition_probs = np.array(batch_transition_probs)

    trainable_log_rates = tf.Variable(
        (np.log(np.mean(fitting_area)) *
         np.ones([batch_initial_state_logits.shape[0], max_num_states]) +
         tf.random.normal([1, max_num_states])),
        name='log_rates')
    hmm = tfd.HiddenMarkovModel(
        initial_distribution=tfd.Categorical(
            logits=batch_initial_state_logits),
        transition_distribution=tfd.Categorical(
            probs=batch_transition_probs),
        observation_distribution=tfd.Poisson(log_rate=trainable_log_rates),
        num_steps=len(fitting_area))

    rate_prior = tfd.LogNormal(5, 5)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)

    def log_prob():
        prior_lps = rate_prior.log_prob(tf.math.exp(trainable_log_rates))
        prior_lp = tf.stack(
            [tf.reduce_sum(prior_lps[i, :i + 1])
             for i in range(max_num_states)])
        return prior_lp + hmm.log_prob(fitting_area)

    @tf.function(autograph=False)
    def train_op():
        with tf.GradientTape() as tape:
            neg_log_prob = -log_prob()
        grads = tape.gradient(neg_log_prob, [trainable_log_rates])[0]
        optimizer.apply_gradients([(grads, trainable_log_rates)])
        return neg_log_prob, tf.math.exp(trainable_log_rates)

    for step in range(201):
        loss, rates = [t.numpy() for t in train_op()]
        if step % 20 == 0:
            print("step {}: loss {}".format(step, loss))

    posterior_probs = hmm.posterior_marginals(
        fitting_area).probs_parameter().numpy()
    most_probable_states = np.argmax(posterior_probs, axis=-1)

    fig = plt.figure(figsize=(14, 12))
    for i, learned_model_rates in enumerate(rates):
        ax = fig.add_subplot(4, 3, i + 1)
        ax.plot(learned_model_rates[most_probable_states[i]],
                c='green', lw=3, label='inferred rate')
        ax.plot(fitting_area, c='black', alpha=0.3, label='observed counts')
        ax.set_ylabel("latent rate")
        ax.set_xlabel("time")
        ax.set_title("{}-state model".format(i + 1))
        ax.legend(loc=4)
    plt.tight_layout()
    plt.show()

    fig = plt.figure(figsize=(14, 12))
    list_of_moment_map = []
    for number_of_states in range(max_num_states):
        moment_map = {}
        ax = fig.add_subplot(4, 3, number_of_states + 1)
        for state_no in range(max_num_states):
            moment_map[state_no] = []
        index = 0
        for state in most_probable_states[number_of_states]:
            moment_map[state].append(index)
            index += 1
        # moment_map = {k: v for k, v in moment_map.items() if len(v) > 0}
        frequency_count = [len(moment_map[x]) / index for x in moment_map]
        bar1 = ax.bar(range(len(moment_map)), frequency_count)
        # autolabel(bar1, most_probable_states[number_of_states])
        ax.set_ylim(0, 1.1)
        ax.set_xlabel("state id")
        # Title each panel by its own model size (the original reused `i`
        # from the previous loop here).
        ax.set_title("{}-state model".format(number_of_states + 1))
        list_of_moment_map.append(moment_map)
    plt.tight_layout()
    plt.savefig("rate_frequency.png")
    plt.clf()
    return list_of_moment_map
        }
    },
    "class": lambda theta: tensorflow_distributions.Bernoulli(
        logits = theta["logits"]
    )
},
"poisson": {
    "parameters": {
        "log_lambda": {
            "support": [-10, 10],
            "activation function": identity
        }
    },
    "class": lambda theta: tensorflow_distributions.Poisson(
        rate = tf.exp(theta["log_lambda"])
    )
},
"constrained poisson": {
    "parameters": {
        "lambda": {
            "support": [0, 1],
            "activation function": softmax
        }
    },
    "class": lambda theta, N: tensorflow_distributions.Poisson(
        rate = theta["lambda"] * N
    )
},
def tensorflow_model(self, pars):
    """Output tensorflow probability model object, to be combined together
    and sampled from.

    pars - dictionary of signal and nuisance parameters
           (tensors, constant or Variable)
    """
    # Need to construct these shapes to match the event_shape, batch_shape,
    # sample_shape semantics of tensorflow_probability.
    cov_order = self.get_cov_order()
    small = 1e-10
    tfds = {}

    # Determine which SRs participate in the covariance matrix
    if self.cov is not None:
        cov = tf.constant(self.cov, dtype=c.TFdtype)
        cov_diag = tf.constant(
            [self.cov[k][k] for k in range(len(self.cov))])
        # Select which systematic to use, depending on whether the SR
        # participates in the covariance matrix
        bsys_tmp = [
            np.sqrt(self.cov_diag[cov_order.index(sr)])
            if self.in_cov[i] else self.SR_b_sys[i]
            for i, sr in enumerate(self.SR_names)
        ]
    else:
        bsys_tmp = self.SR_b_sys[:]

    # Prepare input parameters. We "scan" normalised versions of s and
    # theta, to help the optimizer.
    s = pars['s'] * self.s_scaling
    theta = pars['theta'] * self.theta_scaling
    theta_safe = theta

    # Expand dims of internal parameters to match input pars. The
    # right-most dimension is the 'event' dimension, i.e. parameters for
    # each independent Poisson distribution. The rest go into batch_shape.
    if s.shape == ():
        n_batch_dims = 0
    else:
        n_batch_dims = len(s.shape) - 1
    new_dims = [1 for i in range(n_batch_dims)]
    b = tf.constant(self.SR_b, dtype=c.TFdtype)
    bsys = tf.constant(bsys_tmp, dtype=c.TFdtype)
    if n_batch_dims > 0:
        b = tf.reshape(b, new_dims + list(b.shape))
        bsys = tf.reshape(bsys, new_dims + list(bsys.shape))

    # Poisson model. Abs works to constrain the rate to be positive, though
    # it might make best-fit parameters confusing to interpret.
    poises0 = tfd.Poisson(rate=tf.abs(s + b + theta_safe) + c.reallysmall)
    # Treat SR batch dims as event dims
    poises0i = tfd.Independent(distribution=poises0,
                               reinterpreted_batch_ndims=1)
    tfds["n"] = poises0i

    # Multivariate background constraints
    if self.cov is not None:
        theta_cov = tf.gather(theta_safe, self.covi, axis=-1)
        cov_nuis = tfd.MultivariateNormalFullCovariance(
            loc=theta_cov, covariance_matrix=cov)
        tfds["x_cov"] = cov_nuis

        # Remaining uncorrelated background constraints
        if np.sum(~self.in_cov) > 0:
            nuis0 = tfd.Normal(loc=theta_safe[..., ~self.in_cov],
                               scale=bsys[..., ~self.in_cov])
            # Treat SR batch dims as event dims
            nuis0i = tfd.Independent(distribution=nuis0,
                                     reinterpreted_batch_ndims=1)
            tfds["x_nocov"] = nuis0i
    else:
        # Only have uncorrelated background constraints
        nuis0 = tfd.Normal(loc=theta_safe, scale=bsys)
        # Treat SR batch dims as event dims
        nuis0i = tfd.Independent(distribution=nuis0,
                                 reinterpreted_batch_ndims=1)
        tfds["x"] = nuis0i

    return tfds
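# A hedged usage sketch (assumed; `model` and `pars` are hypothetical
# stand-ins): the returned dict of distributions can be combined into a
# single joint and sampled, matching the "combined together and sampled
# from" note in the docstring.
joint = tfd.JointDistributionNamed(model.tensorflow_model(pars))
events = joint.sample(1000)   # keys: "n", plus "x_cov"/"x_nocov" or "x"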