def __call__(self):
    """Get the distribution object from the backend"""
    if get_backend() == 'pytorch':
        import torch.distributions as tod
        raise NotImplementedError
    else:
        from tensorflow_probability import distributions as tfd
        return tfd.HiddenMarkovModel(
            initial_distribution=tfd.Categorical(self['initial']),
            transition_distribution=tfd.Categorical(self['transition']),
            observation_distribution=self['observation'],
            num_steps=self['steps'])
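
# Example of the kind of distribution object the call above returns (a
# minimal sketch; the concrete parameter values below are illustrative
# assumptions taken from the TFP docstring example, not from this code).
import numpy as np
import tensorflow as tf
from tensorflow_probability import distributions as tfd

example_hmm = tfd.HiddenMarkovModel(
    initial_distribution=tfd.Categorical(probs=[0.8, 0.2]),
    transition_distribution=tfd.Categorical(probs=[[0.7, 0.3],
                                                   [0.2, 0.8]]),
    observation_distribution=tfd.Normal(loc=[0., 15.], scale=[5., 10.]),
    num_steps=7)
print(example_hmm.sample())  # one length-7 trajectory of observations
print(example_hmm.mean())    # per-step marginal mean of the observations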
def test_hmm_log_prob():
    a0 = np.array([0.9, 0.08, 0.02])
    a = np.array([[0.1, 0.8, 0.1],
                  [0.5, 0.3, 0.2],
                  [0.4, 0.4, 0.2]])
    e = np.array([[0.99, 0.01],
                  [0.01, 0.99],
                  [0.5, 0.5]])
    model = tfpd.HiddenMarkovModel(
        tfpd.Categorical(
            logits=tf.math.log(tf.convert_to_tensor(np.matmul(a0, a)))),
        tfpd.Categorical(logits=tf.math.log(tf.convert_to_tensor(a))),
        tfpd.Categorical(logits=tf.math.log(tf.convert_to_tensor(e))),
        5)
    x = tf.convert_to_tensor(
        np.array([[0., 1.], [1., 0.], [0., 1.], [0., 1.], [1., 0.],
                  [0.5, 0.5]]))
    xlen = tf.convert_to_tensor(5)
    chk_lp = mue.hmm_log_prob(model, x, xlen)

    # Manual check via the forward algorithm (one transition before each
    # emission, per the MuE convention).
    f = np.matmul(a0, a) * e[:, 1]
    f = np.matmul(f, a) * e[:, 0]
    f = np.matmul(f, a) * e[:, 1]
    f = np.matmul(f, a) * e[:, 1]
    f = np.matmul(f, a) * e[:, 0]
    tst_lp = np.log(np.sum(f))
    assert np.allclose(chk_lp.numpy(), tst_lp)

    # Check against (predictably incorrect) tensorflow probability
    # implementation: TFP emits the first observation from the initial
    # state, so with `a0` as the initial distribution it takes one fewer
    # transition and gives a different answer.
    model = tfpd.HiddenMarkovModel(
        tfpd.Categorical(logits=tf.math.log(tf.convert_to_tensor(a0))),
        tfpd.Categorical(logits=tf.math.log(tf.convert_to_tensor(a))),
        tfpd.Categorical(logits=tf.math.log(tf.convert_to_tensor(e))),
        5)
    xcat = tf.convert_to_tensor([1, 0, 1, 1, 0])
    tst_lp2 = model.log_prob(xcat)
    assert not np.allclose(chk_lp.numpy(), tst_lp2.numpy())
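
# The hand-rolled check above is the forward algorithm specialized to this
# test. A generic NumPy version (a sketch, assuming dense a0/a/e as in the
# test, and the same take-a-transition-before-each-emission convention):
def forward_log_prob(a0, a, e, obs):
    """log p(obs) given initial probs a0, transition matrix a, emission
    matrix e, and an integer observation sequence obs."""
    f = np.matmul(a0, a) * e[:, obs[0]]
    for o in obs[1:]:
        f = np.matmul(f, a) * e[:, o]
    return np.log(np.sum(f))

# forward_log_prob(a0, a, e, [1, 0, 1, 1, 0]) reproduces tst_lp above.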
def train_HMM(self):
    # Define variable to represent the unknown log rates.
    self._trainable_log_rates = tf.Variable(
        np.log(np.mean(self.observed_counts)) +
        tf.random.normal([self.num_states]),
        name='log_rates')
    self.hmm = tfd.HiddenMarkovModel(
        initial_distribution=tfd.Categorical(
            logits=self.initial_state_logits),
        transition_distribution=tfd.Categorical(
            probs=self._transition_probs),
        observation_distribution=tfd.Poisson(
            log_rate=self._trainable_log_rates),
        num_steps=len(self.observed_counts))
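
# train_HMM assumes the instance already carries the data and the fixed HMM
# structure. A minimal sketch of that setup (attribute names inferred from
# the method body, values illustrative, class name hypothetical):
class PoissonHMMFitter:
    def __init__(self, observed_counts, num_states, daily_change_prob=0.05):
        self.observed_counts = np.asarray(observed_counts, dtype=np.float32)
        self.num_states = num_states
        # Uniform prior over initial states.
        self.initial_state_logits = np.zeros([num_states], dtype=np.float32)
        # "Sticky" transition matrix: stay put with prob 1 - daily_change_prob.
        self._transition_probs = daily_change_prob / (num_states - 1) * np.ones(
            [num_states, num_states], dtype=np.float32)
        np.fill_diagonal(self._transition_probs, 1 - daily_change_prob)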
def test_forward_mean():
    a0 = np.array([0.9, 0.08, 0.02])
    a = np.array([[0.1, 0.8, 0.1],
                  [0.5, 0.3, 0.2],
                  [0.4, 0.4, 0.2]])
    e = np.array([[0.99, 0.01],
                  [0.01, 0.99],
                  [0.5, 0.5]])
    model = tfpd.HiddenMarkovModel(
        tfpd.Categorical(logits=tf.math.log(tf.convert_to_tensor(a0))),
        tfpd.Categorical(logits=tf.math.log(tf.convert_to_tensor(a))),
        tfpd.OneHotCategorical(logits=tf.math.log(tf.convert_to_tensor(e))),
        5)
    tst_mean = model.mean()
    chk_mean = mue.hmm_mean(model, 5)
    assert np.allclose(tst_mean.numpy(), chk_mean.numpy())
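
# The quantity being checked can also be computed by hand: under TFP's
# convention the first observation is emitted from the initial state, so
# E[x_t] = (a0 @ a^t) @ e, since the mean of a OneHotCategorical is its
# probability vector. A NumPy sketch (illustrative, not part of the test):
def manual_mean(a0, a, e, steps):
    means = []
    p = a0.copy()
    for _ in range(steps):
        means.append(np.matmul(p, e))  # E[one-hot x_t] = p(z_t) @ e
        p = np.matmul(p, a)            # advance the state marginal
    return np.stack(means)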
def encode(x, uln0, rln0, lln0, latent_length, latent_alphabet_size,
           alphabet_size, padded_data_length, transfer_mats,
           dtype=tf.float64, eps=1e-32):
    """First layer of encoder, using the MuE mean."""

    # Set initial sequence (replace -inf from log(0) with a very negative
    # finite number).
    vxln = tf.maximum(tf.math.log(x), -1e32)

    # Set insert biases to uniform distribution.
    vcln = -np.log(alphabet_size) * tf.ones_like(vxln)

    # Set deletion and insertion parameters.
    uln = tf.ones((padded_data_length, 2), dtype=dtype) * (
        uln0 - tf.reduce_logsumexp(uln0))[None, :]
    rln = tf.ones((padded_data_length, 2), dtype=dtype) * (
        rln0 - tf.reduce_logsumexp(rln0))[None, :]
    lln = lln0 - tf.reduce_logsumexp(lln0, axis=1, keepdims=True)

    # Build HiddenMarkovModel, with one-hot encoded output.
    a0_enc, a_enc, e_enc = make_hmm_params(
        vxln, vcln, uln, rln, lln, transfer_mats, eps=eps, dtype=dtype)
    hmm_enc = tfpd.HiddenMarkovModel(tfpd.Categorical(logits=a0_enc),
                                     tfpd.Categorical(logits=a_enc),
                                     tfpd.OneHotCategorical(logits=e_enc),
                                     latent_length)

    return hmm_mean(hmm_enc, latent_length)
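
# The (x - reduce_logsumexp(x)) pattern used above normalizes a vector of
# logits into log-probabilities. A small self-contained check (illustrative
# values):
logits = tf.constant([1.0, 2.0, 3.0], dtype=tf.float64)
log_probs = logits - tf.reduce_logsumexp(logits)
print(tf.reduce_sum(tf.exp(log_probs)).numpy())  # ~1.0: probs sum to one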
# `true_rates` and `num_states` are assumed to be defined earlier (the
# ground-truth Poisson rates and the number of latent states).
true_durations = [10, 20, 5, 35]

observed_counts = np.concatenate([
    scipy.stats.poisson(rate).rvs(num_steps)
    for (rate, num_steps) in zip(true_rates, true_durations)
]).astype(np.float32)

plt.plot(observed_counts)
plt.show()

initial_state_logits = np.zeros([num_states],
                                dtype=np.float32)  # uniform distribution

daily_change_prob = 0.05
transition_probs = daily_change_prob / (num_states - 1) * np.ones(
    [num_states, num_states], dtype=np.float32)
np.fill_diagonal(transition_probs, 1 - daily_change_prob)

print("Initial state logits:\n{}".format(initial_state_logits))
print("Transition matrix:\n{}".format(transition_probs))

trainable_log_rates = tf.Variable(
    np.log(np.mean(observed_counts)) + tf.random.normal([num_states]),
    name='log_rates')

hmm = tfd.HiddenMarkovModel(
    initial_distribution=tfd.Categorical(logits=initial_state_logits),
    transition_distribution=tfd.Categorical(probs=transition_probs),
    observation_distribution=tfd.Poisson(log_rate=trainable_log_rates),
    num_steps=len(observed_counts))
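
# Quick sanity check (a sketch): the model above supports log-likelihood
# evaluation and sampling out of the box, before any training.
print(hmm.log_prob(observed_counts))  # scalar log p(counts) at init rates
print(hmm.sample().numpy()[:10])      # first 10 steps of a sampled series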
def get_list_of_moment_map(fitting_area):
    def build_latent_state(num_states, max_num_states,
                           daily_change_prob=0.05):
        # Give probability exp(-100) ~= 0 to states outside the current model.
        initial_state_logits = -100. * np.ones([max_num_states],
                                               dtype=np.float32)
        initial_state_logits[:num_states] = 0.
        initial_state_logits[0] = 1.

        # Build a transition matrix that transitions only within the current
        # `num_states` states.
        transition_probs = np.eye(max_num_states, dtype=np.float32)
        if num_states > 1:
            transition_probs[:num_states, :num_states] = (
                daily_change_prob / (num_states - 1))
            np.fill_diagonal(transition_probs[:num_states, :num_states],
                             1 - daily_change_prob)
        return initial_state_logits, transition_probs

    max_num_states = 10
    batch_initial_state_logits = []
    batch_transition_probs = []
    for num_states in range(1, max_num_states + 1):
        initial_state_logits, transition_probs = build_latent_state(
            num_states=num_states, max_num_states=max_num_states)
        batch_initial_state_logits.append(initial_state_logits)
        batch_transition_probs.append(transition_probs)
    batch_initial_state_logits = np.array(batch_initial_state_logits)
    batch_transition_probs = np.array(batch_transition_probs)

    trainable_log_rates = tf.Variable(
        (np.log(np.mean(fitting_area)) *
         np.ones([batch_initial_state_logits.shape[0], max_num_states]) +
         tf.random.normal([1, max_num_states])),
        name='log_rates')

    hmm = tfd.HiddenMarkovModel(
        initial_distribution=tfd.Categorical(
            logits=batch_initial_state_logits),
        transition_distribution=tfd.Categorical(probs=batch_transition_probs),
        observation_distribution=tfd.Poisson(log_rate=trainable_log_rates),
        num_steps=len(fitting_area))

    rate_prior = tfd.LogNormal(5, 5)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)

    def log_prob():
        prior_lps = rate_prior.log_prob(tf.math.exp(trainable_log_rates))
        prior_lp = tf.stack([
            tf.reduce_sum(prior_lps[i, :i + 1])
            for i in range(max_num_states)])
        return prior_lp + hmm.log_prob(fitting_area)

    @tf.function(autograph=False)
    def train_op():
        with tf.GradientTape() as tape:
            neg_log_prob = -log_prob()
        grads = tape.gradient(neg_log_prob, [trainable_log_rates])[0]
        optimizer.apply_gradients([(grads, trainable_log_rates)])
        return neg_log_prob, tf.math.exp(trainable_log_rates)

    for step in range(201):
        loss, rates = [t.numpy() for t in train_op()]
        if step % 20 == 0:
            print("step {}: loss {}".format(step, loss))

    posterior_probs = hmm.posterior_marginals(
        fitting_area).probs_parameter().numpy()
    most_probable_states = np.argmax(posterior_probs, axis=-1)

    fig = plt.figure(figsize=(14, 12))
    for i, learned_model_rates in enumerate(rates):
        ax = fig.add_subplot(4, 3, i + 1)
        ax.plot(learned_model_rates[most_probable_states[i]],
                c='green', lw=3, label='inferred rate')
        ax.plot(fitting_area, c='black', alpha=0.3, label='observed counts')
        ax.set_ylabel("latent rate")
        ax.set_xlabel("time")
        ax.set_title("{}-state model".format(i + 1))
        ax.legend(loc=4)
    plt.tight_layout()
    plt.show()

    fig = plt.figure(figsize=(14, 12))
    list_of_moment_map = []
    for number_of_states in range(max_num_states):
        moment_map = {}
        ax = fig.add_subplot(4, 3, number_of_states + 1)
        for state_no in range(max_num_states):
            moment_map[state_no] = []
        index = 0
        for state in most_probable_states[number_of_states]:
            moment_map[state].append(index)
            index += 1
        # moment_map = {k: v for k, v in moment_map.items() if len(v) > 0}
        frequency_count = [len(moment_map[x]) / index for x in moment_map]
        bar1 = ax.bar(range(len(moment_map)), frequency_count)
        # autolabel(bar1, most_probable_states[number_of_states])
        ax.set_ylim(0, 1.1)
ax.set_xlabel("state id") ax.set_title("{}-state model".format(i + 1)) list_of_moment_map.append(moment_map) plt.tight_layout() plt.savefig("rate_frequency.png") plt.clf() return list_of_moment_map
def latent_state_number_changing_curve(fitting_area, output_dir_prefix,
                                       log_dir_prefix, log_dir, fig_name=""):
    max_num_states = 10

    def build_latent_state(num_states, max_num_states,
                           daily_change_prob=0.05):
        # Give probability exp(-100) ~= 0 to states outside the current model.
        initial_state_logits = -100. * np.ones([max_num_states],
                                               dtype=np.float32)
        initial_state_logits[:num_states] = 0.
        initial_state_logits[0] = 1.

        # Build a transition matrix that transitions only within the current
        # `num_states` states.
        transition_probs = np.eye(max_num_states, dtype=np.float32)
        if num_states > 1:
            transition_probs[:num_states, :num_states] = (
                daily_change_prob / (num_states - 1))
            np.fill_diagonal(transition_probs[:num_states, :num_states],
                             1 - daily_change_prob)
        return initial_state_logits, transition_probs

    # For each candidate model, build the initial state prior and transition
    # matrix.
    batch_initial_state_logits = []
    batch_transition_probs = []
    for num_states in range(1, max_num_states + 1):
        initial_state_logits, transition_probs = build_latent_state(
            num_states=num_states, max_num_states=max_num_states)
        batch_initial_state_logits.append(initial_state_logits)
        batch_transition_probs.append(transition_probs)
    batch_initial_state_logits = np.array(batch_initial_state_logits)
    batch_transition_probs = np.array(batch_transition_probs)

    trainable_log_rates = tf.Variable(
        (np.log(np.mean(fitting_area)) *
         np.ones([batch_initial_state_logits.shape[0], max_num_states]) +
         tf.random.normal([1, max_num_states])),
        name='log_rates')

    hmm = tfd.HiddenMarkovModel(
        initial_distribution=tfd.Categorical(
            logits=batch_initial_state_logits),
        transition_distribution=tfd.Categorical(probs=batch_transition_probs),
        observation_distribution=tfd.Poisson(log_rate=trainable_log_rates),
        num_steps=len(fitting_area))

    rate_prior = tfd.LogNormal(5, 5)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)

    def log_prob():
        prior_lps = rate_prior.log_prob(tf.math.exp(trainable_log_rates))
        prior_lp = tf.stack([
            tf.reduce_sum(prior_lps[i, :i + 1])
            for i in range(max_num_states)])
        return prior_lp + hmm.log_prob(fitting_area)

    @tf.function(autograph=False)
    def train_op():
        with tf.GradientTape() as tape:
            neg_log_prob = -log_prob()
        grads = tape.gradient(neg_log_prob, [trainable_log_rates])[0]
        optimizer.apply_gradients([(grads, trainable_log_rates)])
        return neg_log_prob, tf.math.exp(trainable_log_rates)

    for step in range(201):
        loss, rates = [t.numpy() for t in train_op()]
        if step % 20 == 0:
            print("step {}: loss {}".format(step, loss))

    num_states = np.arange(1, max_num_states + 1)
    fig = plt.figure(figsize=(8, 6))
    plt.plot(num_states, -loss, "b-", label="likelihood")
    plt.ylabel("marginal likelihood $\\tilde{p}(x)$")
    plt.xlabel("number of latent states")
    plt.legend()
    plt.twinx()
    plt.plot(num_states, np.gradient(-loss), "g--", label="gradient")
    plt.ylabel("Gradient of the likelihood")
    plt.title("Model selection on latent states")
    plt.legend()

    output_path = output_dir_prefix + log_dir.replace(log_dir_prefix,
                                                      "").replace("/", "_")
    mkdir_p(output_path)
    plt.savefig("{}/{}_likelihood_curve.pdf".format(output_path, fig_name),
                bbox_inches="tight")
    plt.savefig("{}/{}_likelihood_curve.png".format(output_path, fig_name),
                bbox_inches="tight")
    plt.clf()

    posterior_probs = hmm.posterior_marginals(
        fitting_area).probs_parameter().numpy()
    most_probable_states = np.argmax(posterior_probs, axis=-1)

    fig = plt.figure(figsize=(14, 12))
    for i, learned_model_rates in enumerate(rates):
        ax = fig.add_subplot(4, 3, i + 1)
        ax.plot(learned_model_rates[most_probable_states[i]],
                c='green', lw=3, label='inferred rate')
        ax.plot(fitting_area, c='black', alpha=0.3, label='observed counts')
        ax.set_ylabel("latent rate")
        ax.set_xlabel("time")
        ax.set_title("{}-state model".format(i + 1))
        ax.legend(loc=4)
    plt.tight_layout()
    plt.savefig("{}/{}_model_fitting_test.pdf".format(output_path, fig_name),
                bbox_inches="tight")
    plt.savefig("{}/{}_model_fitting_test.png".format(output_path, fig_name),
                bbox_inches="tight")
    plt.clf()
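
# posterior_marginals + per-step argmax (as used above) picks the most
# probable state at each step independently, which need not form a
# consistent joint path. TFP's HiddenMarkovModel also exposes Viterbi
# decoding of the single most probable joint path; a self-contained sketch
# with an illustrative toy model:
toy_hmm = tfd.HiddenMarkovModel(
    initial_distribution=tfd.Categorical(probs=[0.5, 0.5]),
    transition_distribution=tfd.Categorical(probs=[[0.9, 0.1],
                                                   [0.1, 0.9]]),
    observation_distribution=tfd.Poisson(rate=[5., 50.]),
    num_steps=4)
map_path = toy_hmm.posterior_mode(tf.constant([4., 6., 40., 55.]))
print(map_path)  # most probable joint hidden-state path (Viterbi)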
def HMM_on_one_file(log_dir):
    stdout_file, LOG_file, report_csv = get_log_and_std_files(log_dir)
    data_set = load_log_and_qps(LOG_file, report_csv)
    bucket_df = vectorize_by_compaction_output_level(data_set)
    bucket_df["qps"] = data_set.qps_df["interval_qps"]

    _ = bucket_df.plot(subplots=True)

    # memtable filling, flush only, L0 compaction (CPU busy),
    # crowded compaction (disk busy)
    num_states = 5
    initial_state_logits = np.zeros([num_states],
                                    dtype=np.float32)  # uniform distribution
    # Start in state 0 (the flush-limited state) with probability 1; note
    # these values are passed below as probs, not logits.
    initial_state_logits[0] = 1.0
    initial_distribution = tfd.Categorical(probs=initial_state_logits)

    daily_change_prob = 0.05
    transition_probs = daily_change_prob / (num_states - 1) * np.ones(
        [num_states, num_states], dtype=np.float32)
    np.fill_diagonal(transition_probs, 1 - daily_change_prob)

    observed_counts = bucket_df["qps"].fillna(0).tolist()
    observed_counts = np.array(observed_counts).astype(np.float32)

    transition_distribution = tfd.Categorical(probs=transition_probs)
    trainable_log_rates = tf.Variable(
        np.log(np.mean(observed_counts)) + tf.random.normal([num_states]),
        name='log_rates')

    hmm = tfd.HiddenMarkovModel(
        initial_distribution=initial_distribution,
        transition_distribution=transition_distribution,
        observation_distribution=tfd.Poisson(log_rate=trainable_log_rates),
        num_steps=len(observed_counts))

    rate_prior = tfd.LogNormal(5, 5)

    def log_prob():
        return (tf.reduce_sum(
            rate_prior.log_prob(tf.math.exp(trainable_log_rates))) +
            hmm.log_prob(observed_counts))

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)

    @tf.function(autograph=False)
    def train_op():
        with tf.GradientTape() as tape:
            neg_log_prob = -log_prob()
        grads = tape.gradient(neg_log_prob, [trainable_log_rates])[0]
        optimizer.apply_gradients([(grads, trainable_log_rates)])
        return neg_log_prob, tf.math.exp(trainable_log_rates)

    for step in range(201):
        loss, rates = [t.numpy() for t in train_op()]
        if step % 20 == 0:
            print("step {}: log prob {} rates {}".format(step, -loss, rates))

    posterior_dists = hmm.posterior_marginals(observed_counts)
    posterior_probs = posterior_dists.probs_parameter().numpy()
    most_probable_states = np.argmax(posterior_probs, axis=1)
    most_probable_rates = rates[most_probable_states]

    fig = plt.figure(figsize=(10, 4))
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(most_probable_rates, c='green', lw=3, label='inferred rate')
    ax.plot(observed_counts, c='black', alpha=0.3, label='observed counts')
    ax.set_ylabel("latent rate")
    ax.set_xlabel("time")
    ax.set_title("Inferred latent rate over time")
    ax.legend(loc=4)

    output_path = "image/" + log_dir.replace("log_files/", "").replace(
        "/", "_")
    mkdir_p(output_path)
    plt.savefig("{}/state_guessing.pdf".format(output_path),
                bbox_inches="tight")
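
# mkdir_p is used throughout but not defined in this file; a minimal sketch
# of the assumed helper, using os.makedirs:
import os

def mkdir_p(path):
    """Create `path` (and any missing parents) if it does not exist."""
    os.makedirs(path, exist_ok=True)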