def test_sticky_initialisation():
    """Check that the ``sticky`` flag controls the kappa hyperparameter and prior.

    Builds one sticky and one non-sticky model on the same emission sequences,
    initialises both with 20 states, and verifies that kappa lies in [0, 1]
    with a non-degenerate prior when sticky, and is pinned to zero otherwise.
    """
    emission_sequences = [[1, 2, 3] * 3] * 3
    hmm_sticky = bayesian_hmm.HDPHMM(emission_sequences, sticky=True)
    hmm_slippery = bayesian_hmm.HDPHMM(emission_sequences, sticky=False)
    hmm_sticky.initialise(20)
    hmm_slippery.initialise(20)

    # Sticky chain: kappa is bounded and its prior is not the constant-zero prior.
    assert 0 <= hmm_sticky.hyperparameters["kappa"] <= 1
    # Functions compare by identity, so comparing the prior against a fresh
    # ``lambda: 0`` can never fail; sample the prior to check it is not stuck at 0.
    assert any(hmm_sticky.priors["kappa"]() != 0 for _ in range(100))

    # Non-sticky chain: kappa and every draw from its prior are exactly zero.
    assert hmm_slippery.hyperparameters["kappa"] == 0
    assert all(hmm_slippery.priors["kappa"]() == 0 for _ in range(100))
def test_mcmc():
    """Run a short MCMC chain and check the shape and types of the result list."""
    # Five identical sequences, each the pattern 0..4 repeated five times.
    pattern = list(range(5))
    sequences = [pattern * 5 for _ in range(5)]

    # Start from 20 latent states, more than the 5 distinct emissions.
    hmm = bayesian_hmm.HDPHMM(sequences)
    hmm.initialise(k=20)

    results = hmm.mcmc(n=100, burn_in=20)

    # Six result series, each a list holding eight saved samples.
    assert len(results) == 6
    assert all(len(series) == 8 for series in results)
    assert all(type(series) is list for series in results)

    # The third series holds the hyperparameter samples, five values each.
    hyperparameter_samples = results[2]
    assert all(len(sample) == 5 for sample in hyperparameter_samples)

    # The first sample of every series has the expected exact type.
    expected_types = [int, numpy.float64, tuple, dict, dict, tuple]
    observed_types = [type(series[0]) for series in results]
    assert observed_types == expected_types

    # Hyperparameter values are plain, non-negative floats.
    assert all(
        type(value) is float
        for sample in hyperparameter_samples
        for value in sample
    )
    assert all(
        value >= 0
        for sample in hyperparameter_samples
        for value in sample
    )
def test_empty_hmm():
    """A model built from no sequences initialises with zero states and empty counts."""
    hmm = bayesian_hmm.HDPHMM([])

    # The initialised flag flips only once initialise() has been called.
    assert not hmm.initialised
    hmm.initialise(0)
    assert hmm.initialised

    # Nothing was observed, so the emission set is empty and c, k, n are all zero.
    assert hmm.emissions == set()
    assert hmm.c == 0
    assert hmm.k == 0
    assert hmm.n == 0
def test_print():
    """Smoke test: the printing and repr helpers run without raising."""
    # Sequences of lengths 1..49 over the emissions "e0", "e1", ...
    emission_sequences = [
        ["e" + str(index) for index in range(length)]
        for length in range(1, 50)
    ]
    hmm = bayesian_hmm.HDPHMM(emission_sequences)
    hmm.initialise(20)

    # Exercise every textual output path; reaching the end is the pass condition.
    print(hmm)
    repr(hmm)
    hmm.print_fit_parameters()
    hmm.print_probabilities()
    assert True
def test_initialise_hmm():
    """Initialising with 20 states records the expected emissions and counts."""
    # Sequences of lengths 1..49 over the emissions "e0", "e1", ..., "e48".
    emission_sequences = [
        ["e" + str(index) for index in range(length)]
        for length in range(1, 50)
    ]
    hmm = bayesian_hmm.HDPHMM(emission_sequences)
    hmm.initialise(20)

    # The observed emission set matches the 49 expected labels exactly.
    expected_emissions = set(["e" + str(index) for index in range(49)])
    unmatched = hmm.emissions.symmetric_difference(expected_emissions)
    assert unmatched == set()

    assert hmm.c == 49
    assert hmm.k == 20
    assert hmm.n == 49
def _bayesianhdphmm(self, params):
    """Fit a non-sticky HDP-HMM to ``self.walks`` and extract community emissions.

    Args:
        params: Mapping that must contain ``'graph_path'`` (a GML file read
            with ``nx.read_gml``) and ``'bayesianhmm_number_of_steps'`` (the
            number of MCMC iterations to run).

    Returns:
        A tuple ``(phi, theta, id2node)`` where ``phi`` is a
        ``(K, number_of_nodes)`` array of per-community emission values with
        each row divided by its sum, ``theta`` is always ``None``, and
        ``id2node`` maps ``int(node)`` to the node label for every graph node.

    Raises:
        ValueError: If ``'graph_path'`` is missing from ``params``.

    Side effects:
        Sets ``self.K`` to the number of non-``None`` latent states and
        ``self.community_walks`` to the community index sequence of each walk.
    """
    if 'graph_path' not in params:
        raise ValueError(
            "For {} algorithm, the graph path is needed!".format(
                self.comm_detection_method))

    graph = nx.read_gml(params['graph_path'])
    node_count = graph.number_of_nodes()

    hmm = bayesian_hmm.HDPHMM(self.walks, sticky=False)
    hmm.initialise()

    n = params['bayesianhmm_number_of_steps']
    results = hmm.mcmc(n=n, burn_in=n - 1, save_every=1, ncores=3, verbose=False)

    # Only the last saved parameter snapshot is used from here on.
    parameters_map = results['parameters'][-1]

    # Assign consecutive community indices to the latent state labels,
    # skipping the ``None`` key of the initial distribution.
    state_labels = [
        label for label in parameters_map['p_initial'] if label is not None
    ]
    commlabel2comm = {label: index for index, label in enumerate(state_labels)}
    self.K = len(commlabel2comm)

    emission_prob = parameters_map['p_emission']

    # ``np.float`` was removed from NumPy; the builtin ``float`` is the same dtype.
    phi = np.zeros(shape=(self.K, node_count), dtype=float)
    for node in range(node_count):
        # Emissions are looked up by the string form of the node index.
        for label, comm in commlabel2comm.items():
            phi[comm, node] = emission_prob[label][str(node)]

    # Divide every row by its sum.
    phi = (phi.T / np.sum(phi, 1)).T

    theta = None
    id2node = {int(node): node for node in graph.nodes()}

    # Translate each chain's latent state sequence into community indices.
    chains = hmm.chains
    self.community_walks = [
        [commlabel2comm[state] for state in chains[walk_index].latent_sequence]
        for walk_index in range(len(self.walks))
    ]

    return phi, theta, id2node
def test_manual_priors():
    """User-supplied priors override the defaults, and invalid combinations raise."""
    emission_sequences = [[1, 2, 3] * 3] * 3

    # Reference priors; only the key names are used by the checks below.
    priors_default = {
        "alpha": lambda: np.random.gamma(2, 2),
        "gamma": lambda: np.random.gamma(3, 3),
        "alpha_emission": lambda: np.random.gamma(2, 2),
        "gamma_emission": lambda: np.random.gamma(3, 3),
        "kappa": lambda: np.random.beta(1, 1),
    }

    # Three models: no overrides, alpha overridden, every prior overridden.
    hmm_default = bayesian_hmm.HDPHMM(emission_sequences)
    hmm_single = bayesian_hmm.HDPHMM(
        emission_sequences, priors={"alpha": lambda: -1}
    )
    hmm_all = bayesian_hmm.HDPHMM(
        emission_sequences,
        priors={name: lambda: -1 for name in priors_default},
    )
    hmms = {"default": hmm_default, "single": hmm_single, "all": hmm_all}

    # Default priors give positive values; the constant -1 priors give negative ones.
    assert all(value > 0 for value in hmms["default"].hyperparameters.values())
    assert all(value < 0 for value in hmms["all"].hyperparameters.values())

    # Overriding alpha alone leaves every other hyperparameter positive.
    single_hyperparameters = hmms["single"].hyperparameters
    assert single_hyperparameters["alpha"] < 0
    assert all(
        single_hyperparameters[name] > 0
        for name in priors_default
        if name != "alpha"
    )

    # Supplying a kappa prior together with sticky=False must raise ValueError.
    try:
        _ = bayesian_hmm.HDPHMM(
            emission_sequences, priors={"kappa": lambda: 2}, sticky=False
        )
    except ValueError:
        fail = True
    else:
        fail = False
    assert fail
def test_mcmc():
    """Run a short MCMC chain and validate the structure of the results dict."""
    # Five identical sequences, each the pattern 0..4 repeated five times.
    pattern = list(range(5))
    sequences = [pattern * 5 for _ in range(5)]

    # Start from 20 latent states, more than the 5 distinct emissions.
    hmm = bayesian_hmm.HDPHMM(sequences)
    hmm.initialise(k=20)

    results = hmm.mcmc(n=100, burn_in=20)

    # Keys expected at the top level and inside each hyperparameter sample.
    expected_results_keys = [
        "state_count",
        "loglikelihood",
        "chain_loglikelihood",
        "hyperparameters",
        "beta_emission",
        "beta_transition",
        "parameters",
    ]
    expected_hyperparameters_keys = [
        "alpha",
        "gamma",
        "alpha_emission",
        "gamma_emission",
        "kappa",
    ]

    # Top level: seven named series, each a list holding eight saved samples.
    assert len(results) == 7
    assert list(results.keys()) == expected_results_keys
    assert all(len(series) == 8 for series in results.values())
    assert all(type(series) is list for series in results.values())

    # State counts and log-likelihoods are flat sequences of scalars.
    assert all(type(count) is int for count in results["state_count"])
    assert all(
        type(value) is numpy.float64
        for key in ["loglikelihood", "chain_loglikelihood"]
        for value in results[key]
    )

    # Each hyperparameter sample is a dict of non-negative plain floats.
    hyperparameter_samples = results["hyperparameters"]
    assert all(type(sample) is dict for sample in hyperparameter_samples)
    assert all(
        list(sample.keys()) == expected_hyperparameters_keys
        for sample in hyperparameter_samples
    )
    assert all(
        type(value) is float
        for sample in hyperparameter_samples
        for value in sample.values()
    )
    assert all(
        value >= 0
        for sample in hyperparameter_samples
        for value in sample.values()
    )

    # beta_emission and beta_transition samples are dicts of plain floats.
    beta_emission_samples = results["beta_emission"]
    assert all(type(sample) is dict for sample in beta_emission_samples)
    assert all(
        type(value) is float
        for sample in beta_emission_samples
        for value in sample.values()
    )
    beta_transition_samples = results["beta_transition"]
    assert all(type(sample) is dict for sample in beta_transition_samples)
    assert all(
        type(value) is float
        for sample in beta_transition_samples
        for value in sample.values()
    )

    # Each parameters sample is a dict of dicts captured at one saved iteration.
    parameter_samples = results["parameters"]
    assert all(type(sample) is dict for sample in parameter_samples)
    assert all(
        type(table) is dict
        for sample in parameter_samples
        for table in sample.values()
    )
    # Initial-distribution keys are state labels: strings, or None.
    assert all(
        type(label) is str or label is None
        for sample in parameter_samples
        for label in sample["p_initial"]
    )
    # Transition and emission tables are nested dicts keyed by state.
    assert all(
        type(row) is dict
        for sample in parameter_samples
        for row in sample["p_transition"].values()
    )
    assert all(
        type(row) is dict
        for sample in parameter_samples
        for row in sample["p_emission"].values()
    )