Example #1
0
def test_sticky_initialisation():
    """Check how ``kappa`` is set up for sticky and non-sticky chains.

    Two models are built from the same emission sequences, one with
    ``sticky=True`` and one with ``sticky=False``, and both are initialised
    with 20 states. The sticky model is expected to hold a ``kappa`` in
    ``[0, 1]`` drawn from a prior that is not constantly zero; the non-sticky
    model is expected to have ``kappa`` fixed at zero.
    """
    emission_sequences = [[1, 2, 3] * 3] * 3
    hmm_sticky = bayesian_hmm.HDPHMM(emission_sequences, sticky=True)
    hmm_slippery = bayesian_hmm.HDPHMM(emission_sequences, sticky=False)
    hmm_sticky.initialise(20)
    hmm_slippery.initialise(20)

    # sticky chain: kappa lies in the unit interval
    assert 0 <= hmm_sticky.hyperparameters["kappa"] <= 1
    # Functions compare by identity, so comparing the prior against a freshly
    # created ``lambda: 0`` can never fail. Sample the prior instead and
    # require at least one non-zero draw.
    assert any(hmm_sticky.priors["kappa"]() != 0 for _ in range(100))

    # non-sticky chain: kappa and every draw from its prior are exactly zero
    assert hmm_slippery.hyperparameters["kappa"] == 0
    assert all(hmm_slippery.priors["kappa"]() == 0 for _ in range(100))
Example #2
0
def test_mcmc():
    """Run a short MCMC chain and check the shape and types of its output."""
    # five identical sequences, each repeating the symbols 0..4 five times
    symbols = list(range(5))
    sequences = [symbols * 5 for _ in range(5)]

    # start from more latent states than the data should need
    hmm = bayesian_hmm.HDPHMM(sequences)
    hmm.initialise(k=20)

    results = hmm.mcmc(n=100, burn_in=20)

    # six result series, each a plain list holding eight entries
    assert len(results) == 6
    assert all(len(series) == 8 for series in results)
    assert all(type(series) is list for series in results)

    # the third series holds the hyperparameters: five values per entry
    hyperparameter_draws = results[2]
    assert all(len(draw) == 5 for draw in hyperparameter_draws)

    # the first entry of each series has the expected type
    first_entry_types = [type(series[0]) for series in results]
    assert first_entry_types == [int, numpy.float64, tuple, dict, dict, tuple]

    # every hyperparameter value is a non-negative builtin float
    hyperparameter_values = [
        value for draw in hyperparameter_draws for value in draw
    ]
    assert all(type(value) is float for value in hyperparameter_values)
    assert all(value >= 0 for value in hyperparameter_values)
Example #3
0
def test_empty_hmm():
    """A model built from no sequences initialises to an all-empty state."""
    hmm = bayesian_hmm.HDPHMM([])

    # the ``initialised`` flag flips only once ``initialise`` has been called
    assert not hmm.initialised
    hmm.initialise(0)
    assert hmm.initialised

    # no emissions were supplied, so the emission set and all counts are empty
    assert hmm.emissions == set()
    for count_name in ("c", "k", "n"):
        assert getattr(hmm, count_name) == 0
Example #4
0
def test_print():
    """Smoke test: the printing helpers run without raising."""
    # 49 sequences of lengths 1..49 over the symbols "e0", "e1", ...
    emission_sequences = [
        [f"e{index}" for index in range(length)] for length in range(1, 50)
    ]
    model = bayesian_hmm.HDPHMM(emission_sequences)
    model.initialise(20)

    # only the absence of an exception matters; the output is not inspected
    print(model)
    repr(model)
    model.print_fit_parameters()
    model.print_probabilities()
    assert True
Example #5
0
def test_initialise_hmm():
    """Initialising with 20 states over 49 sequences sets the expected counts."""
    # 49 sequences of lengths 1..49 over the symbols "e0" .. "e48"
    emission_sequences = [
        [f"e{index}" for index in range(length)] for length in range(1, 50)
    ]
    hmm = bayesian_hmm.HDPHMM(emission_sequences)
    hmm.initialise(20)

    # the model's emission set is exactly the 49 distinct symbols
    expected_emissions = {f"e{index}" for index in range(49)}
    unmatched = hmm.emissions.symmetric_difference(expected_emissions)
    assert unmatched == set()

    # counts after initialisation
    assert hmm.c == 49
    assert hmm.k == 20
    assert hmm.n == 49
Example #6
0
    def _bayesianhdphmm(self, params):
        """Detect communities by fitting a non-sticky HDP-HMM to ``self.walks``.

        The last saved MCMC sample is used. Every latent-state label in that
        sample's ``p_initial`` table other than ``None`` is assigned a
        community index in iteration order, and the sample's emission
        probabilities are turned into a row-normalised community-by-node
        matrix.

        Args:
            params: Mapping that must contain ``'graph_path'`` (a GML file
                read with ``nx.read_gml``) and
                ``'bayesianhmm_number_of_steps'`` (passed to ``mcmc`` as
                ``n``).

        Returns:
            A tuple ``(phi, theta, id2node)`` where ``phi`` is a float array
            of shape ``(self.K, graph.number_of_nodes())`` whose rows sum to
            one, ``theta`` is always ``None``, and ``id2node`` maps
            ``int(node)`` to the node label for every node in the graph.

        Raises:
            ValueError: If ``'graph_path'`` is missing from ``params``.

        Side effects:
            Sets ``self.K`` to the number of communities and
            ``self.community_walks`` to the community index sequence of each
            walk.
        """
        if 'graph_path' not in params:
            raise ValueError(
                "For {} algorithm, the graph path is needed!".format(
                    self.comm_detection_method))
        graph = nx.read_gml(params['graph_path'])

        hmm = bayesian_hmm.HDPHMM(self.walks, sticky=False)
        hmm.initialise()

        n = params['bayesianhmm_number_of_steps']
        # burn_in = n - 1 with save_every = 1: presumably only the final
        # iteration is kept -- confirm against the bayesian_hmm documentation.
        results = hmm.mcmc(n=n,
                           burn_in=n - 1,
                           save_every=1,
                           ncores=3,
                           verbose=False)

        parameters_map = results['parameters'][-1]

        # Number the non-None state labels consecutively from zero.
        commlabel2comm = {}
        for commlabel in parameters_map['p_initial']:
            if commlabel is not None:
                commlabel2comm[commlabel] = len(commlabel2comm)
        self.K = len(commlabel2comm)

        emission_prob = parameters_map['p_emission']
        num_nodes = graph.number_of_nodes()

        # ``np.float`` was removed in NumPy 1.24; the builtin ``float`` is the
        # same dtype (float64).
        phi = np.zeros(shape=(self.K, num_nodes), dtype=float)
        for commlabel, comm in commlabel2comm.items():
            for node in range(num_nodes):
                # emissions are looked up by the string form of the node index
                phi[comm, node] = emission_prob[commlabel][str(node)]
        # normalise each community's row to sum to one
        phi = (phi.T / np.sum(phi, 1)).T

        theta = None

        id2node = {int(node): node for node in graph.nodes()}

        # Translate each walk's latent state sequence into community indices.
        chains = hmm.chains
        self.community_walks = [
            [commlabel2comm[w] for w in chains[i].latent_sequence]
            for i in range(len(self.walks))
        ]

        return phi, theta, id2node
Example #7
0
def test_manual_priors():
    """Check that user-supplied priors replace the defaults they name."""
    emission_sequences = [[1, 2, 3] * 3] * 3

    # one entry per hyperparameter; only the names are used below
    priors_default = {
        "alpha": lambda: np.random.gamma(2, 2),
        "gamma": lambda: np.random.gamma(3, 3),
        "alpha_emission": lambda: np.random.gamma(2, 2),
        "gamma_emission": lambda: np.random.gamma(3, 3),
        "kappa": lambda: np.random.beta(1, 1),
    }

    # three models: no overrides, one override, every prior overridden
    hmm_default = bayesian_hmm.HDPHMM(emission_sequences)
    hmm_single = bayesian_hmm.HDPHMM(
        emission_sequences, priors={"alpha": lambda: -1})
    hmm_all = bayesian_hmm.HDPHMM(
        emission_sequences,
        priors={name: (lambda: -1) for name in priors_default},
    )

    # an overridden prior yields -1; every other hyperparameter stays positive
    assert all(value > 0 for value in hmm_default.hyperparameters.values())
    assert all(value < 0 for value in hmm_all.hyperparameters.values())
    assert hmm_single.hyperparameters["alpha"] < 0
    untouched_names = [name for name in priors_default if name != "alpha"]
    assert all(
        hmm_single.hyperparameters[name] > 0 for name in untouched_names)

    # the test expects a ValueError when a kappa prior returning 2 is combined
    # with sticky=False
    try:
        bayesian_hmm.HDPHMM(emission_sequences,
                            priors={"kappa": lambda: 2},
                            sticky=False)
    except ValueError:
        rejected = True
    else:
        rejected = False
    assert rejected
Example #8
0
def test_mcmc():
    """Run a short MCMC chain and validate the structure of the results dict."""
    # five identical sequences, each repeating the symbols 0..4 five times
    symbols = list(range(5))
    sequences = [symbols * 5 for _ in range(5)]

    # start from more latent states than the data should need
    hmm = bayesian_hmm.HDPHMM(sequences)
    hmm.initialise(k=20)

    results = hmm.mcmc(n=100, burn_in=20)

    # expected dict keys, in order, at the two nested levels
    expected_results_keys = [
        "state_count",
        "loglikelihood",
        "chain_loglikelihood",
        "hyperparameters",
        "beta_emission",
        "beta_transition",
        "parameters",
    ]
    expected_hyperparameters_keys = [
        "alpha",
        "gamma",
        "alpha_emission",
        "gamma_emission",
        "kappa",
    ]

    # top level: seven named series, each a plain list with eight entries
    assert len(results) == 7
    assert list(results.keys()) == expected_results_keys
    assert all(len(series) == 8 for series in results.values())
    assert all(type(series) is list for series in results.values())

    # state counts and log-likelihoods are flat sequences of scalars
    assert all(type(count) is int for count in results["state_count"])
    for key in ("loglikelihood", "chain_loglikelihood"):
        assert all(type(value) is numpy.float64 for value in results[key])

    # hyperparameters is a list of dicts with atomic, non-negative values
    hyperparameter_snapshots = results["hyperparameters"]
    assert all(
        type(snapshot) is dict for snapshot in hyperparameter_snapshots)
    assert all(
        list(snapshot) == expected_hyperparameters_keys
        for snapshot in hyperparameter_snapshots)
    hyperparameter_values = [
        value
        for snapshot in hyperparameter_snapshots
        for value in snapshot.values()
    ]
    assert all(type(value) is float for value in hyperparameter_values)
    assert all(value >= 0 for value in hyperparameter_values)

    # beta_emission and beta_transition are lists of dicts of floats
    for key in ("beta_emission", "beta_transition"):
        assert all(type(weights) is dict for weights in results[key])
        assert all(
            type(weight) is float
            for weights in results[key]
            for weight in weights.values())

    # parameters is a list of dicts, each a point-in-time snapshot whose
    # values are themselves dicts
    parameter_snapshots = results["parameters"]
    assert all(type(snapshot) is dict for snapshot in parameter_snapshots)
    assert all(
        type(table) is dict
        for snapshot in parameter_snapshots
        for table in snapshot.values())

    # initial-state keys are strings or None
    assert all(
        label is None or type(label) is str
        for snapshot in parameter_snapshots
        for label in snapshot["p_initial"])

    # transition and emission tables are dicts of dicts
    for key in ("p_transition", "p_emission"):
        assert all(
            type(row) is dict
            for snapshot in parameter_snapshots
            for row in snapshot[key].values())