def estimate_hawkes_kernel(event_dicts,
                           class_assignment,
                           n_classes,
                           bp_beta,
                           learner_param_dict=None):
    """
    Fits mu and alpha for every block pair of a network while keeping the decay (beta) fixed.

    One tick `HawkesExpKern` learner is created and fitted per block pair.

    :param event_dicts: Edge dictionary of events between all node pair. Output of the generative models.
    :param class_assignment: membership of every node to one of K classes. num_nodes x 1 (class of node i)
    :param n_classes: (int) number of classes
    :param bp_beta: K x K matrix where entry ij is the fixed beta/decay of the Hawkes process for block
                    pair (b_i, b_j)
    :param learner_param_dict: dict overriding the learner settings listed in `learner_settings` below.
                               If `None`, those defaults are used unchanged. Keys other than the ones
                               listed there are not forwarded to the learner.

    :return: `mu_estimates` and `alpha_estimates`, both K x K matrices where entry ij is the estimate for
             block pair (b_i, b_j). Alpha is the learner's adjacency entry divided by the pair's beta.
    """
    # Built-in learner settings; caller-provided values take precedence.
    learner_settings = {
        'penalty': 'l2',
        'C': 0.1,
        'gofit': 'least-squares',
        'verbose': True,
        'tol': 1e-11,
        'solver': 'gd',
        'step': 1e-3,
        'max_iter': 1000
    }
    if learner_param_dict is not None:
        learner_settings.update(learner_param_dict)

    # Only this fixed set of settings is handed to the learner.
    forwarded_keys = ('penalty', 'C', 'gofit', 'verbose', 'tol', 'solver',
                      'step', 'max_iter')
    learner_kwargs = {key: learner_settings[key] for key in forwarded_keys}

    events_per_block_pair = utils.event_dict_to_block_pair_events(
        event_dicts, class_assignment, n_classes, is_for_tick=True)

    mu_estimates = np.zeros((n_classes, n_classes))
    alpha_estimates = np.zeros((n_classes, n_classes))

    # Visit the block pairs in row-major order, fitting one learner per pair.
    for row, col in np.ndindex(n_classes, n_classes):
        decay = bp_beta[row, col]

        learner = tick.HawkesExpKern(decay, **learner_kwargs)
        learner.fit(events_per_block_pair[row][col], start=0.1)

        alpha_estimates[row, col] = learner.adjacency[0][0] / decay
        mu_estimates[row, col] = learner.baseline[0]

    return mu_estimates, alpha_estimates
# Example #2
# 0
def estimate_bp_hawkes_params(event_dict,
                              node_membership,
                              duration,
                              num_classes,
                              agg_adj=None,
                              return_block_pair_events=False):
    """
    Estimate CHIP Hawkes parameters.

    mu and the alpha/beta ratio are estimated from the aggregated event counts; beta is then estimated
    per block pair from the event times, and alpha is recovered as ratio * beta.

    :param event_dict: Edge dictionary of events between all node pair.
    :param node_membership: (list) membership of every node to one of K classes.
    :param duration: (int) duration of the network
    :param num_classes: (int) number of blocks / classes
    :param agg_adj: (optional) np array (num_nodes x num_nodes) Adjacency matrix where element ij denotes the
                    number of events between nodes i an j. If None, this will be calculated.
    :param return_block_pair_events: (bool) If True, `block_pair_events` is appended to the returned tuple.

    :return: (bp_mu, bp_alpha, bp_beta, bp_alpha_beta_ratio), each K x K; followed by `block_pair_events`
             when `return_block_pair_events` is True.
    """
    if agg_adj is None:
        agg_adj = utils.event_dict_to_aggregated_adjacency(
            len(node_membership), event_dict)

    # 1e-10 / duration is passed as the fallback mu (the `default_mu` argument of the estimator).
    bp_mu, bp_alpha_beta_ratio = estimate_utils.estimate_hawkes_from_counts(
        agg_adj, node_membership, duration, 1e-10 / duration)

    # The builtin `float` replaces the `np.float` alias, which no longer exists in NumPy >= 1.24.
    bp_beta = np.zeros((num_classes, num_classes), dtype=float)
    block_pair_events = utils.event_dict_to_block_pair_events(
        event_dict, node_membership, num_classes)
    bp_size = utils.calc_block_pair_size(node_membership, num_classes)

    for b_i in range(num_classes):
        for b_j in range(num_classes):
            # The estimator returns a pair; only its first element (beta) is kept.
            bp_beta[b_i, b_j], _ = estimate_utils.estimate_beta_from_events(
                block_pair_events[b_i][b_j], bp_mu[b_i, b_j],
                bp_alpha_beta_ratio[b_i, b_j], duration, bp_size[b_i, b_j])

    bp_alpha = bp_alpha_beta_ratio * bp_beta

    if return_block_pair_events:
        return bp_mu, bp_alpha, bp_beta, bp_alpha_beta_ratio, block_pair_events

    return bp_mu, bp_alpha, bp_beta, bp_alpha_beta_ratio
def estimate_bp_hawkes_params(event_dict, node_membership, duration,
                              num_classes):
    """
    Estimate CHIP Hawkes parameters.

    mu and the alpha/beta ratio are estimated from the aggregated event counts; beta is then estimated
    per block pair from the event times, and alpha is recovered as ratio * beta.

    :param event_dict: Edge dictionary of events between all node pair.
    :param node_membership: (list or np array) membership of every node to one of K classes.
    :param duration: (int) duration of the network
    :param num_classes: (int) number of blocks / classes

    :return: (bp_mu, bp_alpha, bp_beta, bp_alpha_beta_ratio), each K x K.
    """
    num_nodes = len(node_membership)

    agg_adj = utils.event_dict_to_aggregated_adjacency(num_nodes, event_dict)
    # 1e-10 / duration is passed as the fallback mu (the `default_mu` argument of the estimator).
    bp_mu, bp_alpha_beta_ratio = estimate_utils.estimate_hawkes_from_counts(
        agg_adj, node_membership, duration, 1e-10 / duration)

    # The builtin `float` replaces the `np.float` alias, which no longer exists in NumPy >= 1.24.
    bp_beta = np.zeros((num_classes, num_classes), dtype=float)
    block_pair_events = utils.event_dict_to_block_pair_events(
        event_dict, node_membership, num_classes)

    # Class sizes do not depend on the block pair, so count them once. `np.asarray` makes the
    # comparison element-wise even when the membership is a plain list.
    membership = np.asarray(node_membership)
    class_sizes = [
        int(np.count_nonzero(membership == k)) for k in range(num_classes)
    ]

    for b_i in range(num_classes):
        for b_j in range(num_classes):
            # Number of ordered node pairs in the block pair; diagonal blocks exclude self-pairs.
            bp_size = class_sizes[b_i] * class_sizes[b_j]
            if b_i == b_j:
                bp_size -= class_sizes[b_i]

            # The estimator returns a pair; only its first element (beta) is kept.
            bp_beta[b_i, b_j], _ = estimate_utils.estimate_beta_from_events(
                block_pair_events[b_i][b_j], bp_mu[b_i, b_j],
                bp_alpha_beta_ratio[b_i, b_j], duration, bp_size)

    bp_alpha = bp_alpha_beta_ratio * bp_beta

    return bp_mu, bp_alpha, bp_beta, bp_alpha_beta_ratio
        1e-10 / train_duration)
    toc = time.time()

    print(f"Mu and m estimated in {toc - tic:.1f}s")

    if verbose:
        print("Mu:")
        print(bp_mu)
        print("Ratio:")
        print(bp_alpha_beta_ratio)

    print("\nStart Beta estimation:")

    tic = time.time()
    bp_beta = np.zeros((num_classes, num_classes), dtype=np.float)
    train_block_pair_events = utils.event_dict_to_block_pair_events(
        train_event_dict, train_node_membership, num_classes)

    cnt = 0
    for b_i in range(num_classes):
        for b_j in range(num_classes):
            bp_size = len(np.where(train_node_membership == b_i)[0]) * len(
                np.where(train_node_membership == b_j)[0])
            if b_i == b_j:
                bp_size -= len(np.where(train_node_membership == b_i)[0])

            bp_beta[b_i, b_j], _ = estimate_utils.estimate_beta_from_events(
                train_block_pair_events[b_i][b_j], bp_mu[b_i, b_j],
                bp_alpha_beta_ratio[b_i, b_j], train_duration, bp_size)
            cnt += 1
            print(f"{100 * cnt / num_classes ** 2:0.2f}% Done.", end='\r')
        class_probabilities,
        bp_mu,
        bp_alpha,
        bp_beta,
        end_time,
        burnin=burnin,
        seed=seed)
    toc = time.time()

    print(toc - tic)

    tic = time.time()
    node_memberships, event_dictss = community_generative_model(
        number_of_nodes,
        class_probabilities,
        bp_mu,
        bp_alpha,
        bp_beta,
        end_time,
        burnin=burnin,
        n_cores=-1,
        seed=seed)
    toc = time.time()
    print(toc - tic)

    node_membership = utils.one_hot_to_class_assignment(node_membership)

    block_pair_events = utils.event_dict_to_block_pair_events(
        event_dicts, node_membership, num_of_classes)
    print(block_pair_events)
def fit_community_model(event_dict,
                        num_nodes,
                        duration,
                        num_classes,
                        local_search_max_iter,
                        local_search_n_cores,
                        verbose=False):
    """
    Fits CHIP model to a network.

    Node memberships come from spectral clustering on the aggregated adjacency and are optionally
    refined by local search; the block-pair Hawkes parameters are then estimated for that membership.

    :param event_dict: Edge dictionary of events between all node pair.
    :param num_nodes: (int) Total number of nodes
    :param duration: (int) duration of the network
    :param num_classes: (int) number of blocks / classes
    :param local_search_max_iter: Maximum number of local search to be performed. If 0, no local search is done
    :param local_search_n_cores: Number of cores to parallelize local search. Only applicable if
                                 `local_search_max_iter` > 0
    :param verbose: Prints fitted Block Hawkes parameters

    :return: node_membership, mu, alpha, beta, block_pair_events
    """

    agg_adj = utils.event_dict_to_aggregated_adjacency(num_nodes, event_dict)

    # Running spectral clustering
    node_membership = spectral_cluster(agg_adj, num_classes, verbose=False)

    if local_search_max_iter > 0 and num_classes > 1:
        # Local search refines the membership and returns the fitted parameters with it.
        node_membership, bp_mu, bp_alpha, bp_beta = cls.chip_local_search(
            event_dict,
            num_classes,
            node_membership,
            duration,
            max_iter=local_search_max_iter,
            n_cores=local_search_n_cores,
            return_fitted_param=True,
            verbose=False)

        block_pair_events = utils.event_dict_to_block_pair_events(
            event_dict, node_membership, num_classes)

    else:
        # 1e-10 / duration is passed as the fallback mu (the `default_mu` argument of the estimator).
        bp_mu, bp_alpha_beta_ratio = estimate_utils.estimate_hawkes_from_counts(
            agg_adj, node_membership, duration, 1e-10 / duration)
        # The builtin `float` replaces the `np.float` alias, which no longer exists in NumPy >= 1.24.
        bp_beta = np.zeros((num_classes, num_classes), dtype=float)

        block_pair_events = utils.event_dict_to_block_pair_events(
            event_dict, node_membership, num_classes)

        # Class sizes do not depend on the block pair, so count them once. `np.asarray` keeps the
        # comparison element-wise regardless of the container type of the membership.
        membership = np.asarray(node_membership)
        class_sizes = [
            int(np.count_nonzero(membership == k)) for k in range(num_classes)
        ]

        for b_i in range(num_classes):
            for b_j in range(num_classes):
                # Number of ordered node pairs in the block pair; diagonal blocks exclude self-pairs.
                bp_size = class_sizes[b_i] * class_sizes[b_j]
                if b_i == b_j:
                    bp_size -= class_sizes[b_i]

                # The estimator returns a pair; only its first element (beta) is kept.
                bp_beta[b_i, b_j], _ = estimate_utils.estimate_beta_from_events(
                    block_pair_events[b_i][b_j], bp_mu[b_i, b_j],
                    bp_alpha_beta_ratio[b_i, b_j], duration, bp_size)

        bp_alpha = bp_alpha_beta_ratio * bp_beta

    # Printing information about the fit
    if verbose:
        _, block_count = np.unique(node_membership, return_counts=True)
        class_prob = block_count / sum(block_count)

        print("Membership percentage: ", class_prob)

        print("Mu:")
        print(bp_mu)

        print("\nAlpha:")
        print(bp_alpha)

        print("\nBeta:")
        print(bp_beta)

    return node_membership, bp_mu, bp_alpha, bp_beta, block_pair_events
def fit_and_eval_community_hawkes(train_tuple,
                                  test_tuple,
                                  combined_tuple,
                                  nodes_not_in_train,
                                  k_values_to_test=(1, 2, 3, 4, 5, 6, 7, 8, 9,
                                                    10),
                                  local_search_max_iter=0,
                                  local_search_n_cores=-1,
                                  plot_fitted_hist=False,
                                  verbose=False):
    """
    Fits the CHIP model on the train split for each candidate number of communities and scores it on test.

    The test score is obtained from the log-likelihood of the combined dataset and the log-likelihood of
    the train dataset, both evaluated with the train-fitted parameters, and reported per test event.

    :param train_tuple, test_tuple, combined_tuple: A tuple of (event dict, number of nodes, duration)
    :param nodes_not_in_train: Nodes that are in the test data, but not in the train
    :param k_values_to_test: iterable obj of number of communities to fit
    :param local_search_max_iter: if >0, then the model is fitted using local search, else local search is not used.
    :param local_search_n_cores: Number of cores to be used for local search. Ignored if local_search_max_iter <= 0.
    :param plot_fitted_hist: If True, generates a CHIP model network based on the fitted parameters and plots a
                             histogram of the event count of real vs. fitted model.
    :param verbose: Prints details of the fit along the way.

    :return: (list) test log-likelihood per event for all `k_values_to_test`.
    """
    # Each tuple must hold exactly three items; items this function does not use are discarded.
    train_event_dict, train_num_nodes, train_duration = train_tuple
    test_event_dict, test_num_nodes, _ = test_tuple
    combined_event_dict, _, combined_duration = combined_tuple

    overall_start = time.time()
    print("Log-likelihoods per event:")

    lls_per_event = []
    for k in k_values_to_test:
        if verbose:
            print("K:", k)

        fit_start = time.time()

        # Fit on the train split only.
        (train_membership, mu, alpha, beta,
         train_bp_events) = model_utils.fit_community_model(
             train_event_dict,
             train_num_nodes,
             train_duration,
             k,
             local_search_max_iter,
             local_search_n_cores,
             verbose=verbose)

        # Extend the membership to nodes that only appear outside the train split.
        combined_membership = model_utils.assign_node_membership_for_missing_nodes(
            train_membership, nodes_not_in_train)

        # Log-likelihood of the combined data under the train-fitted parameters.
        combined_bp_events = utils.event_dict_to_block_pair_events(
            combined_event_dict, combined_membership, k)
        combined_ll = model_utils.calc_full_log_likelihood(
            combined_bp_events, combined_membership, mu, alpha, beta,
            combined_duration, k)

        # Log-likelihood of the train data under the same parameters.
        train_ll = model_utils.calc_full_log_likelihood(
            train_bp_events, train_membership, mu, alpha, beta,
            train_duration, k)

        # Per-event test log-likelihood derived from the two totals.
        test_ll_per_event = model_utils.calc_per_event_log_likelihood(
            combined_ll, train_ll, test_event_dict, test_num_nodes)

        fit_end = time.time()
        lls_per_event.append(test_ll_per_event)

        # Report train and test log-likelihood per event on one output line.
        train_adjacency = utils.event_dict_to_aggregated_adjacency(
            train_num_nodes, train_event_dict)
        n_train_events = np.sum(train_adjacency)
        print(f"K: {k} - Train ll: {train_ll / n_train_events:.4f}",
              end=' - ')
        print(
            f"Test ll: {test_ll_per_event:.3f} - Took: {fit_end - fit_start:.2f}s"
        )

        if not plot_fitted_hist:
            continue

        model_utils.generate_fit_community_hawkes(train_event_dict,
                                                  train_membership,
                                                  mu,
                                                  alpha,
                                                  beta,
                                                  train_duration,
                                                  plot_fitted_hist,
                                                  n_cores=26)

    overall_end = time.time()

    print(f"Total time elapsed: {overall_end - overall_start:.2f}s")

    return lls_per_event
        agg_adj, node_membership, fb_duration, 1e-10 / fb_duration)
    toc = time.time()

    print(f"Mu and m estimated in {toc - tic:.1f}s")

    if verbose:
        print("Mu:")
        print(bp_mu)
        print("Ratio:")
        print(bp_alpha_beta_ratio)

    print("\nStart Beta estimation:")

    tic = time.time()
    bp_beta = np.zeros((num_classes, num_classes), dtype=np.float)
    block_pair_events = utils.event_dict_to_block_pair_events(
        fb_event_dict, node_membership, num_classes)

    cnt = 0
    for b_i in range(num_classes):
        for b_j in range(num_classes):
            bp_size = len(np.where(node_membership == b_i)[0]) * len(
                np.where(node_membership == b_j)[0])
            if b_i == b_j:
                bp_size -= len(np.where(node_membership == b_i)[0])

            bp_beta[b_i, b_j], _ = estimate_utils.estimate_beta_from_events(
                block_pair_events[b_i][b_j], bp_mu[b_i, b_j],
                bp_alpha_beta_ratio[b_i, b_j], fb_duration, bp_size)
            cnt += 1
            print(f"{100 * cnt / num_classes ** 2:0.2f}% Done.", end='\r')
def chip_local_search(event_dict,
                      n_classes,
                      node_membership_init,
                      duration,
                      max_iter=100,
                      n_cores=-1,
                      return_fitted_param=False,
                      verbose=True):
    """
    Performs local search / hill climbing to increase log-likelihood of the model by switching the community of a single
    node at a time. For every neighboring solution only mu and m are estimated, beta is fixed to the base solution to
    lower time complexity.

    The search works on a copy of `node_membership_init`; the caller's object is not modified.

    :param event_dict: Edge dictionary of events between all node pair. Output of the generative models.
    :param n_classes: (int) total number of classes/blocks
    :param node_membership_init: (list) initial membership of every node to one of K classes. Usually output of the
                                 spectral clustering
    :param duration: (int) Duration of the network
    :param max_iter: (int) maximum number of iterations to be performed by local search.
    :param n_cores: (int) number of cores to be used to parallelize the search. If not positive (e.g. -1), use all
                    available cores.
    :param return_fitted_param: if True, return the Hawkes parameters for the model as well.
    :param verbose: If True, prints more information on local search.

    :return: local optimum node_membership if `return_fitted_param` is false, otherwise the tuple
             (node_membership, mu, alpha, beta).
    """
    n_nodes = len(node_membership_init)
    nodes = np.arange(n_nodes)
    # Copy so that the membership updates below never leak into the caller's initial membership.
    node_membership = np.array(node_membership_init)
    # The builtin `int` replaces the `np.int` alias, which no longer exists in NumPy >= 1.24.
    agg_adj = utils.event_dict_to_aggregated_adjacency(n_nodes,
                                                       event_dict,
                                                       dtype=int)

    # estimate initial params of CHIP and its log-likelihood
    mu, alpha, beta, _ = fit_utils.estimate_bp_hawkes_params(
        event_dict, node_membership, duration, n_classes)

    block_pair_events = utils.event_dict_to_block_pair_events(
        event_dict, node_membership, n_classes)
    init_log_lik = fit_utils.calc_full_log_likelihood(
        block_pair_events,
        node_membership,
        mu,
        alpha,
        beta,
        duration,
        n_classes,
        add_com_assig_log_prob=False)

    log_lik = init_log_lik
    n_cores = n_cores if n_cores > 0 else multiprocessing.cpu_count()
    # One contiguous batch of nodes per core; the "+ 1" ensures n_cores batches cover every node.
    batch_size = n_nodes // n_cores + 1

    for iteration in range(max_iter):
        if verbose:
            print(f"Iteration {iteration}...", end='\r')

        # for each of the (k-1)*n neighboring solutions; every worker reports the best move of its batch
        possible_solutions = Parallel(n_jobs=n_cores)(
            delayed(calc_node_neigh_solutions)
            (event_dict, n_classes, duration, node_membership, agg_adj, beta,
             log_lik, nodes[batch_size * ii:batch_size * (ii + 1)])
            for ii in range(n_cores))

        # rows are (node index, new class, log-likelihood); a row of np.nan means "no improvement in this batch"
        possible_solutions = np.array(possible_solutions)

        # if all returned log-likelihoods are np.nan, break. You're at a local optima.
        if np.all(np.isnan(possible_solutions[:, 2])):
            if verbose:
                print(f"Local solution found with {iteration} iterations.")
            break

        max_ll_neigh_idx = np.nanargmax(possible_solutions[:, 2])

        # if a good neighbor was found, update all CHIP params, and go for the next iteration.
        node_membership[int(possible_solutions[max_ll_neigh_idx, 0])] = int(
            possible_solutions[max_ll_neigh_idx, 1])
        mu, alpha, beta, _ = fit_utils.estimate_bp_hawkes_params(
            event_dict, node_membership, duration, n_classes)

        block_pair_events = utils.event_dict_to_block_pair_events(
            event_dict, node_membership, n_classes)
        log_lik = fit_utils.calc_full_log_likelihood(
            block_pair_events,
            node_membership,
            mu,
            alpha,
            beta,
            duration,
            n_classes,
            add_com_assig_log_prob=False)

        if iteration == max_iter - 1:
            print("Warning: Max iter reached!")

    if verbose:
        print(
            f"likelihood went from {init_log_lik:.4f} to {log_lik:.4f}. "
            f"{100 * np.abs((log_lik - init_log_lik) / init_log_lik):.2f}% increase."
        )

    if return_fitted_param:
        return node_membership, mu, alpha, beta

    return node_membership
def chip_local_search_single_core(event_dict,
                                  n_classes,
                                  node_membership_init,
                                  duration,
                                  max_iter=100,
                                  verbose=True):
    """
    This function is only here for speed comparisons against the multi-core version. All parameters are the same as
    `chip_local_search`.

    The search works on a copy of `node_membership_init`; the caller's object is not modified.

    :param event_dict: Edge dictionary of events between all node pair.
    :param n_classes: (int) total number of classes/blocks
    :param node_membership_init: (list) initial membership of every node to one of K classes.
    :param duration: (int) Duration of the network
    :param max_iter: (int) maximum number of iterations to be performed by local search.
    :param verbose: If True, prints more information on local search.

    :return: local optimum node_membership
    """
    n_nodes = len(node_membership_init)
    # Copy so that neither the temporary trial moves nor the accepted moves touch the caller's membership.
    node_membership = np.array(node_membership_init)
    # The builtin `int` replaces the `np.int` alias, which no longer exists in NumPy >= 1.24.
    agg_adj = utils.event_dict_to_aggregated_adjacency(n_nodes,
                                                       event_dict,
                                                       dtype=int)

    # estimate initial params of CHIP and its log-likelihood
    mu, alpha, beta, _ = fit_utils.estimate_bp_hawkes_params(
        event_dict, node_membership, duration, n_classes)

    block_pair_events = utils.event_dict_to_block_pair_events(
        event_dict, node_membership, n_classes)
    init_log_lik = fit_utils.calc_full_log_likelihood(
        block_pair_events,
        node_membership,
        mu,
        alpha,
        beta,
        duration,
        n_classes,
        add_com_assig_log_prob=False)

    log_lik = init_log_lik

    for iteration in range(max_iter):
        if verbose:
            print(f"Iteration {iteration}...", end='\r')

        # best neighbor will hold the best node_membership update in the form of (node_index, updated_class_membership)
        best_neigh = None

        # for each of the (k-1)*n neighboring solutions
        for n_i in range(n_nodes):
            n_i_class = node_membership[n_i]

            for c_i in range(n_classes):
                if c_i == n_i_class:
                    continue
                # update node_membership temporarily
                node_membership[n_i] = c_i

                # Eval the approx log_lik of this neighbor, by est its mu and alpha/beta and using previous beta.
                neigh_mu, neigh_alpha_beta_ratio = estimate_utils.estimate_hawkes_from_counts(
                    agg_adj,
                    node_membership,
                    duration,
                    default_mu=1e-10 / duration)
                neigh_alpha = neigh_alpha_beta_ratio * beta

                block_pair_events = utils.event_dict_to_block_pair_events(
                    event_dict, node_membership, n_classes)
                neigh_log_lik = fit_utils.calc_full_log_likelihood(
                    block_pair_events,
                    node_membership,
                    neigh_mu,
                    neigh_alpha,
                    beta,
                    duration,
                    n_classes,
                    add_com_assig_log_prob=False)

                # if log_lik of this neighbor is better than the "so far" best neighbor, use this neighbor as the best.
                if log_lik < neigh_log_lik:
                    log_lik = neigh_log_lik
                    best_neigh = (n_i, c_i)

            # undo the temporary move before trying the next node
            node_membership[n_i] = n_i_class

        # if no neighbor seem to increase log_lik, break. You're at a local optima.
        if best_neigh is None:
            if verbose:
                print(f"Local solution found with {iteration} iterations.")
            break

        # if a good neighbor was found, update all CHIP params, and go for the next iteration.
        node_membership[best_neigh[0]] = best_neigh[1]
        mu, alpha, beta, _ = fit_utils.estimate_bp_hawkes_params(
            event_dict, node_membership, duration, n_classes)

        block_pair_events = utils.event_dict_to_block_pair_events(
            event_dict, node_membership, n_classes)
        log_lik = fit_utils.calc_full_log_likelihood(
            block_pair_events,
            node_membership,
            mu,
            alpha,
            beta,
            duration,
            n_classes,
            add_com_assig_log_prob=False)

    if verbose:
        print(
            f"likelihood went from {init_log_lik:.4f} to {log_lik:.4f}. "
            f"{100 * np.abs((log_lik - init_log_lik) / init_log_lik):.2f}% increase."
        )

    return node_membership
def calc_node_neigh_solutions(event_dict, n_classes, duration, node_membership,
                              agg_adj, beta, log_lik_init, node_batch):
    """
    Calculates the log-likelihood of neighboring solutions of a batch of nodes by changing their membership. If a higher
    log-likelihood was achieved the best solution will be returned, else a tuple of three np.nan is returned.

    The trial moves are applied to a private copy of `node_membership`, so the caller's object is never modified.

    :param event_dict: Edge dictionary of events between all node pair. Output of the generative models.
    :param n_classes: (int) total number of classes/blocks
    :param duration: (int) Duration of the network
    :param node_membership: (list or np array) membership of every node to one of K classes
    :param agg_adj: aggregated/weighted adjacency of the network
    :param beta: K x K np array of block pairs beta. This is fixed for every solution to lower time complexity. Only mu
                 and m are estimated for each neighboring solution
    :param log_lik_init: (float) base log-likelihood
    :param node_batch: (list) nodes in the current batch

    :return: (node index, best class index, log_likelihood)
    """

    best_neigh = (np.nan, np.nan, np.nan)
    log_lik = log_lik_init
    # Private array copy: keeps the caller's membership untouched and makes the class-size comparison
    # below element-wise even when a plain list is passed in.
    node_membership = np.array(node_membership)

    for n_i in node_batch:
        n_i_class = node_membership[n_i]

        # Adding a constraint to maintain the number of blocks: nodes of classes with at most 2 members stay put.
        if np.count_nonzero(node_membership == n_i_class) <= 2:
            continue

        for c_i in range(n_classes):
            if c_i == n_i_class:
                continue

            # update node_membership temporarily
            node_membership[n_i] = c_i

            # Eval the approx log_lik of this neighbor, by est its mu and alpha/beta and using previous beta.
            neigh_mu, neigh_alpha_beta_ratio = estimate_utils.estimate_hawkes_from_counts(
                agg_adj,
                node_membership,
                duration,
                default_mu=1e-10 / duration)
            neigh_alpha = neigh_alpha_beta_ratio * beta

            block_pair_events = utils.event_dict_to_block_pair_events(
                event_dict, node_membership, n_classes)
            neigh_log_lik = fit_utils.calc_full_log_likelihood(
                block_pair_events,
                node_membership,
                neigh_mu,
                neigh_alpha,
                beta,
                duration,
                n_classes,
                add_com_assig_log_prob=False)

            # if log_lik of this neighbor is better than the "so far" best neighbor, use this neighbor as the best.
            if log_lik < neigh_log_lik:
                log_lik = neigh_log_lik
                best_neigh = (n_i, c_i, log_lik)

        # undo the temporary move before trying the next node
        node_membership[n_i] = n_i_class

    return best_neigh
# Example #12
# 0
def fit_community_model(event_dict,
                        num_nodes,
                        duration,
                        num_classes,
                        local_search_max_iter,
                        local_search_n_cores,
                        verbose=False):
    """
    Fits CHIP model to a network.

    Memberships are initialised with spectral clustering on the aggregated adjacency. Depending on the
    arguments, the parameters are then obtained either from local search or from a direct estimate.

    :param event_dict: Edge dictionary of events between all node pair.
    :param num_nodes: (int) Total number of nodes
    :param duration: (int) duration of the network
    :param num_classes: (int) number of blocks / classes
    :param local_search_max_iter: Maximum number of local search to be performed. If 0, no local search is done
    :param local_search_n_cores: Number of cores to parallelize local search. Only applicable if
                                 `local_search_max_iter` > 0
    :param verbose: Prints fitted Block Hawkes parameters

    :return: node_membership, mu, alpha, beta, block_pair_events
    """
    agg_adj = utils.event_dict_to_aggregated_adjacency(num_nodes, event_dict)

    # Initial community assignment from spectral clustering.
    node_membership = spectral_cluster(agg_adj,
                                       num_classes,
                                       verbose=False,
                                       plot_eigenvalues=False)

    # Local search only makes sense when requested and when there is more than one block.
    skip_local_search = local_search_max_iter <= 0 or num_classes <= 1

    if skip_local_search:
        # Direct estimate for the spectral-clustering membership; the alpha/beta ratio is not needed here.
        bp_mu, bp_alpha, bp_beta, _, block_pair_events = estimate_bp_hawkes_params(
            event_dict,
            node_membership,
            duration,
            num_classes,
            agg_adj=agg_adj,
            return_block_pair_events=True)
    else:
        # Local search returns the refined membership together with the fitted parameters.
        node_membership, bp_mu, bp_alpha, bp_beta = cls.chip_local_search(
            event_dict,
            num_classes,
            node_membership,
            duration,
            max_iter=local_search_max_iter,
            n_cores=local_search_n_cores,
            return_fitted_param=True,
            verbose=False)

        # Block-pair events have to be regrouped for the refined membership.
        block_pair_events = utils.event_dict_to_block_pair_events(
            event_dict, node_membership, num_classes)

    # Optional summary of the fit.
    if verbose:
        block_count = np.unique(node_membership, return_counts=True)[1]
        class_prob = block_count / sum(block_count)

        print("Membership percentage: ", class_prob)

        for heading, values in (("Mu:", bp_mu), ("\nAlpha:", bp_alpha),
                                ("\nBeta:", bp_beta)):
            print(heading)
            print(values)

    return node_membership, bp_mu, bp_alpha, bp_beta, block_pair_events