Example #1
def estimate_bp_hawkes_params(event_dict,
                              node_membership,
                              duration,
                              num_classes,
                              agg_adj=None,
                              return_block_pair_events=False):
    """
    Estimate CHIP Hawkes parameters.

    :param event_dict: Edge dictionary of events between all node pairs.
    :param node_membership: (list) membership of every node to one of K classes.
    :param duration: (int) duration of the network
    :param num_classes: (int) number of blocks / classes
    :param agg_adj: (optional) np array (num_nodes x num_nodes) Adjacency matrix where element ij denotes the
                    number of events between nodes i and j. If None, this will be calculated.
    :param return_block_pair_events: (bool) If True, also returns the block pair events.

    :return: parameters of the CHIP model -> mu, alpha, beta, m
    """

    if agg_adj is None:
        num_nodes = len(node_membership)
        agg_adj = utils.event_dict_to_aggregated_adjacency(
            num_nodes, event_dict)

    bp_mu, bp_alpha_beta_ratio = estimate_utils.estimate_hawkes_from_counts(
        agg_adj, node_membership, duration, 1e-10 / duration)

    bp_beta = np.zeros((num_classes, num_classes), dtype=np.float64)
    block_pair_events = utils.event_dict_to_block_pair_events(
        event_dict, node_membership, num_classes)
    bp_size = utils.calc_block_pair_size(node_membership, num_classes)

    for b_i in range(num_classes):
        for b_j in range(num_classes):
            bp_beta[b_i, b_j], _ = estimate_utils.estimate_beta_from_events(
                block_pair_events[b_i][b_j], bp_mu[b_i, b_j],
                bp_alpha_beta_ratio[b_i, b_j], duration, bp_size[b_i, b_j])

    bp_alpha = bp_alpha_beta_ratio * bp_beta

    if return_block_pair_events:
        return bp_mu, bp_alpha, bp_beta, bp_alpha_beta_ratio, block_pair_events

    return bp_mu, bp_alpha, bp_beta, bp_alpha_beta_ratio
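# Hypothetical usage sketch (not part of the original example): simulate a small
# CHIP network with utils.simulate_community_hawkes, then recover its block-pair
# Hawkes parameters with estimate_bp_hawkes_params. All numeric settings below
# are illustrative values, not ones from the paper or the repository.
sim_params = {
    'number_of_nodes': 64,
    'class_probabilities': [0.25, 0.25, 0.25, 0.25],
    'end_time': 100,
    'alpha': 0.6,
    'beta': 0.8,
    'mu_diag': 0.1,
    'scale': False
}
sim_event_dict, sim_node_membership = utils.simulate_community_hawkes(sim_params)
est_mu, est_alpha, est_beta, est_m = estimate_bp_hawkes_params(
    sim_event_dict, sim_node_membership,
    duration=sim_params['end_time'],
    num_classes=len(sim_params['class_probabilities']))
print("Estimated mu:\n", est_mu)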
def calc_mean_and_error_of_count_estiamte(n_nodes):
    # class_probs, end_time, alpha, beta, mu_diag and estimate_alpha_beta are
    # module-level settings defined elsewhere in the original script.
    params = {
        'number_of_nodes': n_nodes,
        'class_probabilities': class_probs,
        'end_time': end_time,
        'alpha': alpha,
        'beta': beta,
        'mu_diag': mu_diag,
        'scale': False
    }

    event_dict, node_membership = utils.simulate_community_hawkes(params)

    if estimate_alpha_beta:
        bp_mu, bp_alpha, bp_beta, bp_alpha_beta_ratio = model_utils.estimate_bp_hawkes_params(
            event_dict, node_membership, end_time, len(class_probs))
        return bp_mu, bp_alpha_beta_ratio, bp_alpha, bp_beta

    agg_adj = utils.event_dict_to_aggregated_adjacency(n_nodes, event_dict)
    bp_mu, bp_alpha_beta_ratio = estimate_hawkes_from_counts(
        agg_adj, node_membership, end_time, 1e-10 / end_time)

    return bp_mu, bp_alpha_beta_ratio
def estimate_bp_hawkes_params(event_dict, node_membership, duration,
                              num_classes):
    """
    Estimate CHIP Hawkes parameters.

    :param event_dict: Edge dictionary of events between all node pairs.
    :param node_membership: (list) membership of every node to one of K classes.
    :param duration: (int) duration of the network
    :param num_classes: (int) number of blocks / classes

    :return: parameters of the CHIP model -> mu, alpha, beta, m
    """

    num_nodes = len(node_membership)

    agg_adj = utils.event_dict_to_aggregated_adjacency(num_nodes, event_dict)
    bp_mu, bp_alpha_beta_ratio = estimate_utils.estimate_hawkes_from_counts(
        agg_adj, node_membership, duration, 1e-10 / duration)

    bp_beta = np.zeros((num_classes, num_classes), dtype=np.float64)
    block_pair_events = utils.event_dict_to_block_pair_events(
        event_dict, node_membership, num_classes)

    for b_i in range(num_classes):
        for b_j in range(num_classes):
            bp_size = len(np.where(node_membership == b_i)[0]) * len(
                np.where(node_membership == b_j)[0])
            if b_i == b_j:
                bp_size -= len(np.where(node_membership == b_i)[0])

            bp_beta[b_i, b_j], _ = estimate_utils.estimate_beta_from_events(
                block_pair_events[b_i][b_j], bp_mu[b_i, b_j],
                bp_alpha_beta_ratio[b_i, b_j], duration, bp_size)

    bp_alpha = bp_alpha_beta_ratio * bp_beta

    return bp_mu, bp_alpha, bp_beta, bp_alpha_beta_ratio
    tic = time.time()
    train_node_membership = spectral_cluster(train_agg_adj,
                                             num_classes=num_classes,
                                             verbose=False,
                                             plot_eigenvalues=False)
    toc = time.time()

    print(f"Spectral clustering done in {toc - tic:.1f}s")

    if verbose:
        print(
            "Community assignment prob:",
            np.unique(train_node_membership, return_counts=True)[1] /
            train_num_nodes)

    tic = time.time()
    bp_mu, bp_alpha_beta_ratio = estimate_utils.estimate_hawkes_from_counts(
        train_agg_adj, train_node_membership, train_duration,
        1e-10 / train_duration)
    toc = time.time()

    print(f"Mu and m estimated in {toc - tic:.1f}s")

    if verbose:
        print("Mu:")
        print(bp_mu)
        print("Ratio:")
        print(bp_alpha_beta_ratio)

    print("\nStart Beta estimation:")

    tic = time.time()
    bp_beta = np.zeros((num_classes, num_classes), dtype=np.float64)
def fit_community_model(event_dict,
                        num_nodes,
                        duration,
                        num_classes,
                        local_search_max_iter,
                        local_search_n_cores,
                        verbose=False):
    """
    Fits CHIP model to a network.

    :param event_dict: Edge dictionary of events between all node pairs.
    :param num_nodes: (int) Total number of nodes
    :param duration: (int) duration of the network
    :param num_classes: (int) number of blocks / classes
    :param local_search_max_iter: Maximum number of local search iterations to be performed. If 0, no local search
                                  is done.
    :param local_search_n_cores: Number of cores to parallelize local search. Only applicable if
                                 `local_search_max_iter` > 0
    :param verbose: (bool) If True, prints the fitted CHIP Hawkes parameters

    :return: node_membership, mu, alpha, beta, block_pair_events
    """

    agg_adj = utils.event_dict_to_aggregated_adjacency(num_nodes, event_dict)
    # adj = utils.event_dict_to_adjacency(num_nodes, event_dict)

    # Running spectral clustering
    node_membership = spectral_cluster(agg_adj, num_classes, verbose=False)

    if local_search_max_iter > 0 and num_classes > 1:
        node_membership, bp_mu, bp_alpha, bp_beta = cls.chip_local_search(
            event_dict,
            num_classes,
            node_membership,
            duration,
            max_iter=local_search_max_iter,
            n_cores=local_search_n_cores,
            return_fitted_param=True,
            verbose=False)

        block_pair_events = utils.event_dict_to_block_pair_events(
            event_dict, node_membership, num_classes)

    else:
        bp_mu, bp_alpha_beta_ratio = estimate_utils.estimate_hawkes_from_counts(
            agg_adj, node_membership, duration, 1e-10 / duration)
        bp_beta = np.zeros((num_classes, num_classes), dtype=np.float64)

        block_pair_events = utils.event_dict_to_block_pair_events(
            event_dict, node_membership, num_classes)

        for b_i in range(num_classes):
            for b_j in range(num_classes):
                bp_size = len(np.where(node_membership == b_i)[0]) * len(
                    np.where(node_membership == b_j)[0])
                if b_i == b_j:
                    bp_size -= len(np.where(node_membership == b_i)[0])

                bp_beta[b_i,
                        b_j], _ = estimate_utils.estimate_beta_from_events(
                            block_pair_events[b_i][b_j], bp_mu[b_i, b_j],
                            bp_alpha_beta_ratio[b_i, b_j], duration, bp_size)

        bp_alpha = bp_alpha_beta_ratio * bp_beta

    # Printing information about the fit
    if verbose:
        _, block_count = np.unique(node_membership, return_counts=True)
        class_prob = block_count / sum(block_count)

        print(f"Membership percentage: ", class_prob)

        print("Mu:")
        print(bp_mu)

        print("\nAlpha:")
        print(bp_alpha)

        print("\nBeta:")
        print(bp_beta)

    return node_membership, bp_mu, bp_alpha, bp_beta, block_pair_events
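# Hypothetical usage sketch (not part of the original example): fit the CHIP
# community model to an event dictionary, first without local search and then
# with a short local search refinement. event_dict is assumed to come from
# utils.simulate_community_hawkes or a loaded dataset; all settings are
# illustrative.
node_membership, mu, alpha, beta, bp_events = fit_community_model(
    event_dict, num_nodes=64, duration=100, num_classes=4,
    local_search_max_iter=0, local_search_n_cores=1, verbose=True)

node_membership_ls, mu_ls, alpha_ls, beta_ls, _ = fit_community_model(
    event_dict, num_nodes=64, duration=100, num_classes=4,
    local_search_max_iter=20, local_search_n_cores=4, verbose=False)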
    tic = time.time()

    # Running spectral clustering
    node_membership = spectral_cluster(agg_adj,
                                       num_classes=10,
                                       verbose=False,
                                       plot_eigenvalues=True)

    toc = time.time()

    print(f"Spectral clustering done in {toc - tic:.1f}s")

    if verbose:
        print("Community assignment prob:",
              np.unique(node_membership, return_counts=True)[1] / fb_num_node)

    tic = time.time()
    bp_mu, bp_alpha_beta_ratio = estimate_utils.estimate_hawkes_from_counts(
        agg_adj, node_membership, fb_duration, 1e-10 / fb_duration)
    toc = time.time()

    print(f"Mu and m estimated in {toc - tic:.1f}s")

    if verbose:
        print("Mu:")
        print(bp_mu)
        print("Ratio:")
        print(bp_alpha_beta_ratio)

    print("\nStart Beta estimation:")

    tic = time.time()
    bp_beta = np.zeros((num_classes, num_classes), dtype=np.float64)
    block_pair_events = utils.event_dict_to_block_pair_events(
        event_dict, node_membership, num_classes)
def chip_local_search_single_core(event_dict,
                                  n_classes,
                                  node_membership_init,
                                  duration,
                                  max_iter=100,
                                  verbose=True):
    """
    This function is only here for speed comparisons against the multi-core version. All parameters are the same as
    `chip_local_search`.
    """
    n_nodes = len(node_membership_init)
    node_membership = node_membership_init
    agg_adj = utils.event_dict_to_aggregated_adjacency(n_nodes,
                                                       event_dict,
                                                       dtype=np.int64)

    # estimate initial params of CHIP and its log-likelihood
    (mu, alpha, beta, alpha_beta_ratio) = fit_utils.estimate_bp_hawkes_params(
        event_dict, node_membership, duration, n_classes)

    block_pair_events = utils.event_dict_to_block_pair_events(
        event_dict, node_membership, n_classes)
    init_log_lik = fit_utils.calc_full_log_likelihood(
        block_pair_events,
        node_membership,
        mu,
        alpha,
        beta,
        duration,
        n_classes,
        add_com_assig_log_prob=False)

    log_lik = init_log_lik

    for iter in range(max_iter):
        if verbose:
            print(f"Iteration {iter}...", end='\r')

        # best_neigh will hold the best node_membership update in the form of (node_index, updated_class_membership)
        best_neigh = None

        # for each of the (k-1)*n neighboring solutions
        for n_i in range(n_nodes):
            n_i_class = node_membership[n_i]

            for c_i in range(n_classes):
                if c_i == n_i_class:
                    continue
                # update node_membership temporarily
                node_membership[n_i] = c_i

                # Evaluate the approximate log_lik of this neighbor by estimating its mu and alpha/beta ratio, reusing the previous beta.
                neigh_mu, neigh_alpha_beta_ratio = estimate_utils.estimate_hawkes_from_counts(
                    agg_adj,
                    node_membership,
                    duration,
                    default_mu=1e-10 / duration)
                neigh_alpha = neigh_alpha_beta_ratio * beta

                block_pair_events = utils.event_dict_to_block_pair_events(
                    event_dict, node_membership, n_classes)
                neigh_log_lik = fit_utils.calc_full_log_likelihood(
                    block_pair_events,
                    node_membership,
                    neigh_mu,
                    neigh_alpha,
                    beta,
                    duration,
                    n_classes,
                    add_com_assig_log_prob=False)

                # if the log_lik of this neighbor is better than the best so far, use this neighbor as the best.
                if log_lik < neigh_log_lik:
                    log_lik = neigh_log_lik
                    best_neigh = (n_i, c_i)

                node_membership[n_i] = n_i_class

        # if no neighbor increases the log_lik, break: a local optimum has been reached.
        if best_neigh is None:
            if verbose:
                print(f"Local solution found with {iter} iterations.")
            break

        # if a good neighbor was found, update all CHIP params and go to the next iteration.
        node_membership[best_neigh[0]] = best_neigh[1]
        (mu, alpha, beta,
         alpha_beta_ratio) = fit_utils.estimate_bp_hawkes_params(
             event_dict, node_membership, duration, n_classes)

        block_pair_events = utils.event_dict_to_block_pair_events(
            event_dict, node_membership, n_classes)
        log_lik = fit_utils.calc_full_log_likelihood(
            block_pair_events,
            node_membership,
            mu,
            alpha,
            beta,
            duration,
            n_classes,
            add_com_assig_log_prob=False)

    if verbose:
        print(
            f"likelihood went from {init_log_lik:.4f} to {log_lik:.4f}. "
            f"{100 * np.abs((log_lik - init_log_lik) / init_log_lik):.2f}% increase."
        )

    return node_membership
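# Hypothetical usage sketch (not part of the original example): initialize with
# spectral clustering and refine the assignment with the single-core local
# search. event_dict, num_nodes, duration and num_classes are assumed to be
# defined as in the earlier sketches; settings are illustrative.
agg_adj = utils.event_dict_to_aggregated_adjacency(num_nodes, event_dict)
init_membership = spectral_cluster(agg_adj, num_classes, verbose=False)
refined_membership = chip_local_search_single_core(
    event_dict, num_classes, init_membership, duration,
    max_iter=50, verbose=True)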
def calc_node_neigh_solutions(event_dict, n_classes, duration, node_membership,
                              agg_adj, beta, log_lik_init, node_batch):
    """
    Calculates the log-likelihood of the neighboring solutions of a batch of nodes by changing their membership. If a
    higher log-likelihood is achieved, the best solution is returned; otherwise, a tuple of three np.nan values is
    returned.

    :param event_dict: Edge dictionary of events between all node pairs. Output of the generative models.
    :param n_classes: (int) total number of classes/blocks
    :param duration: (int) Duration of the network
    :param node_membership: (list) membership of every node to one of K classes
    :param agg_adj: aggregated/weighted adjacency matrix of the network
    :param beta: K x K np array of block-pair betas. This is fixed for every solution to reduce time complexity; only
                 mu and m are estimated for each neighboring solution.
    :param log_lik_init: (float) base log-likelihood
    :param node_batch: (list) nodes in the current batch

    :return: (node index, best class index, log_likelihood)
    """

    best_neigh = (np.nan, np.nan, np.nan)
    log_lik = log_lik_init
    # node_membership = node_membership.copy()

    for n_i in node_batch:
        n_i_class = node_membership[n_i]

        # Constraint to maintain the number of blocks: skip moves that would shrink a block below 2 nodes.
        if np.sum(node_membership == n_i_class) <= 2:
            continue

        for c_i in range(n_classes):
            if c_i == n_i_class:
                continue

            # update node_membership temporarily
            node_membership[n_i] = c_i

            # Evaluate the approximate log_lik of this neighbor by estimating its mu and alpha/beta ratio, reusing the previous beta.
            neigh_mu, neigh_alpha_beta_ratio = estimate_utils.estimate_hawkes_from_counts(
                agg_adj,
                node_membership,
                duration,
                default_mu=1e-10 / duration)
            neigh_alpha = neigh_alpha_beta_ratio * beta

            block_pair_events = utils.event_dict_to_block_pair_events(
                event_dict, node_membership, n_classes)
            neigh_log_lik = fit_utils.calc_full_log_likelihood(
                block_pair_events,
                node_membership,
                neigh_mu,
                neigh_alpha,
                beta,
                duration,
                n_classes,
                add_com_assig_log_prob=False)

            # if the log_lik of this neighbor is better than the best so far, use this neighbor as the best.
            if log_lik < neigh_log_lik:
                log_lik = neigh_log_lik
                best_neigh = (n_i, c_i, log_lik)

            node_membership[n_i] = n_i_class

    return best_neigh
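# Hypothetical sketch (not part of the original example) of how the per-batch
# worker above could be dispatched across cores, e.g. with joblib; the
# repository's own multi-core driver may differ. It only illustrates splitting
# nodes into batches while keeping beta fixed. The variables (event_dict,
# n_classes, duration, node_membership, agg_adj, beta, log_lik, n_nodes,
# n_cores) are assumed to be set up as in chip_local_search_single_core above.
from joblib import Parallel, delayed

node_batches = np.array_split(np.arange(n_nodes), n_cores)
batch_results = Parallel(n_jobs=n_cores)(
    delayed(calc_node_neigh_solutions)(
        event_dict, n_classes, duration, node_membership.copy(),
        agg_adj, beta, log_lik, batch)
    for batch in node_batches)

# Pick the neighboring solution with the highest log-likelihood, if any improved.
best_node, best_class, best_log_lik = max(
    batch_results, key=lambda r: -np.inf if np.isnan(r[2]) else r[2])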