Esempio n. 1
0
def test_spectral_clustering_on_generative_model(scalar):
    """
    Simulates a 256-node community Hawkes network and scores spectral clustering on it.

    Reads `sim_type`, `n_classes` and `also_use_unweighted_adjacency` from the enclosing scope.

    :param scalar: multiplier applied to the diagonal base intensity, and to the off-diagonal base
                   intensity as well unless `sim_type` is 'b'

    :return: adjusted Rand score of clustering on the aggregated adjacency matrix. If
             `also_use_unweighted_adjacency` is truthy, returns instead the tuple
             (aggregated score, plain adjacency score, sum of the plain adjacency entries divided by
             the squared number of nodes).
    """
    # Base intensities: the diagonal one always scales; the off-diagonal one stays fixed for sim type 'b'
    mu_diag = 0.00075 * scalar
    if sim_type == 'b':
        mu_off_diag = 0.00035
    else:
        mu_off_diag = 0.00035 * scalar

    sim_params = {
        'alpha': 0.05,
        'beta': 0.08,
        'mu_diag': mu_diag,
        'mu_off_diag': mu_off_diag,
        'scale': False,
        'number_of_nodes': 256,
    }

    event_dict, true_labels = utils.simulate_community_hawkes(sim_params)
    node_count = len(true_labels)

    # Spectral clustering on the aggregated adjacency matrix
    aggregated = utils.event_dict_to_aggregated_adjacency(node_count, event_dict)
    aggregated_labels = spectral_cluster(aggregated, num_classes=n_classes)
    aggregated_rand = adjusted_rand_score(true_labels, aggregated_labels)

    if also_use_unweighted_adjacency:
        # Spectral clustering on the plain (non-aggregated) adjacency matrix
        plain = utils.event_dict_to_adjacency(node_count, event_dict)
        plain_labels = spectral_cluster(plain, num_classes=n_classes)
        plain_rand = adjusted_rand_score(true_labels, plain_labels)

        return aggregated_rand, plain_rand, np.sum(plain) / (node_count**2)

    return aggregated_rand
def fit_block_model(event_dict, num_nodes, duration, num_classes, local_search_max_iter, local_search_n_cores,
                    verbose=False):
    """
    Fits a Block Hawkes model to a network.

    Node memberships are initialised with regularized spectral clustering on the adjacency matrix. When
    local search is enabled and there is more than one class, memberships and Hawkes parameters both
    come from the local search; otherwise the parameters are estimated from the block-pair events of
    the spectral clustering memberships.

    :param event_dict: Edge dictionary of events between all node pairs.
    :param num_nodes: (int) Total number of nodes
    :param duration: (int) duration of the network
    :param num_classes: (int) number of blocks / classes
    :param local_search_max_iter: Maximum number of local search to be performed. If 0, no local search is done
    :param local_search_n_cores: Number of cores to parallelize local search. Only applicable if
                                 `local_search_max_iter` > 0
    :param verbose: Prints fitted Block Hawkes parameters

    :return: node_membership, mu, alpha, beta, block_pair_events
    """
    adj = utils.event_dict_to_adjacency(num_nodes, event_dict)

    # Running spectral clustering
    node_membership = regularized_spectral_cluster(adj, num_classes=num_classes)

    # Local search is pointless with a single class, so it is skipped in that case as well
    run_local_search = local_search_max_iter > 0 and num_classes > 1

    if run_local_search:
        # The local search's own logging stays off regardless of `verbose`
        node_membership, bp_mu, bp_alpha, bp_beta = bls.block_local_search(event_dict, num_classes, node_membership,
                                                                           duration,
                                                                           local_search_max_iter, local_search_n_cores,
                                                                           return_fitted_param=True, verbose=False)

    # Block-pair events are always built from the final memberships (post local search when it ran)
    bp_events = event_dict_to_combined_block_pair_events(event_dict, node_membership, num_classes)

    if not run_local_search:
        bp_mu, bp_alpha, bp_beta = estimate_hawkes_params(bp_events, node_membership, duration, num_classes)

    # Printing information about the fit
    if verbose:
        _, block_count = np.unique(node_membership, return_counts=True)
        class_prob = block_count / block_count.sum()

        print("Membership percentage: ", class_prob)

        print("Mu:")
        print(bp_mu)

        print("\nAlpha:")
        print(bp_alpha)

        print("\nBeta:")
        print(bp_beta)

    return node_membership, bp_mu, bp_alpha, bp_beta, bp_events
def test_spectral_clustering_on_generative_model(n_nodes):
    """
    Simulates a community Hawkes network and scores spectral clustering on both adjacency matrices.

    Reads `agg_adj_should_fail`, `class_prob`, `chip_n_cores` and `n_classes` from the enclosing scope.
    When `agg_adj_should_fail` is truthy the simulation uses alpha 7.0, beta 8.0 and a diagonal base
    intensity of 0.002; otherwise it uses alpha 0.001, beta 0.008, a diagonal base intensity of 0.001
    and an additional 'alpha_diag' of 0.006.

    :param n_nodes: number of nodes to simulate

    :return: (adjusted Rand score using the adjacency matrix,
              adjusted Rand score using the aggregated adjacency matrix)
    """
    # Only these values differ between the two simulation regimes
    if agg_adj_should_fail:
        alpha, beta, mu_diag = 7.0, 8.0, 0.002
        diag_excitation = {}
    else:
        alpha, beta, mu_diag = 0.001, 0.008, 0.001
        diag_excitation = {'alpha_diag': 0.006}

    params = {
        'number_of_nodes': n_nodes,
        'alpha': alpha,
        'beta': beta,
        'mu_off_diag': 0.001,
        'mu_diag': mu_diag,
        **diag_excitation,
        'scale': False,
        'end_time': 400,
        'class_probabilities': class_prob,
        'n_cores': chip_n_cores,
    }

    event_dict, true_labels = utils.simulate_community_hawkes(params)

    # Spectral clustering on the adjacency matrix
    plain = utils.event_dict_to_adjacency(n_nodes, event_dict)
    plain_labels = spectral_cluster(plain, num_classes=n_classes, verbose=False)
    plain_rand = adjusted_rand_score(true_labels, plain_labels)

    # Spectral clustering on the aggregated adjacency matrix
    aggregated = utils.event_dict_to_aggregated_adjacency(n_nodes, event_dict)
    aggregated_labels = spectral_cluster(aggregated, num_classes=n_classes, verbose=False)
    aggregated_rand = adjusted_rand_score(true_labels, aggregated_labels)

    return plain_rand, aggregated_rand
# Event totals for the train and test splits.
# NOTE(review): presumably counts every event over all node pairs -- confirm in utils.
train_num_events = utils.num_events_in_event_dict(train_event_dict)
test_num_events = utils.num_events_in_event_dict(test_event_dict)
# Summary of both splits, printed unconditionally (an `if verbose:` guard used to be here but is disabled).
# The value printed after "Num Edges:" is the event total computed above.
print("Train: ", "Num Nodes:", train_num_nodes, "Duration:", train_duration,
      "Num Edges:", train_num_events)
print("Test: ", "Num Nodes:", test_num_nodes, "Duration:", test_duration,
      "Num Edges:", test_num_events)

# fit Facebook Wall-posts
if fit_chip:
    tic = time.time()
    train_agg_adj = utils.event_dict_to_aggregated_adjacency(
        train_num_nodes, train_event_dict)

    if not use_agg_adj:
        train_adj = utils.event_dict_to_adjacency(train_num_nodes,
                                                  train_event_dict)
    toc = time.time()

    if verbose:
        print(f"Generated aggregated adj in {toc - tic:.1f}s")

    tic_tot = time.time()
    tic = time.time()
    # Running spectral clustering
    if use_agg_adj:
        train_node_membership = spectral_cluster(train_agg_adj,
                                                 num_classes=num_classes,
                                                 verbose=False,
                                                 plot_eigenvalues=False)
    else:
        train_node_membership = spectral_cluster(train_adj,
                if node_pair not in event_dicts:
                    event_dicts[node_pair] = []

                event_dicts[node_pair].append(event_times[e])

    return node_membership, event_dicts


# Demo: draw random Block Hawkes parameters, generate a small network from them and print it
if __name__ == "__main__":
    # Simulation settings; the same seed is used for the parameter draw and for the generation
    seed = 1
    number_of_nodes = 8
    class_probabilities = [0.2, 0.4, 0.1, 0.2, 0.1]
    num_of_classes = len(class_probabilities)
    end_time = 10

    # Random Hawkes parameters, each drawn within its own range
    bp_mu, bp_alpha, bp_beta = utils.generate_random_hawkes_params(
        num_of_classes,
        mu_range=(0.1, 0.3),
        alpha_range=(0.2, 0.4),
        beta_range=(0.5, 1),
        seed=seed,
    )

    node_membership, event_dicts = block_generative_model(
        number_of_nodes,
        class_probabilities,
        bp_mu,
        bp_alpha,
        bp_beta,
        end_time,
        seed=seed,
    )

    # Memberships and the node pairs that have events, followed by both adjacency matrices
    print(node_membership, event_dicts.keys())
    print(utils.event_dict_to_adjacency(number_of_nodes, event_dicts))
    print(utils.event_dict_to_aggregated_adjacency(number_of_nodes, event_dicts))

        largest_connected_component_only=True)
    toc = time.time()

    print(f"Loaded the dataset in {toc - tic:.1f}s")

    num_events = utils.num_events_in_event_dict(fb_event_dict)
    if verbose:
        print("Num Nodes:", fb_num_node, "Duration:", fb_duration,
              "Num Edges:", num_events)

# fit Facebook Wall-posts
if fit_chip:
    tic = time.time()
    agg_adj = utils.event_dict_to_aggregated_adjacency(fb_num_node,
                                                       fb_event_dict)
    adj = utils.event_dict_to_adjacency(fb_num_node, fb_event_dict)
    toc = time.time()

    if verbose:
        print(f"Generated aggregated adj in {toc - tic:.1f}s")

    tic_tot = time.time()
    tic = time.time()
    # Running spectral clustering
    node_membership = spectral_cluster(agg_adj,
                                       num_classes=10,
                                       verbose=False,
                                       plot_eigenvalues=True)

    toc = time.time()