def aggregate_measurements(number_of_trials,
                           adjacency_matrix,
                           profile_individual_stat_folder,
                           profile_aggregate_stat_folder):
    """
    Merges per-vertex, per-trial profiler dumps into one aggregate dump per method.

    For every trial and every vertex with non-zero out-degree, the files
    "<profile_individual_stat_folder>/<method>_vertex_<node>_trial_<trial>.stats"
    are added to a running pstats.Stats object, one per method prefix
    ("fpr": fast PR, "lpr": lazy PR, "fcprd": fast CPRD). This is the same naming
    scheme that similarity_slice_benchmark writes. The merged statistics are
    then dumped to "<profile_aggregate_stat_folder>/<method>_vertex.stats".

    Inputs:  - number_of_trials: Number of trials whose dumps are merged (trial indices 0..number_of_trials-1).
             - adjacency_matrix: Graph adjacency matrix; must provide .tocsr() (presumably a SciPy sparse matrix).
             - profile_individual_stat_folder: Folder containing the individual .stats files.
             - profile_aggregate_stat_folder: Folder where the three aggregate .stats files are written.

    Returns nothing; the result is the three files written to profile_aggregate_stat_folder.
    Any exception raised by pstats while reading an individual file is propagated unchanged.
    """
    adjacency_matrix = adjacency_matrix.tocsr()

    # Only the out-degree is needed here: it selects the vertices for which dumps are read.
    _, out_degree, _ = get_natural_random_walk_matrix(adjacency_matrix)
    iterate_nodes = np.where(out_degree != 0)[0]

    # One accumulator per profiled method, kept in the order fast PR, lazy PR, fast CPRD.
    aggregates = [(prefix, pstats.Stats()) for prefix in ("fpr", "lpr", "fcprd")]

    for trial in range(number_of_trials):
        for node in iterate_nodes:
            suffix = "_vertex_" + str(node) + "_trial_" + str(trial) + ".stats"
            for prefix, stats in aggregates:
                stats.add(profile_individual_stat_folder + "/" + prefix + suffix)

    for prefix, stats in aggregates:
        stats.dump_stats(profile_aggregate_stat_folder + "/" + prefix + "_vertex.stats")
def similarity_slice_benchmark(number_of_trials,
                               adjacency_matrix,
                               rho_effective,
                               epsilon,
                               laziness_factor,
                               profile_individual_stat_folder):
    """
    Profiles three similarity-matrix-slice routines on every vertex with outgoing edges.

    For each trial and each vertex whose out-degree is non-zero, the following calls are
    executed under the profiler and their statistics are written to
    "<profile_individual_stat_folder>/<prefix>_vertex_<node>_trial_<trial>.stats":
        - "fpr":   similarity.fast_approximate_personalized_pagerank
        - "lpr":   similarity.lazy_approximate_personalized_pagerank
        - "fcprd": similarity.fast_approximate_regularized_commute

    Inputs:  - number_of_trials: How many times the full sweep over the vertices is repeated.
             - adjacency_matrix: Graph adjacency matrix; must provide .tocsr() (presumably a SciPy sparse matrix).
             - rho_effective: Value handed as-is to the fast PageRank and regularized commute routines.
             - epsilon: Base tolerance; a per-vertex value is derived through calculate_epsilon_effective.
             - laziness_factor: Used together with rho_effective to derive the restart probability for lazy PageRank.
             - profile_individual_stat_folder: Folder receiving one .stats file per (method, vertex, trial).

    Returns nothing. Prints the 1-based trial number and triggers garbage collection
    at the start of each trial and on every 500th vertex.
    """
    adjacency_matrix = adjacency_matrix.tocsr()
    number_of_nodes = adjacency_matrix.shape[0]

    # Random walk transition probabilities plus the degree vectors.
    rw_transition, out_degree, in_degree = get_natural_random_walk_matrix(adjacency_matrix)

    # Per-vertex neighbour indices and transition weights, taken row by row.
    adjacent_nodes = np.ndarray(number_of_nodes, dtype=np.ndarray)
    base_transitions = np.ndarray(number_of_nodes, dtype=np.ndarray)
    for row_index in np.arange(number_of_nodes):
        row = rw_transition.getrow(row_index)
        adjacent_nodes[row_index] = row.indices.astype(np.int64)
        base_transitions[row_index] = row.data

    # Restart probability for the lazy PageRank variant.
    numerator = rho_effective * (1 - laziness_factor)
    denominator = 1 - laziness_factor * rho_effective
    rho = numerator/denominator

    # Work vectors shared by all runs; they are zeroed before every profiled call.
    s = np.zeros(number_of_nodes, dtype=np.float64)
    r = np.zeros(number_of_nodes, dtype=np.float64)

    # (file prefix, profiled statement, method-specific context entries), in execution order:
    # fast PageRank, lazy PageRank, regularized commute-time.
    profiled_runs = (
        ("fpr",
         """similarity.fast_approximate_personalized_pagerank(s, r, base_transitions, adjacent_nodes, out_degree, in_degree, node, rho_effective, epsilon)""",
         {"rho_effective": rho_effective}),
        ("lpr",
         """similarity.lazy_approximate_personalized_pagerank(s, r, base_transitions, adjacent_nodes, out_degree, in_degree, node, rho, epsilon)""",
         {"rho": rho}),
        ("fcprd",
         """similarity.fast_approximate_regularized_commute(s, r, base_transitions, adjacent_nodes, out_degree, in_degree, node, rho_effective, epsilon)""",
         {"rho_effective": rho_effective}),
    )

    iterate_nodes = np.where(out_degree != 0)[0]

    for trial in range(number_of_trials):
        gc.collect()
        print("Trial #", trial+1)

        for n, node in enumerate(iterate_nodes):
            if not n % 500:
                gc.collect()

            epsilon_effective = calculate_epsilon_effective(None,
                                                            epsilon,
                                                            out_degree[node],
                                                            out_degree[adjacent_nodes[node]],
                                                            None)

            for prefix, statement, method_specific in profiled_runs:
                s[:] = 0.0
                r[:] = 0.0

                # Names visible to the profiled statement; "epsilon" is the per-vertex value.
                context = {"s": s,
                           "r": r,
                           "base_transitions": base_transitions,
                           "adjacent_nodes": adjacent_nodes,
                           "out_degree": out_degree,
                           "in_degree": in_degree,
                           "node": node}
                context.update(method_specific)
                context["epsilon"] = epsilon_effective

                filename = profile_individual_stat_folder + "/" + prefix + "_vertex_" + str(node) + "_trial_" + str(trial) + ".stats"
                profile.runctx(statement, globals(), context, filename=filename)