def main():
    """Command-line entry point for ARCTE feature extraction.

    Parses CLI arguments, reads a graph in edge-list format, symmetrizes its
    adjacency matrix, runs the ARCTE algorithm, and writes the resulting
    sparse feature matrix to the output path.
    """
    ####################################################################################################################
    # Parse arguments.
    ####################################################################################################################
    parser = argparse.ArgumentParser()

    # File paths.
    parser.add_argument("-i", "--input",
                        dest="input_edge_list_path",
                        help="This is the file path of the graph in edge list format.",
                        type=str,
                        required=True)
    parser.add_argument("-o", "--output",
                        dest="output_feature_path",
                        # Fixed copy-pasted help text: this argument is the OUTPUT
                        # feature file, not the input edge list.
                        help="This is the file path where the extracted node features will be written.",
                        type=str,
                        required=True)

    # Edge list parsing configuration.
    parser.add_argument("-s", "--separator",
                        dest="separator",
                        help="The character(s) separating the values in the edge list (default is tab: \"\t\").",
                        type=str,
                        required=False,
                        default="\t")
    # BUGFIX: the original used type=bool, which treats ANY non-empty string
    # (including "False") as True. A store_true flag is the correct argparse
    # idiom for a boolean switch; the default (False) is unchanged.
    parser.add_argument("-u", "--undirected",
                        dest="undirected",
                        help="Also create the reciprocal edge for each edge in edge list.",
                        action="store_true",
                        required=False,
                        default=False)

    # Algorithm configuration.
    parser.add_argument("-r", "--rho",
                        dest="restart_probability",
                        help="The restart probability for the vertex-centric PageRank calculation.",
                        type=float,
                        required=False,
                        default=0.1)
    parser.add_argument("-e", "--epsilon",
                        dest="epsilon_threshold",
                        help="The tolerance for calculating vertex-centric PageRank values.",
                        type=float,
                        required=False,
                        default=1.0e-05)
    parser.add_argument("-nt", "--tasks",
                        dest="number_of_tasks",
                        help="The number of parallel tasks to create.",
                        type=int,
                        required=False,
                        default=None)

    args = parser.parse_args()

    input_edge_list_path = args.input_edge_list_path
    output_feature_path = args.output_feature_path

    separator = args.separator
    undirected = args.undirected

    restart_probability = args.restart_probability
    epsilon_threshold = args.epsilon_threshold
    number_of_tasks = args.number_of_tasks
    if number_of_tasks is None:
        # Default to one task per available hardware thread.
        number_of_tasks = get_threads_number()

    ####################################################################################################################
    # Perform algorithm.
    ####################################################################################################################
    # Read the adjacency matrix.
    adjacency_matrix, node_to_id = read_adjacency_matrix(file_path=input_edge_list_path,
                                                         separator=separator,
                                                         undirected=undirected)

    # Make sure we are dealing with a symmetric adjacency matrix; averaging with
    # the transpose symmetrizes while preserving total edge weight.
    adjacency_matrix = spsp.csr_matrix(adjacency_matrix)
    adjacency_matrix = (adjacency_matrix + adjacency_matrix.transpose()) / 2

    # Perform ARCTE.
    features = arcte(adjacency_matrix=adjacency_matrix,
                     rho=restart_probability,
                     epsilon=epsilon_threshold,
                     number_of_threads=number_of_tasks)
    features = spsp.csr_matrix(features)

    # Write features to output file.
    write_features(file_path=output_feature_path,
                   features=features,
                   separator=separator,
                   node_to_id=node_to_id)
classifier_parameters["fit_intercept"] = True elif feature_extraction_method_name == "lapeig": classifier_parameters["C"] = 50.0 classifier_parameters["fit_intercept"] = False elif feature_extraction_method_name == "repeig": classifier_parameters["C"] = 50.0 classifier_parameters["fit_intercept"] = False else: print("Invalid method name.") raise RuntimeError return classifier_parameters PERCENTAGES = np.arange(1, 11) # [1, 10] TRIAL_NUM = 10 THREAD_NUM = get_threads_number() ######################################################################################################################## # Experiment execution. ######################################################################################################################## feature_extraction_parameters = get_feature_extraction_parameters(FEATURE_EXTRACTION_METHOD_NAME) classifier_parameters = get_classifier_parameters(FEATURE_EXTRACTION_METHOD_NAME) run_experiment(DATASET_NAME, DATASET_FOLDER, FEATURE_EXTRACTION_METHOD_NAME, PERCENTAGES, TRIAL_NUM, THREAD_NUM, feature_extraction_parameters,
def arcte(adjacency_matrix, rho, epsilon, number_of_threads=None):
    """
    Extracts local community features for all graph nodes based on the partitioning of node-centric similarity vectors.

    Inputs:  - A in R^(nxn): Adjacency matrix of an undirected network represented as a SciPy Sparse COOrdinate matrix.
             - rho: Restart probability
             - epsilon: Approximation threshold
             - number_of_threads: Number of worker processes; defaults to the machine's thread count.

    Outputs: - X in R^(nxC_n): The latent space embedding represented as a SciPy Sparse COOrdinate matrix.
    """
    adjacency_matrix = sparse.csr_matrix(adjacency_matrix)
    number_of_nodes = adjacency_matrix.shape[0]

    if number_of_threads is None:
        number_of_threads = get_threads_number()

    if number_of_threads == 1:
        # Calculate natural random walk transition probability matrix.
        rw_transition, out_degree, in_degree = get_natural_random_walk_matrix(adjacency_matrix, make_shared=False)

        # Select seed nodes: those with more than one incident edge, processed
        # in decreasing edge-count order (heaviest nodes first).
        a = adjacency_matrix.copy()
        a.data = np.ones_like(a.data)
        edge_count_vector = np.squeeze(np.asarray(a.sum(axis=0), dtype=np.int64))
        iterate_nodes = np.where(edge_count_vector != 0)[0]
        argsort_indices = np.argsort(edge_count_vector[iterate_nodes])
        iterate_nodes = iterate_nodes[argsort_indices][::-1]
        iterate_nodes = iterate_nodes[np.where(edge_count_vector[iterate_nodes] > 1.0)[0]]

        local_features = arcte_worker(iterate_nodes,
                                      rw_transition.indices, rw_transition.indptr, rw_transition.data,
                                      out_degree, in_degree,
                                      rho, epsilon)
    else:
        # Calculate natural random walk transition probability matrix
        # (shared memory, so worker processes can read it without copying).
        rw_transition, out_degree, in_degree = get_natural_random_walk_matrix(adjacency_matrix, make_shared=True)

        # Select seed nodes exactly as in the single-threaded branch.
        a = adjacency_matrix.copy()
        a.data = np.ones_like(a.data)
        edge_count_vector = np.squeeze(np.asarray(a.sum(axis=0), dtype=np.int64))
        iterate_nodes = np.where(edge_count_vector != 0)[0]
        argsort_indices = np.argsort(edge_count_vector[iterate_nodes])
        iterate_nodes = iterate_nodes[argsort_indices][::-1]
        iterate_nodes = iterate_nodes[np.where(edge_count_vector[iterate_nodes] > 1.0)[0]]

        pool = mp.Pool(number_of_threads)
        node_chunks = list(parallel_chunks(iterate_nodes, number_of_threads))

        results = list()
        # Dispatch one task per chunk. The original indexed range(len(pool._pool))
        # (a private attribute), silently dropping chunks if the counts differed.
        # It also pre-counted chunk lengths via len(list(chunk)) into an unused
        # variable, which would exhaust generator chunks before dispatch; that
        # dead code is removed.
        for chunk in node_chunks:
            pool.apply_async(arcte_worker,
                             args=(chunk,
                                   rw_transition.indices, rw_transition.indptr, rw_transition.data,
                                   out_degree, in_degree,
                                   rho, epsilon),
                             callback=results.append)
        pool.close()
        pool.join()

        # Results arrive in completion order; summation is order-independent.
        local_features = results[0]
        for additive_features in results[1:]:
            local_features = local_features + additive_features
        local_features = sparse.csr_matrix(local_features)

    # Form base community feature matrix: identity (self) plus binarized adjacency
    # (immediate neighborhood).
    identity_matrix = sparse.csr_matrix(sparse.eye(number_of_nodes, number_of_nodes, dtype=np.float64))
    # BUGFIX: copy before overwriting .data. The original aliased adjacency_matrix
    # here and zeroed out its edge weights as a side effect; since
    # sparse.csr_matrix() does not copy an already-CSR input, that mutated the
    # caller's matrix as well.
    adjacency_matrix_ones = adjacency_matrix.copy()
    adjacency_matrix_ones.data = np.ones_like(adjacency_matrix.data)
    base_community_features = identity_matrix + adjacency_matrix_ones

    # Stack horizontally matrices to form feature matrix.
    try:
        features = sparse.hstack([base_community_features, local_features]).tocsr()
    except ValueError:
        # Fall back to the base features alone if the local features are
        # dimensionally incompatible (e.g. empty).
        print("Failure with horizontal feature stacking.")
        features = base_community_features

    return features
def arcte(adjacency_matrix, rho, epsilon, number_of_threads=None):
    """
    Extracts local community features for all graph nodes based on the partitioning of node-centric similarity vectors.

    Inputs:  - A in R^(nxn): Adjacency matrix of an undirected network represented as a SciPy Sparse COOrdinate matrix.
             - rho: Restart probability
             - epsilon: Approximation threshold
             - number_of_threads: Number of worker processes; defaults to the machine's thread count.

    Outputs: - X in R^(nxC_n): The latent space embedding represented as a SciPy Sparse COOrdinate matrix.
    """
    adjacency_matrix = sparse.csr_matrix(adjacency_matrix)
    number_of_nodes = adjacency_matrix.shape[0]

    if number_of_threads is None:
        number_of_threads = get_threads_number()

    if number_of_threads == 1:
        # Calculate natural random walk transition probability matrix.
        rw_transition, out_degree, in_degree = get_natural_random_walk_matrix(
            adjacency_matrix, make_shared=False)

        # Select seed nodes: those with more than one incident edge, processed
        # in decreasing edge-count order. A binarized copy of the adjacency
        # matrix is used so weights do not affect the edge counts.
        a = adjacency_matrix.copy()
        a.data = np.ones_like(a.data)
        edge_count_vector = np.squeeze(
            np.asarray(a.sum(axis=0), dtype=np.int64))
        iterate_nodes = np.where(edge_count_vector != 0)[0]
        argsort_indices = np.argsort(edge_count_vector[iterate_nodes])
        iterate_nodes = iterate_nodes[argsort_indices][::-1]
        iterate_nodes = iterate_nodes[np.where(
            edge_count_vector[iterate_nodes] > 1.0)[0]]

        # iterate_nodes = np.where(out_degree != 0)[0]
        # argsort_indices = np.argsort(out_degree[iterate_nodes])
        # iterate_nodes = iterate_nodes[argsort_indices][::-1]
        # iterate_nodes = iterate_nodes[np.where(out_degree[iterate_nodes] > 1.0)[0]]

        local_features = arcte_worker(iterate_nodes,
                                      rw_transition.indices,
                                      rw_transition.indptr,
                                      rw_transition.data,
                                      out_degree,
                                      in_degree,
                                      rho,
                                      epsilon)
    else:
        # Calculate natural random walk transition probability matrix
        # (make_shared=True: presumably shared memory for the worker
        # processes — confirm against get_natural_random_walk_matrix).
        rw_transition, out_degree, in_degree = get_natural_random_walk_matrix(
            adjacency_matrix, make_shared=True)

        # Select seed nodes exactly as in the single-threaded branch.
        a = adjacency_matrix.copy()
        a.data = np.ones_like(a.data)
        edge_count_vector = np.squeeze(
            np.asarray(a.sum(axis=0), dtype=np.int64))
        iterate_nodes = np.where(edge_count_vector != 0)[0]
        argsort_indices = np.argsort(edge_count_vector[iterate_nodes])
        iterate_nodes = iterate_nodes[argsort_indices][::-1]
        iterate_nodes = iterate_nodes[np.where(
            edge_count_vector[iterate_nodes] > 1.0)[0]]

        # iterate_nodes = np.where(out_degree != 0)[0]
        # argsort_indices = np.argsort(out_degree[iterate_nodes])
        # iterate_nodes = iterate_nodes[argsort_indices][::-1]
        # iterate_nodes = iterate_nodes[np.where(out_degree[iterate_nodes] > 1.0)[0]]

        pool = mp.Pool(number_of_threads)
        node_chunks = list(parallel_chunks(iterate_nodes, number_of_threads))

        # NOTE(review): node_count is never used; if parallel_chunks yields
        # generators, list(chunk) here would exhaust them before dispatch.
        node_count = 0
        for chunk in node_chunks:
            node_count += len(list(chunk))

        results = list()
        # NOTE(review): iterating range(len(pool._pool)) relies on a private
        # attribute and assumes exactly one chunk per pool worker; chunks
        # beyond that count would be silently dropped.
        for chunk_no in range(len(pool._pool)):
            pool.apply_async(arcte_worker,
                             args=(node_chunks[chunk_no],
                                   rw_transition.indices,
                                   rw_transition.indptr,
                                   rw_transition.data,
                                   out_degree,
                                   in_degree,
                                   rho,
                                   epsilon),
                             callback=results.append)
        pool.close()
        pool.join()

        # local_features = sparse.hstack(results)
        # Results arrive in completion order; summation is order-independent.
        local_features = results[0]
        for additive_features in results[1:]:
            local_features = local_features + additive_features
        local_features = sparse.csr_matrix(local_features)

    # Form base community feature matrix: identity (self) plus binarized
    # adjacency (immediate neighborhood).
    identity_matrix = sparse.csr_matrix(
        sparse.eye(number_of_nodes, number_of_nodes, dtype=np.float64))
    # NOTE(review): this aliases adjacency_matrix (no copy) and then replaces
    # its .data with ones, destroying the edge weights of the matrix —
    # and potentially the caller's matrix, since csr_matrix() of an
    # already-CSR input does not copy. Verify this side effect is intended.
    adjacency_matrix_ones = adjacency_matrix
    adjacency_matrix_ones.data = np.ones_like(adjacency_matrix.data)
    base_community_features = identity_matrix + adjacency_matrix_ones

    # Stack horizontally matrices to form feature matrix.
    try:
        features = sparse.hstack([base_community_features,
                                  local_features]).tocsr()
    except ValueError as e:
        # Fall back to base features alone if stacking fails.
        print("Failure with horizontal feature stacking.")
        features = base_community_features

    return features
classifier_parameters["fit_intercept"] = True elif feature_extraction_method_name == "lapeig": classifier_parameters["C"] = 50.0 classifier_parameters["fit_intercept"] = False elif feature_extraction_method_name == "repeig": classifier_parameters["C"] = 50.0 classifier_parameters["fit_intercept"] = False else: print("Invalid method name.") raise RuntimeError return classifier_parameters PERCENTAGES = np.arange(1, 11) # [1, 10] TRIAL_NUM = 10 THREAD_NUM = get_threads_number() ######################################################################################################################## # Experiment execution. ######################################################################################################################## feature_extraction_parameters = get_feature_extraction_parameters( FEATURE_EXTRACTION_METHOD_NAME) classifier_parameters = get_classifier_parameters( FEATURE_EXTRACTION_METHOD_NAME) run_experiment(DATASET_NAME, DATASET_FOLDER, FEATURE_EXTRACTION_METHOD_NAME, PERCENTAGES, TRIAL_NUM, THREAD_NUM, feature_extraction_parameters, classifier_parameters)
def main():
    """Command-line entry point for ARCTE feature extraction.

    Parses CLI arguments, reads a graph in edge-list format, symmetrizes its
    adjacency matrix, runs the ARCTE algorithm, and writes the resulting
    sparse feature matrix to the output path.
    """
    ####################################################################################################################
    # Parse arguments.
    ####################################################################################################################
    parser = argparse.ArgumentParser()

    # File paths.
    parser.add_argument("-i", "--input",
                        dest="input_edge_list_path",
                        help="This is the file path of the graph in edge list format.",
                        type=str,
                        required=True)
    # NOTE(review): this help text looks copy-pasted from --input; the
    # argument is the output feature file, not an edge list.
    parser.add_argument("-o", "--output",
                        dest="output_feature_path",
                        help="This is the file path of the graph in edge list format.",
                        type=str,
                        required=True)

    # Edge list parsing configuration.
    parser.add_argument("-s", "--separator",
                        dest="separator",
                        help="The character(s) separating the values in the edge list (default is tab: \"\t\").",
                        type=str,
                        required=False,
                        default="\t")
    # NOTE(review): type=bool is almost certainly a bug — argparse applies
    # bool() to the raw string, so any non-empty value (including "False")
    # parses as True. action="store_true" is the conventional fix.
    parser.add_argument("-u", "--undirected",
                        dest="undirected",
                        help="Also create the reciprocal edge for each edge in edge list.",
                        type=bool,
                        required=False,
                        default=False)

    # Algorithm configuration.
    parser.add_argument("-r", "--rho",
                        dest="restart_probability",
                        help="The restart probability for the vertex-centric PageRank calculation.",
                        type=float,
                        required=False,
                        default=0.1)
    parser.add_argument("-e", "--epsilon",
                        dest="epsilon_threshold",
                        help="The tolerance for calculating vertex-centric PageRank values.",
                        type=float,
                        required=False,
                        default=1.0e-05)
    parser.add_argument("-nt", "--tasks",
                        dest="number_of_tasks",
                        help="The number of parallel tasks to create.",
                        type=int,
                        required=False,
                        default=None)

    args = parser.parse_args()

    input_edge_list_path = args.input_edge_list_path
    output_feature_path = args.output_feature_path

    separator = args.separator
    undirected = args.undirected

    restart_probability = args.restart_probability
    epsilon_threshold = args.epsilon_threshold
    number_of_tasks = args.number_of_tasks
    if number_of_tasks is None:
        # Default to one task per available hardware thread.
        number_of_tasks = get_threads_number()

    ####################################################################################################################
    # Perform algorithm.
    ####################################################################################################################
    # Read the adjacency matrix.
    adjacency_matrix,\
        node_to_id = read_adjacency_matrix(file_path=input_edge_list_path,
                                           separator=separator,
                                           undirected=undirected)

    # Make sure we are dealing with a symmetric adjacency matrix; averaging
    # with the transpose symmetrizes while preserving total edge weight.
    adjacency_matrix = spsp.csr_matrix(adjacency_matrix)
    adjacency_matrix = (adjacency_matrix + adjacency_matrix.transpose())/2

    # Perform ARCTE.
    features = arcte(adjacency_matrix=adjacency_matrix,
                     rho=restart_probability,
                     epsilon=epsilon_threshold,
                     number_of_threads=number_of_tasks)
    features = spsp.csr_matrix(features)

    # Write features to output file.
    write_features(file_path=output_feature_path,
                   features=features,
                   separator=separator,
                   node_to_id=node_to_id)