def metapath_to_degree_dicts(graph, metapath):
    """
    Build degree lookups for the two ends of a metapath.

    The adjacency matrix of the first metaedge is summed along axis 1 to get
    one degree per source node, and the adjacency matrix of the last metaedge
    is summed along axis 0 to get one degree per target node. Each degree
    sequence is then handed to degrees_to_degree_to_ind.

    Parameters
    ----------
    graph
        Object exposing `metagraph.get_metapath`; also forwarded to
        metaedge_to_adjacency_matrix.
    metapath
        Metapath specification resolved through
        `graph.metagraph.get_metapath`.

    Returns
    -------
    tuple
        `(source_degree_to_ind, target_degree_to_ind)`, i.e. the results of
        degrees_to_degree_to_ind for the source and target degrees.
        NOTE(review): presumably each maps a degree to node indices --
        confirm against degrees_to_degree_to_ind.
    """
    metapath = graph.metagraph.get_metapath(metapath)

    # Only the matrix (third element of the returned triple) is needed.
    *_, first_edge_matrix = metaedge_to_adjacency_matrix(
        graph, metapath[0], dense_threshold=0.7)
    *_, last_edge_matrix = metaedge_to_adjacency_matrix(
        graph, metapath[-1], dense_threshold=0.7)

    # `.flat` gives a 1-D iterator whether `sum` returned an ndarray or a
    # numpy.matrix.
    degrees_of_sources = first_edge_matrix.sum(axis=1).flat
    degrees_of_targets = last_edge_matrix.sum(axis=0).flat

    return (
        degrees_to_degree_to_ind(degrees_of_sources),
        degrees_to_degree_to_ind(degrees_of_targets),
    )
def dwpc_to_degrees(graph, metapath, damping=0.5, ignore_zeros=False):
    """
    Generate one record per cell of a DWPC matrix.

    Every record combines the source and target node ids and names, their
    degrees, the corresponding path count, and the transformed DWPC value.

    Parameters
    ----------
    graph
        Object providing `metagraph.get_metapath`, `get_nodes_path` and
        `read_path_counts`; also forwarded to metaedge_to_adjacency_matrix.
    metapath
        Metapath specification resolved through
        `graph.metagraph.get_metapath`.
    damping : float
        Damping value passed to `graph.read_path_counts` when loading the
        DWPC matrix.
    ignore_zeros : bool
        When true, cells whose transformed DWPC equals zero are skipped.

    Yields
    ------
    collections.OrderedDict
        Keys: source_id, target_id, source_name, target_name, source_degree,
        target_degree, path_count, dwpc.
    """
    metapath = graph.metagraph.get_metapath(metapath)

    # Source degrees: sums along axis 1 of the first metaedge's matrix.
    # Target degrees: sums along axis 0 of the last metaedge's matrix.
    *_, first_edge_matrix = metaedge_to_adjacency_matrix(
        graph, metapath[0], dense_threshold=0.7)
    *_, last_edge_matrix = metaedge_to_adjacency_matrix(
        graph, metapath[-1], dense_threshold=0.7)
    degrees_of_sources = first_edge_matrix.sum(axis=1).flat
    degrees_of_targets = last_edge_matrix.sum(axis=0).flat
    # Only the degree vectors are needed from here on.
    del first_edge_matrix, last_edge_matrix

    # Node names are read from the per-metanode TSV files ('name' column).
    names_of_sources = list(pandas.read_table(
        graph.get_nodes_path(metapath.source(), file_format='tsv'))['name'])
    names_of_targets = list(pandas.read_table(
        graph.get_nodes_path(metapath.target(), file_format='tsv'))['name'])

    # DWPC values are divided by the matrix mean and arcsinh-transformed,
    # then densified so individual cells can be indexed.
    source_ids, target_ids, dwpc_matrix = graph.read_path_counts(
        metapath, 'dwpc', damping)
    dwpc_matrix = numpy.arcsinh(dwpc_matrix / dwpc_matrix.mean())
    if scipy.sparse.issparse(dwpc_matrix):
        dwpc_matrix = dwpc_matrix.toarray()

    # The path count matrix is read as the 'dwpc' metric with damping 0.0.
    *_, path_count = graph.read_path_counts(metapath, 'dwpc', 0.0)
    if scipy.sparse.issparse(path_count):
        path_count = path_count.toarray()

    cells = itertools.product(range(len(source_ids)), range(len(target_ids)))
    for i, j in cells:
        dwpc_value = dwpc_matrix[i, j]
        if not (ignore_zeros and dwpc_value == 0):
            yield collections.OrderedDict({
                'source_id': source_ids[i],
                'target_id': target_ids[j],
                'source_name': names_of_sources[i],
                'target_name': names_of_targets[j],
                'source_degree': degrees_of_sources[i],
                'target_degree': degrees_of_targets[j],
                'path_count': path_count[i, j],
                'dwpc': dwpc_value,
            })
def metaedge_to_data_array(graph, metaedge, dtype=numpy.bool_):
    """
    Return an xarray.DataArray holding the adjacency matrix of a metaedge.

    The first dimension (rows) is named after the source metanode identifier
    and labelled with the source node ids; the second dimension (columns) is
    named after the target metanode identifier and labelled with the target
    node ids.

    Parameters
    ----------
    graph
        Forwarded to metaedge_to_adjacency_matrix.
    metaedge
        Metaedge whose `source.identifier`, `target.identifier` and
        `get_unicode_str()` supply the dimension names and the array name.
    dtype
        Forwarded to metaedge_to_adjacency_matrix as the matrix dtype.

    Returns
    -------
    xarray.DataArray
        Adjacency matrix named `metaedge.get_unicode_str()`.
    """
    source_node_ids, target_node_ids, adjacency_matrix = (
        metaedge_to_adjacency_matrix(graph, metaedge, dtype=dtype))
    # dims and coords are positional: entry 0 describes axis 0 (rows),
    # entry 1 describes axis 1 (columns).
    dims = metaedge.source.identifier, metaedge.target.identifier
    coords = source_node_ids, target_node_ids
    data_array = xarray.DataArray(
        adjacency_matrix,
        coords=coords,
        dims=dims,
        name=metaedge.get_unicode_str(),
    )
    return data_array
def dwpc_to_degrees(graph, metapath, damping=0.5, ignore_zeros=False,
                    ignore_redundant=True):
    """
    Generate one record per cell of a DWPC matrix.

    Every record combines the source and target node ids and names, their
    degrees, the corresponding path count, and the transformed DWPC value.

    Parameters
    ----------
    graph
        Object providing `metagraph.get_metapath`, `get_nodes_path` and
        `read_path_counts`; also forwarded to metaedge_to_adjacency_matrix.
    metapath
        Metapath specification resolved through
        `graph.metagraph.get_metapath`.
    damping : float
        Damping value passed to `graph.read_path_counts` when loading the
        DWPC matrix.
    ignore_zeros : bool
        When true, cells whose transformed DWPC equals zero are skipped.
    ignore_redundant : bool
        When metapath is symmetric, only return a single orientation of a
        node pair. For example, yield source-target but not also
        target-source, which should have the same DWPC.

    Yields
    ------
    collections.OrderedDict
        Keys: source_id, target_id, source_name, target_name, source_degree,
        target_degree, path_count, dwpc.
    """
    metapath = graph.metagraph.get_metapath(metapath)

    # Source degrees: sums along axis 1 of the first metaedge's matrix.
    # Target degrees: sums along axis 0 of the last metaedge's matrix.
    *_, first_edge_matrix = metaedge_to_adjacency_matrix(
        graph, metapath[0], dense_threshold=0.7)
    *_, last_edge_matrix = metaedge_to_adjacency_matrix(
        graph, metapath[-1], dense_threshold=0.7)
    degrees_of_sources = first_edge_matrix.sum(axis=1).flat
    degrees_of_targets = last_edge_matrix.sum(axis=0).flat
    # Only the degree vectors are needed from here on.
    del first_edge_matrix, last_edge_matrix

    # Node names are read from the per-metanode TSV files ('name' column).
    names_of_sources = list(pandas.read_csv(
        graph.get_nodes_path(metapath.source(), file_format='tsv'),
        sep='\t')['name'])
    names_of_targets = list(pandas.read_csv(
        graph.get_nodes_path(metapath.target(), file_format='tsv'),
        sep='\t')['name'])

    # DWPC values are divided by the matrix mean and arcsinh-transformed,
    # then densified so individual cells can be indexed.
    source_ids, target_ids, dwpc_matrix = graph.read_path_counts(
        metapath, 'dwpc', damping)
    dwpc_matrix = numpy.arcsinh(dwpc_matrix / dwpc_matrix.mean())
    if scipy.sparse.issparse(dwpc_matrix):
        dwpc_matrix = dwpc_matrix.toarray()

    # The path count matrix is read as the 'dwpc' metric with damping 0.0.
    *_, path_count = graph.read_path_counts(metapath, 'dwpc', 0.0)
    if scipy.sparse.issparse(path_count):
        path_count = path_count.toarray()

    # For a symmetric metapath, visit only index pairs (i, j) with i <= j so
    # each unordered node pair appears once; otherwise visit every cell.
    cells = (
        itertools.combinations_with_replacement(range(len(source_ids)), 2)
        if ignore_redundant and metapath.is_symmetric()
        else itertools.product(range(len(source_ids)), range(len(target_ids)))
    )
    for i, j in cells:
        dwpc_value = dwpc_matrix[i, j]
        if not (ignore_zeros and dwpc_value == 0):
            yield collections.OrderedDict({
                'source_id': source_ids[i],
                'target_id': target_ids[j],
                'source_name': names_of_sources[i],
                'target_name': names_of_targets[j],
                'source_degree': degrees_of_sources[i],
                'target_degree': degrees_of_targets[j],
                'path_count': path_count[i, j],
                'dwpc': dwpc_value,
            })