Beispiel #1
0
def test_sparsify_or_densify(array, dense_threshold, expect_sparse):
    """
    Check whether sparsify_or_densify returns a sparse matrix, as flagged by
    expect_sparse, for both a dense and a sparse version of the same data.
    """
    # Dense boolean array as input
    dense_input = numpy.array(array, dtype=numpy.bool_)
    dense_result = sparsify_or_densify(dense_input, dense_threshold)
    assert scipy.sparse.issparse(dense_result) == expect_sparse

    # Same data, this time handed over as a CSC sparse matrix
    sparse_input = scipy.sparse.csc_matrix(dense_input)
    sparse_result = sparsify_or_densify(sparse_input, dense_threshold)
    assert scipy.sparse.issparse(sparse_result) == expect_sparse
Beispiel #2
0
def test_sparsify_or_densify(array, dense_threshold, expect_sparse):
    """
    Run sparsify_or_densify on a dense array and then on the CSC sparse
    matrix built from it, asserting each time that the sparsity of the
    output equals expect_sparse.
    """
    converters = (
        # first pass: dense boolean array
        lambda data: numpy.array(data, dtype=numpy.bool_),
        # second pass: sparse matrix built from the dense array above
        scipy.sparse.csc_matrix,
    )
    matrix = array
    for convert in converters:
        matrix = convert(matrix)
        result = sparsify_or_densify(matrix, dense_threshold)
        assert scipy.sparse.issparse(result) == expect_sparse
Beispiel #3
0
def dwwc_sequential(graph, metapath, damping=0.5, dense_threshold=0, dtype=numpy.float64):
    """
    Compute the degree-weighted walk count (DWWC) in which nodes can be
    repeated within a path.

    The degree-weighted adjacency matrix of every metaedge in the metapath
    is multiplied, left to right, onto a running product. After each
    multiplication the product is passed through sparsify_or_densify.

    Parameters
    ----------
    graph : hetio.hetnet.Graph
    metapath : hetio.hetnet.MetaPath
    damping : float
    dense_threshold : float (0 <= dense_threshold <= 1)
        sets the density threshold at which a sparse matrix will be
        converted to a dense automatically.
    dtype : dtype object

    Returns
    -------
    tuple
        (row_names, col_names, dwwc_matrix), where row_names are the rows
        reported for the first metaedge and col_names are the columns
        reported for the last metaedge.

    Raises
    ------
    ValueError
        If metapath yields no metaedges, in which case there is nothing to
        multiply and no row/column names to report.
    """
    dwwc_matrix = None
    row_names = None
    col_names = None
    for metaedge in metapath:
        rows, col_names, adj_mat = hetmatpy.matrix.metaedge_to_adjacency_matrix(
            graph, metaedge, dense_threshold=dense_threshold, dtype=dtype)
        adj_mat = _degree_weight(adj_mat, damping, dtype=dtype)
        if dwwc_matrix is None:
            # First metaedge: it defines the rows and seeds the product
            row_names = rows
            dwwc_matrix = adj_mat
        else:
            dwwc_matrix = dwwc_matrix @ adj_mat
            dwwc_matrix = sparsify_or_densify(dwwc_matrix, dense_threshold)
    if dwwc_matrix is None:
        # Previously this fell through to the return statement and failed
        # with an UnboundLocalError on the column names.
        raise ValueError('metapath must contain at least one metaedge')
    return row_names, col_names, dwwc_matrix
Beispiel #4
0
def _dwpc_approx(graph,
                 metapath,
                 damping=0.5,
                 dense_threshold=0,
                 dtype=numpy.float64):
    """
    Compute an approximation of DWPC. Only removes the diagonal for the first
    repeated node, and any disjoint repetitions that follow the last occurrence
    of the first repeating node.

    The metapath is split at every occurrence of the first repeated metanode.
    Each segment between two consecutive occurrences is passed to dwpc and the
    resulting matrices are multiplied together in metapath order. A head
    segment (everything before the first occurrence) is computed with dwwc
    and multiplied on the left; a tail segment (everything after the last
    occurrence) is computed with dwpc and multiplied on the right.

    Examples
    --------
    GiGbCrC -> Identical output to DWPC
    GiGbCbGiG -> Approximation

    Returns
    -------
    tuple
        (row_names, cols, dwpc_matrix); the matrix has been passed through
        sparsify_or_densify with dense_threshold.

    Raises
    ------
    IndexError
        If no metanode occurs more than once in the metapath.
    """
    dwpc_matrix = None
    row_names = None
    # Find the first repeated metanode and where it occurs
    nodes = metapath.get_nodes()
    repeated_nodes = [
        node for i, node in enumerate(nodes) if node in nodes[i + 1:]
    ]
    first_repeat = repeated_nodes[0]
    repeated_indices = [i for i, v in enumerate(nodes) if v == first_repeat]
    # Chain the segments between consecutive occurrences of the repeated
    # metanode. Each segment's matrix must be multiplied onto the running
    # product; assigning it directly would discard every segment but the
    # last whenever the metanode occurs three or more times.
    for start, stop in zip(repeated_indices, repeated_indices[1:]):
        rows, cols, segment_matrix = dwpc(graph,
                                          metapath[start:stop],
                                          damping=damping,
                                          dense_threshold=dense_threshold,
                                          dtype=dtype)
        if dwpc_matrix is None:
            row_names = rows
            dwpc_matrix = segment_matrix
        else:
            dwpc_matrix = dwpc_matrix @ segment_matrix
    # Add head and tail segments, if applicable
    if repeated_indices[0] != 0:
        # The head supplies the row names of the full metapath
        row_names, _, head_seg = dwwc(graph,
                                      metapath[0:repeated_indices[0]],
                                      damping=damping,
                                      dense_threshold=dense_threshold,
                                      dtype=dtype)
        dwpc_matrix = head_seg @ dwpc_matrix
    if nodes[repeated_indices[-1]] != nodes[-1]:
        # The tail supplies the column names of the full metapath
        _, cols, tail_seg = dwpc(graph,
                                 metapath[repeated_indices[-1]:],
                                 damping=damping,
                                 dense_threshold=dense_threshold,
                                 dtype=dtype)
        dwpc_matrix = dwpc_matrix @ tail_seg
    dwpc_matrix = sparsify_or_densify(dwpc_matrix, dense_threshold)
    return row_names, cols, dwpc_matrix
Beispiel #5
0
def dwwc_chain(graph, metapath, damping=0.5, dense_threshold=0, dtype=numpy.float64):
    """
    Multiply the matrices of a metapath in an optimized chain order.

    The order is chosen as in numpy.multi_dot, except that sparse matrices
    are allowed. The ordering logic is modified from
    numpy.linalg.linalg._multi_dot (https://git.io/vh31f), which is released
    under a 3-Clause BSD License (https://git.io/vhCDC).

    Returns the source node identifiers, the target node identifiers and
    the resulting matrix after sparsify_or_densify has been applied.
    """
    metapath = graph.metagraph.get_metapath(metapath)
    # One node count per metanode along the metapath; these are the
    # dimensions handed to the ordering routine
    node_counts = list(map(graph.count_nodes, metapath.get_nodes()))
    source_ids = hetmatpy.matrix.get_node_identifiers(graph, metapath.source())
    target_ids = hetmatpy.matrix.get_node_identifiers(graph, metapath.target())
    multiplication_order = _dimensions_to_ordering(node_counts)
    last_index = len(metapath) - 1
    product = _multi_dot(
        metapath, multiplication_order, 0, last_index,
        graph, damping, dense_threshold, dtype)
    return source_ids, target_ids, sparsify_or_densify(product, dense_threshold)
Beispiel #6
0
def dwwc_recursive(graph, metapath, damping=0.5, dense_threshold=0, dtype=numpy.float64):
    """
    Recursive DWWC implementation to take better advantage of caching.

    The degree-weighted adjacency matrix of the first metaedge is multiplied
    by the DWWC of the remaining metapath, which is obtained through dwwc
    with this function as its dwwc_method.
    """
    row_ids, col_ids, weighted_first = hetmatpy.matrix.metaedge_to_adjacency_matrix(
        graph, metapath[0], dense_threshold=dense_threshold, dtype=dtype)
    weighted_first = _degree_weight(weighted_first, damping, dtype=dtype)
    if len(metapath) <= 1:
        # Base case: a single metaedge is its own walk count
        result = weighted_first
    else:
        # The columns of the full metapath come from the remainder
        _, col_ids, remainder = dwwc(
            graph,
            metapath[1:],
            damping=damping,
            dense_threshold=dense_threshold,
            dtype=dtype,
            dwwc_method=dwwc_recursive,
        )
        result = weighted_first @ remainder
    return row_ids, col_ids, sparsify_or_densify(result, dense_threshold)
Beispiel #7
0
 def wrapper(*args, **kwargs):
     # Bind the call to the wrapped function's signature so that every
     # parameter, including defaulted ones, can be looked up by name.
     bound = signature.bind(*args, **kwargs)
     bound.apply_defaults()
     arguments = bound.arguments
     graph = arguments['graph']
     # Resolve the metapath once; the resolved object is used both for the
     # cache key and for the call to the wrapped function.
     metapath = graph.metagraph.get_metapath(arguments['metapath'])
     arguments['metapath'] = metapath
     damping = arguments['damping']
     cached = None
     started = time.perf_counter()
     # Caching applies only to HetMat graphs with a truthy path_counts_cache
     supports_cache = (
         isinstance(graph, hetmatpy.hetmat.HetMat)
         and graph.path_counts_cache
     )
     if supports_cache:
         key = dict(metapath=metapath, metric=metric, damping=damping)
         cached = graph.path_counts_cache.get(**key)
         if cached:
             rows, cols, result = cached
             # Bring the cached matrix to the requested density and dtype
             result = sparsify_or_densify(
                 result, arguments['dense_threshold'])
             result = result.astype(arguments['dtype'])
     if cached is None:
         # Nothing came from the cache: compute with the wrapped function
         if arguments['dwwc_method'] is None:
             # import default_dwwc_method here to avoid circular dependencies
             from hetmatpy.degree_weight import default_dwwc_method
             arguments['dwwc_method'] = default_dwwc_method
         rows, cols, result = user_function(**arguments)
     if supports_cache:
         # The cache is handed the matrix and the elapsed time on every
         # call, including calls that were answered from the cache.
         elapsed = time.perf_counter() - started
         graph.path_counts_cache.set(**key, matrix=result, runtime=elapsed)
     return rows, cols, result