def test_path_traversal(metapath, hetmat, tmpdir):
    """
    Compare matrix-based PC (path count) and DWPC (degree-weighted path
    count) values against hetnetpy.pathtools on the 'random-subgraph' graph.

    Only the compound-disease pair with the largest path count is compared,
    since that is where errors are most likely to appear.
    """
    graph = get_graph('random-subgraph')
    # The matrix computations run on a hetmat when requested; the pathtools
    # reference below always uses the in-memory graph.
    if hetmat:
        graph_or_hetmat = get_graph(
            'random-subgraph', hetmat=hetmat, directory=tmpdir)
    else:
        graph_or_hetmat = graph
    metapath = graph.metagraph.metapath_from_abbrev(metapath)

    # damping=0 yields the plain path count; damping=0.4 the DWPC.
    rows, cols, pc_matrix = dwpc(graph_or_hetmat, metapath, damping=0)
    rows, cols, dwpc_matrix = dwpc(graph_or_hetmat, metapath, damping=0.4)

    # Locate the (row, column) position holding the maximum path count.
    flat_position = pc_matrix.argmax()
    i, j = numpy.unravel_index(flat_position, pc_matrix.shape)
    source_id = ('Compound', rows[i])
    target_id = ('Disease', cols[j])

    # Reference values from explicit path enumeration.
    reference_paths = hetnetpy.pathtools.paths_between(
        graph,
        source=source_id,
        target=target_id,
        metapath=metapath,
        duplicates=False,
    )
    reference_dwpc = hetnetpy.pathtools.DWPC(
        reference_paths, damping_exponent=0.4)

    assert pc_matrix[i, j] == len(reference_paths)
    assert dwpc_matrix[i, j] == pytest.approx(reference_dwpc)
def test_disease_gene_example_dwwc(dwwc_method):
    """
    Check the walk count and DWWC of the GiGaD metapath against Figure 2D of
    Himmelstein & Baranzini (2015) PLOS Comp Bio.
    https://doi.org/10.1371/journal.pcbi.1004259.g002
    """
    graph = get_graph('disease-gene-example')
    metapath = graph.metagraph.metapath_from_abbrev('GiGaD')

    # damping=0 gives the walk count; damping=0.5 gives the DWWC.
    rows, cols, wc_matrix = dwwc(
        graph, metapath, damping=0, dwwc_method=dwwc_method)
    rows, cols, dwwc_matrix = dwwc(
        graph, metapath, damping=0.5, dwwc_method=dwwc_method)

    # Row and column labels must come back in this exact order.
    expected_genes = [
        'CXCR4', 'IL2RA', 'IRF1', 'IRF8', 'ITCH', 'STAT3', 'SUMO1',
    ]
    expected_diseases = ["Crohn's Disease", 'Multiple Sclerosis']
    assert rows == expected_genes
    assert cols == expected_diseases

    # Warning: WC (walk count) and PC (path count) agree here only because
    # none of the GiGaD paths contain duplicate nodes. GiGaD has duplicate
    # metanodes, so in general WC and PC are not guaranteed to match.
    i = rows.index('IRF1')
    j = cols.index('Multiple Sclerosis')
    expected_dwwc = 0.25 + 0.25 + 32**-0.5
    assert wc_matrix[i, j] == 3
    assert dwwc_matrix[i, j] == pytest.approx(expected_dwwc)
def test_CbGiGiGaD_traversal():
    """
    Test path counts and degree-weighted path counts for the CbGiGiGaD
    metapath between bupropion and nicotine dependence.

    These values are not intended to correspond to the values from the
    entire Hetionet v1.0. Hence, the expected values are generated using
    hetnetpy.pathtools on the same 'bupropion-subgraph' graph.
    """
    graph = get_graph('bupropion-subgraph')
    compound = 'DB01156'  # Bupropion
    disease = 'DOID:0050742'  # nicotine dependence
    metapath = graph.metagraph.metapath_from_abbrev('CbGiGiGaD')

    # Reference values from explicit path enumeration. Uses the
    # `hetnetpy.pathtools` module name, consistent with the other
    # traversal test in this file (previously spelled `hetio.pathtools`).
    paths = hetnetpy.pathtools.paths_between(
        graph,
        source=('Compound', compound),
        target=('Disease', disease),
        metapath=metapath,
        duplicates=False,
    )
    hetnetpy_dwpc = hetnetpy.pathtools.DWPC(paths, damping_exponent=0.4)

    # Matrix computations: damping=0 is the path count, 0.4 is the DWPC.
    rows, cols, pc_matrix = dwpc(graph, metapath, damping=0)
    rows, cols, dwpc_matrix = dwpc(graph, metapath, damping=0.4)
    i = rows.index(compound)
    j = cols.index(disease)

    assert pc_matrix[i, j] == len(paths)
    assert dwpc_matrix[i, j] == pytest.approx(hetnetpy_dwpc)
def test_disease_gene_example_conversion_to_hetmat(tmpdir):
    """
    Test converting the hetnet from Figure 2C of https://doi.org/crz8 into a
    hetmat, checking metanodes and adjacency matrices survive the conversion.
    """
    graph = get_graph('disease-gene-example')
    hetmat = hetmatpy.hetmat.hetmat_from_graph(graph, tmpdir)

    graph_metanodes = list(graph.metagraph.get_nodes())
    hetmat_metanodes = list(hetmat.metagraph.get_nodes())
    assert graph_metanodes == hetmat_metanodes

    # GaD is checked first, then its inverse DaG (the hetmat only stores GaD
    # and must transpose to serve DaG).
    for metaedge in ('GaD', 'DaG'):
        hetnet_adj = hetmatpy.matrix.metaedge_to_adjacency_matrix(
            graph, metaedge, dense_threshold=0)
        hetmat_adj = hetmatpy.matrix.metaedge_to_adjacency_matrix(
            hetmat, metaedge, dense_threshold=0)
        assert hetnet_adj[0] == hetmat_adj[0]  # row identifiers
        assert hetnet_adj[1] == hetmat_adj[1]  # column identifiers
        assert numpy.array_equal(hetnet_adj[2], hetmat_adj[2])  # adj matrices
def test_disease_gene_example_hetmat_archiving(tmpdir):
    """
    Test archiving the hetmat corresponding to the hetnet in Figure 2C at
    https://doi.org/crz8, then loading the archive back and comparing files.
    """
    base_dir = pathlib.Path(tmpdir)
    graph = get_graph('disease-gene-example')
    hetmat_0_dir = base_dir / 'disease-gene-example-0.hetmat'
    hetmat = hetmatpy.hetmat.hetmat_from_graph(graph, hetmat_0_dir)

    # Archive members are expected in exactly this order.
    expected = [
        'edges/DlT.sparse.npz',
        'edges/GaD.sparse.npz',
        'edges/GeT.sparse.npz',
        'edges/GiG.sparse.npz',
        'metagraph.json',
        'nodes/Disease.tsv',
        'nodes/Gene.tsv',
        'nodes/Tissue.tsv',
    ]

    # Create the archive and inspect its member names.
    archive_path = hetmatpy.hetmat.archive.create_hetmat_archive(hetmat)
    with zipfile.ZipFile(archive_path) as zip_file:
        name_list = zip_file.namelist()
    assert name_list == expected

    # Round trip: extract into a second directory and compare file contents
    # (shallow=False compares bytes rather than os.stat signatures).
    hetmat_1_dir = base_dir / 'disease-gene-example-1.hetmat'
    hetmatpy.hetmat.archive.load_archive(archive_path, hetmat_1_dir)
    match, mismatch, errors = filecmp.cmpfiles(
        hetmat_0_dir, hetmat_1_dir, common=expected, shallow=False)
    assert match == expected
    assert not mismatch
    assert not errors
def test_dtype(metapath, dtype, dwwc_method):
    """
    Check that dwpc() returns a matrix whose dtype equals the requested
    dtype on the 'disease-gene-example' graph.
    """
    graph = get_graph('disease-gene-example')
    metapath = graph.metagraph.metapath_from_abbrev(metapath)
    result = dwpc(graph, metapath, dtype=dtype, dwwc_method=dwwc_method)
    rows, cols, dwpc_matrix = result
    assert dwpc_matrix.dtype == dtype
def test_disjoint_dwpc(metapath, exp_row, exp_col, exp_data, shape):
    """
    Compare the dwpc() matrix for a metapath on the 'random-subgraph' graph
    with an expected matrix supplied as COO triplets (data, row, col) and a
    shape.
    """
    graph = get_graph('random-subgraph')
    metapath = graph.metagraph.metapath_from_abbrev(metapath)
    row, col, dwpc_matrix = dwpc(graph, metapath)

    # Build the expected matrix from its coordinate representation.
    coordinates = (exp_row, exp_col)
    expected = sparse.coo_matrix((exp_data, coordinates), shape=shape)

    # Largest absolute elementwise deviation must be ~0.
    deviation = abs(dwpc_matrix - expected)
    assert deviation.max() == pytest.approx(0, abs=1e-7)
def test_dwpc_approx(metapath, relative):
    """
    Compare _dwpc_approx() with dwpc() on the 'random-subgraph' graph.

    When `relative` is 'equal' the two matrices must agree to within 1e-7;
    otherwise the approximation's total must be at least the DWPC total.
    """
    graph = get_graph('random-subgraph')
    metapath = graph.metagraph.metapath_from_abbrev(metapath)

    _, _, dwpc_matrix = dwpc(graph, metapath)
    _, _, dwpc_approx = _dwpc_approx(graph, metapath)
    _, _, dwwc_matrix = dwwc(graph, metapath)

    approx_minus_exact = dwpc_approx - dwpc_matrix
    if relative == 'equal':
        assert abs(approx_minus_exact).max() == pytest.approx(0, abs=1e-7)
    else:
        assert numpy.sum(approx_minus_exact) >= 0

    # NOTE(review): the maximum of absolute values is never negative for
    # finite entries, so this assertion cannot detect a wrong result.
    # Presumably a bound between DWWC and the approximation was intended --
    # confirm before tightening.
    assert abs(dwwc_matrix - dwpc_approx).max() >= 0
def test_no_and_short_repeat(metapath, expected, path_type):
    """
    Check dwwc (path_type 0) or _dwpc_short_repeat (path_type 1) against an
    expected matrix and the node labels from get_nodes(), using damping=0.5
    on the 'disease-gene-example' graph.
    """
    exp_row, exp_col = get_nodes(metapath)
    graph = get_graph('disease-gene-example')
    metapath = graph.metagraph.metapath_from_abbrev(metapath)

    # path_type selects which implementation is under test.
    implementations = {0: dwwc, 1: _dwpc_short_repeat}
    compute = implementations[path_type]
    row, col, dwpc_matrix = compute(graph, metapath, damping=0.5)

    expected = numpy.array(expected, dtype=numpy.float64)
    deviation = abs(dwpc_matrix - expected)
    assert deviation.max() == pytest.approx(0, abs=1e-7)
    assert row == exp_row
    assert col == exp_col
def test__dwpc_baba(m_path):
    """
    Check _dwpc_baba() output (labels and matrix) against the solution from
    get_baba_matrices() for the given metapath abbreviation, using
    damping=0.5 and dense_threshold=0 on the 'disease-gene-example' graph.
    """
    graph = get_graph('disease-gene-example')
    metapath = graph.metagraph.metapath_from_abbrev(m_path)
    row_sol, col_sol, adj_sol = get_baba_matrices(m_path)

    row, col, dwpc_matrix = _dwpc_baba(
        graph, metapath, damping=0.5, dense_threshold=0)

    assert row_sol == row
    assert col_sol == col
    deviation = abs(adj_sol - dwpc_matrix)
    assert deviation.max() == pytest.approx(0, abs=1e-8)
def test__dwpc_baab(metapath, expected):
    """
    Check _dwpc_baab() against an expected matrix and the node labels from
    get_nodes(), using damping=0.5 and dense_threshold=1 on the
    'disease-gene-example' graph.
    """
    exp_row, exp_col = get_nodes(metapath)
    graph = get_graph('disease-gene-example')
    metapath = graph.metagraph.metapath_from_abbrev(metapath)

    row, col, dwpc_matrix = _dwpc_baab(
        graph, metapath, damping=0.5, dense_threshold=1)

    expected = numpy.array(expected, dtype=numpy.float64)
    deviation = abs(dwpc_matrix - expected)
    assert deviation.max() == pytest.approx(0, abs=1e-7)
    assert exp_row == row
    assert exp_col == col
def test__dwpc_general_case(length):
    """
    Test _dwpc_general_case() on a metapath made of repeated Gene-interacts
    metaedges: 'GiG' followed by `length` additional 'iG' segments.

    Expected labels and matrix come from get_general_solutions(length).
    """
    graph = get_graph('disease-gene-example')
    abbreviation = 'GiG' + 'iG' * length
    metapath = graph.metagraph.metapath_from_abbrev(abbreviation)

    rows, cols, dwpc_mat = _dwpc_general_case(graph, metapath, damping=0.5)
    exp_row, exp_col, exp_dwpc = get_general_solutions(length)

    # Matrix first, then row and column labels.
    deviation = abs(dwpc_mat - exp_dwpc)
    assert deviation.max() == pytest.approx(0, abs=1e-7)
    assert rows == exp_row
    assert cols == exp_col
def test_CbGpPWpGaD_traversal():
    """
    Test path counts and degree-weighted path counts for the CbGpPWpGaD
    metapath between bupropion and nicotine dependence on the
    'bupropion-subgraph' graph.

    Expected values come from the network traversal methods at
    https://git.io/vHBh2.
    """
    bupropion = 'DB01156'
    nicotine_dependence = 'DOID:0050742'

    graph = get_graph('bupropion-subgraph')
    metapath = graph.metagraph.metapath_from_abbrev('CbGpPWpGaD')

    # damping=0 gives the path count; damping=0.4 gives the DWPC.
    rows, cols, pc_matrix = dwpc(graph, metapath, damping=0)
    rows, cols, dwpc_matrix = dwpc(graph, metapath, damping=0.4)

    i = rows.index(bupropion)
    j = cols.index(nicotine_dependence)
    assert pc_matrix[i, j] == 142
    assert dwpc_matrix[i, j] == pytest.approx(0.03287590886921623)
def test_dwpc(metapath, expected, dense_threshold):
    """
    Check dwpc() with damping=0.5 on the 'disease-gene-example' graph.

    When `expected` is None the call must raise; otherwise the result must
    match `expected` to within 1e-7 and be sparse exactly when
    dense_threshold equals 1.
    """
    should_raise = expected is None
    if not should_raise:
        expected = numpy.array(expected, dtype=numpy.float64)

    graph = get_graph('disease-gene-example')
    metapath = graph.metagraph.metapath_from_abbrev(metapath)

    if should_raise:
        with pytest.raises(Exception):
            dwpc(graph, metapath, damping=0.5,
                 dense_threshold=dense_threshold)
        return

    row, col, dwpc_matrix = dwpc(
        graph, metapath, damping=0.5, dense_threshold=dense_threshold)
    deviation = abs(expected - dwpc_matrix)
    assert deviation.max() == pytest.approx(0, abs=1e-7)

    # Output sparsity is checked against the dense_threshold setting.
    if dense_threshold == 1:
        assert sparse.issparse(dwpc_matrix)
    else:
        assert not sparse.issparse(dwpc_matrix)
def test_path_count_priority_cache(tmpdir, allocate_GB):
    """
    Test PathCountPriorityCache by running the same DWWC computation three
    times and checking the cache hit counters after each run.

    The computed matrix is saved to the hetmat's path-counts location
    between the second and third runs.
    """
    hetmat = get_graph('bupropion-subgraph', hetmat=True, directory=tmpdir)
    cache = hetmatpy.hetmat.caching.PathCountPriorityCache(hetmat, allocate_GB)
    hetmat.path_counts_cache = cache
    # NOTE(review): get_stats is referenced without being called; if it is a
    # method this prints the bound-method repr -- confirm the intent.
    print(cache.get_stats)

    def run_dwwc():
        # One recursive DWWC computation of CbGpPWpGaD at damping 0.5.
        return hetmatpy.degree_weight.dwwc(
            graph=hetmat,
            metapath='CbGpPWpGaD',
            damping=0.5,
            dwwc_method=hetmatpy.degree_weight.dwwc_recursive,
        )

    def check_hits(without_allocation, with_allocation):
        # Each argument is a (memory, disk, absent) triple of expected hit
        # counts; which one applies depends on allocate_GB.
        if allocate_GB == 0:
            counts = without_allocation
        elif allocate_GB > 0:
            counts = with_allocation
        else:
            return
        memory, disk, absent = counts
        assert cache.hits['memory'] == memory
        assert cache.hits['disk'] == disk
        assert cache.hits['absent'] == absent

    # First run
    assert sum(cache.hits.values()) == 0
    row_ids, col_ids, matrix = run_dwwc()
    assert sum(cache.hits.values()) > 0
    check_hits(without_allocation=(0, 0, 4), with_allocation=(0, 0, 4))

    # Second run
    row_ids, col_ids, matrix = run_dwwc()
    check_hits(without_allocation=(0, 0, 8), with_allocation=(1, 0, 4))

    # Save DWWC matrix
    path = hetmat.get_path_counts_path('CbGpPWpGaD', 'dwwc', 0.5, 'npy')
    path.parent.mkdir(parents=True)
    hetmatpy.hetmat.save_matrix(matrix, path)

    # Third run
    row_ids, col_ids, matrix = run_dwwc()
    check_hits(without_allocation=(0, 1, 8), with_allocation=(2, 0, 4))
    print(cache.get_stats)