コード例 #1
0
def test_node_features():
    """Smoke-test residue-graph construction with node featurisers enabled.

    Only ``expasy_protein_scale`` and ``meiler_embedding`` are active. The
    per-node assertions are still disabled (see the TODO notes below), so at
    present this only exercises graph construction and node iteration.
    """
    # TODO: this test requires attention.
    # TODO: the node data assignment flow needs refactoring.
    # Featurisers not enabled yet: rsa, asa, phi, psi, secondary_structure,
    # partial(aaindex1, accession="FAUJ880111").
    featurisers = [expasy_protein_scale, meiler_embedding]
    pdb_file = Path(__file__).parent / "test_data/4hhb.pdb"

    config = ProteinGraphConfig(node_metadata_functions=featurisers)
    G = construct_graph(pdb_path=str(pdb_file), config=config)

    # TODO: the featurisers return pd.Series rather than adding to the node,
    # so the existence checks ("meiler_embedding", expasy_protein_scale,
    # "rsa", "asa", "phi", "psi", "secondary_structure" in the node data)
    # are disabled and the nodes are merely iterated.
    for _node, _attrs in G.nodes(data=True):
        pass
コード例 #2
0
def test_insertion_handling():
    """Check node counts for 6OGE when insertions are disabled.

    The summed lengths of the per-chain sequences (chains A-E) and the number
    of coordinate rows must both equal the number of nodes in the graph.
    """
    edge_funcs = [
        add_peptide_bonds,
        add_hydrogen_bond_interactions,
        add_ionic_interactions,
        add_aromatic_sulphur_interactions,
        add_hydrophobic_interactions,
        add_cation_pi_interactions,
    ]
    config = ProteinGraphConfig(
        granularity="CA",
        keep_hets=False,
        insertions=False,
        verbose=False,
        node_metadata_functions=[meiler_embedding, expasy_protein_scale],
        edge_construction_functions=edge_funcs,
    )

    # This is a nasty PDB with a lot of insertions and altlocs
    g = construct_graph(config=config, pdb_code="6OGE")

    n_nodes = len(g)
    sequence_total = sum(
        len(g.graph[f"sequence_{chain}"]) for chain in "ABCDE"
    )
    assert sequence_total == n_nodes
    assert g.graph["coords"].shape[0] == n_nodes
コード例 #3
0
def test_sequence_features():
    """Check graph-level sequence features are attached for every chain.

    Builds the 4hhb residue graph with the BioVec embedding and molecular
    weight featurisers, then checks the per-chain keys in ``G.graph``.
    """
    pdb_file = Path(__file__).parent / "test_data/4hhb.pdb"

    # The ESM featurisers (esm_sequence_embedding, esm_residue_embedding)
    # are currently disabled.
    config = ProteinGraphConfig(
        graph_metadata_functions=[biovec_sequence_embedding, molecular_weight]
    )
    G = construct_graph(pdb_path=str(pdb_file), config=config)

    # TODO: node-level checks (an "esm_embedding" entry of length 1280 on
    # every node) are disabled; this can probably be improved.

    # Graph-level features: one entry per chain for each expected prefix.
    # The "esm_embedding" prefix is left out while ESM is disabled.
    expected_prefixes = ("sequence", "biovec_embedding", "molecular_weight")
    for chain in G.graph["chain_ids"]:
        for prefix in expected_prefixes:
            assert f"{prefix}_{chain}" in G.graph
コード例 #4
0
ファイル: test_subgraphs.py プロジェクト: a-r-j/graphein
def test_extract_subgraph_from_bond_type():
    """Tests subgraph extraction from bond type.

    Extracts the subgraph for ``BOND_TYPES`` and its inverse, and checks
    that edges carrying a selected bond type end up in (respectively stay
    out of) the extracted subgraph.
    """
    file_path = Path(__file__).parent / "test_data/4hhb.pdb"
    # NOTE: this config is built but not passed to ``construct_graph`` (the
    # keyword is commented out below), so the graph is constructed with the
    # default configuration. TODO: confirm whether it should be re-enabled.
    config = ProteinGraphConfig(
        edge_construction_functions=[add_peptide_bonds, add_ionic_interactions]
    )
    G = construct_graph(pdb_path=str(file_path))  # , config=config)

    BOND_TYPES = ["ionic"]

    def has_selected_bond(edge_data):
        # ``kind`` holds a collection of bond names, so its *members* must be
        # tested; ``edge_data["kind"] in BOND_TYPES`` compares the whole
        # collection against each string and is never true.
        return any(bond in BOND_TYPES for bond in edge_data["kind"])

    s_g = extract_subgraph_by_bond_type(G, BOND_TYPES, filter_dataframe=True)

    # Every edge with a selected bond type must be present in the subgraph
    for u, v, d in G.edges(data=True):
        if has_selected_bond(d):
            assert u in s_g.nodes()
            assert v in s_g.nodes()
            assert (u, v) in s_g.edges()

    # The subgraph must carry only the selected bond types
    for u, v, d in s_g.edges(data=True):
        for bond in list(d["kind"]):
            assert bond in BOND_TYPES

    s_g = extract_subgraph_by_bond_type(
        G, BOND_TYPES, filter_dataframe=True, inverse=True
    )

    # Edges with a selected bond type must be absent from the inverse
    for u, v, d in G.edges(data=True):
        if has_selected_bond(d):
            assert (u, v) not in s_g.edges()

    # The inverse subgraph must carry none of the selected bond types
    for u, v, d in s_g.edges(data=True):
        for bond in list(d["kind"]):
            assert bond not in BOND_TYPES
コード例 #5
0
ファイル: test_subgraphs.py プロジェクト: a-r-j/graphein
def test_extract_subgraph_from_chains():
    """Check chain-based subgraph extraction and its inverse selection."""
    pdb_file = Path(__file__).parent / "test_data/4hhb.pdb"
    G = construct_graph(pdb_path=str(pdb_file))

    CHAINS = ["A", "C"]

    def on_selected_chain(attrs):
        return attrs["chain_id"] in CHAINS

    kept = extract_subgraph_from_chains(G, CHAINS, filter_dataframe=True)

    # Only the requested chains are present in the subgraph
    assert all(on_selected_chain(attrs) for _, attrs in kept.nodes(data=True))

    # Every node of the requested chains was extracted
    for node, attrs in G.nodes(data=True):
        if on_selected_chain(attrs):
            assert node in kept.nodes()

    # The filtered dataframe holds only the requested chains
    assert kept.graph["pdb_df"]["chain_id"].isin(CHAINS).all()

    dropped = extract_subgraph_from_chains(
        G, CHAINS, filter_dataframe=True, inverse=True
    )

    # The inverse selection holds none of the requested chains
    assert not any(
        on_selected_chain(attrs) for _, attrs in dropped.nodes(data=True)
    )

    # No node of the requested chains survives the inverse selection
    for node, attrs in G.nodes(data=True):
        if on_selected_chain(attrs):
            assert node not in dropped.nodes()
コード例 #6
0
ファイル: test_subgraphs.py プロジェクト: a-r-j/graphein
def test_extract_subgraph_from_sequence_position():
    """Check subgraph extraction by residue number and its inverse."""
    pdb_file = Path(__file__).parent / "test_data/4hhb.pdb"
    G = construct_graph(pdb_path=str(pdb_file))

    # Odd residue numbers 1, 3, ..., 49
    SEQ_POS = list(range(1, 50, 2))

    def at_selected_position(attrs):
        return attrs["residue_number"] in SEQ_POS

    kept = extract_subgraph_by_sequence_position(
        G, SEQ_POS, filter_dataframe=True
    )

    # Only the requested positions are present in the subgraph
    assert all(
        at_selected_position(attrs) for _, attrs in kept.nodes(data=True)
    )

    # Every node at a requested position was extracted
    for node, attrs in G.nodes(data=True):
        if at_selected_position(attrs):
            assert node in kept.nodes()

    # The filtered dataframe holds only the requested positions
    assert kept.graph["pdb_df"]["residue_number"].isin(SEQ_POS).all()

    dropped = extract_subgraph_by_sequence_position(
        G, SEQ_POS, filter_dataframe=True, inverse=True
    )

    # The inverse selection holds none of the requested positions
    assert not any(
        at_selected_position(attrs) for _, attrs in dropped.nodes(data=True)
    )

    # No node at a requested position survives the inverse selection
    for node, attrs in G.nodes(data=True):
        if at_selected_position(attrs):
            assert node not in dropped.nodes()
コード例 #7
0
ファイル: test_subgraphs.py プロジェクト: a-r-j/graphein
def test_extract_subgraph_from_point():
    """Check spherical subgraph extraction around a point and its inverse."""
    pdb_file = Path(__file__).parent / "test_data/4hhb.pdb"
    G = construct_graph(pdb_path=str(pdb_file))

    POINT = np.array([0.0, 0.0, 0.0])
    RADIUS = 10

    def distance_to_point(attrs):
        # Euclidean distance between the node coordinates and POINT
        return np.linalg.norm(attrs["coords"] - POINT)

    inside = extract_subgraph_from_point(
        G, POINT, RADIUS, filter_dataframe=True
    )

    # Every extracted node lies strictly within the sphere
    for _, attrs in inside.nodes(data=True):
        assert distance_to_point(attrs) < RADIUS

    # Every node strictly within the sphere was extracted
    for node, attrs in G.nodes(data=True):
        if distance_to_point(attrs) < RADIUS:
            assert node in inside.nodes()

    outside = extract_subgraph_from_point(
        G, POINT, RADIUS, filter_dataframe=True, inverse=True
    )

    # Every node of the inverse selection lies strictly outside the sphere
    for _, attrs in outside.nodes(data=True):
        assert distance_to_point(attrs) > RADIUS

    # Every node strictly outside the sphere was extracted
    for node, attrs in G.nodes(data=True):
        if distance_to_point(attrs) > RADIUS:
            assert node in outside.nodes()
コード例 #8
0
def test_distance_edges():
    """Example-based smoke test for distance-based edge construction.

    Builds the 4hhb graph with every edge function listed below and checks
    that a graph object is returned.
    """
    pdb_file = Path(__file__).parent / "test_data/4hhb.pdb"

    knn_edges = partial(add_k_nn_edges, k=5, long_interaction_threshold=10)
    threshold_edges = partial(
        add_distance_threshold,
        threshold=12,
        long_interaction_threshold=10,
    )

    config = ProteinGraphConfig(
        edge_construction_functions=[
            knn_edges,
            add_hydrophobic_interactions,
            # TODO: ring centroids require precomputing (an older note marked
            # this function as removed, but it is enabled here).
            add_aromatic_interactions,
            add_aromatic_sulphur_interactions,
            add_delaunay_triangulation,
            add_cation_pi_interactions,
            add_peptide_bonds,
            add_hydrogen_bond_interactions,
            add_disulfide_interactions,
            add_ionic_interactions,
            threshold_edges,
        ]
    )
    G = construct_graph(pdb_path=str(pdb_file), config=config)
    assert G is not None
コード例 #9
0
ファイル: test_subgraphs.py プロジェクト: a-r-j/graphein
def test_extract_subgraph_from_atom_types():
    """Check subgraph extraction from a list of allowed atom types.

    Selecting only ``"CA"`` atoms from the default 4hhb graph is expected to
    return a graph with the same number of nodes as the original.
    """
    pdb_file = Path(__file__).parent / "test_data/4hhb.pdb"
    full_graph = construct_graph(pdb_path=str(pdb_file))

    ATOM_TYPES = ["CA"]
    subgraph = extract_subgraph_from_atom_types(
        full_graph, ATOM_TYPES, filter_dataframe=True
    )

    assert isinstance(subgraph, nx.Graph)
    assert len(subgraph) == len(full_graph)
コード例 #10
0
ファイル: test_utils.py プロジェクト: a-r-j/graphein
def test_save_graph_to_pdb():
    """Round-trip a graph through ``save_graph_to_pdb``.

    Saves the 4hhb graph to a PDB file, checks that the written ATOM records
    match the graph's ``pdb_df`` and that a graph rebuilt from the file is
    isomorphic to the original.

    The file is written to a private temporary directory rather than a fixed
    ``/tmp`` path, so the test is portable, leaves nothing behind and cannot
    collide with other tests or parallel runs.
    """
    import tempfile

    g = construct_graph(pdb_code="4hhb")

    with tempfile.TemporaryDirectory() as tmp_dir:
        out_path = os.path.join(tmp_dir, "test_graph.pdb")
        save_graph_to_pdb(g, out_path)

        # Check file exists (before parsing it, so a missing file is reported
        # by this assertion rather than by a reader error)
        assert os.path.isfile(out_path)

        a = read_pdb_to_dataframe(out_path).df["ATOM"]

        # Check for equivalence between saved and existing DFs.
        # We drop the line_idx columns as these will be renumbered
        assert_frame_equal(
            a.drop(["line_idx"], axis=1),
            g.graph["pdb_df"].drop(["line_idx"], axis=1),
        )
        h = construct_graph(pdb_path=out_path)

    # We check for isomorphism rather than equality as array features are
    # not comparable
    assert nx.is_isomorphic(g, h)
コード例 #11
0
ファイル: test_amino_acid.py プロジェクト: a-r-j/graphein
def test_amino_acid_one_hot_example():
    """Example-based test on 4hhb for `amino_acid_one_hot`.

    Checks that every node's encoding sums to one, and for the
    ``return_array=False`` variant that the hot entry matches the residue's
    one-letter code.
    """
    # Test np array
    array_config = ProteinGraphConfig(
        node_metadata_functions=[amino_acid_one_hot]
    )
    g = construct_graph(pdb_code="4hhb", config=array_config)
    for _, attrs in g.nodes(data=True):
        assert sum(attrs["amino_acid_one_hot"]) == 1

    # Test pd.Series
    series_featuriser = partial(amino_acid_one_hot, return_array=False)
    series_config = ProteinGraphConfig(
        node_metadata_functions=[series_featuriser]
    )
    g = construct_graph(pdb_code="4hhb", config=series_config)
    for _, attrs in g.nodes(data=True):
        encoding = attrs["amino_acid_one_hot"]
        expected_letter = RESI_THREE_TO_1[attrs["residue_name"]]
        assert sum(encoding) == 1
        assert encoding.idxmax() == expected_letter
コード例 #12
0
def test_chain_selection():
    """Example-based test that chain selection works correctly.

    Uses the 4hhb PDB file: the default graph must contain chains A-D, and
    ``chain_selection="AD"`` must restrict it to chains A and D.
    """
    pdb_file = Path(__file__).parent / "test_data/4hhb.pdb"

    # Default construction contains all chains
    all_chains = ["A", "B", "C", "D"]
    G = construct_graph(pdb_path=str(pdb_file))
    assert G.graph["chain_ids"] == all_chains
    assert all(
        attrs["chain_id"] in all_chains for _, attrs in G.nodes(data=True)
    )

    # Construction with a chain selection contains only those chains
    selected_chains = ["A", "D"]
    G = construct_graph(pdb_path=str(pdb_file), chain_selection="AD")
    assert G.graph["chain_ids"] == selected_chains
    assert all(
        attrs["chain_id"] in selected_chains
        for _, attrs in G.nodes(data=True)
    )
コード例 #13
0
def test_edges_do_not_add_nodes_for_chain_subset():
    """Check per-chain node counts of 2vvi with many edge functions enabled.

    Each single-chain graph must have exactly the expected number of nodes.
    """
    config = ProteinGraphConfig(
        edge_construction_functions=[
            add_peptide_bonds,
            add_hydrogen_bond_interactions,
            add_disulfide_interactions,
            add_ionic_interactions,
            add_aromatic_interactions,
            add_aromatic_sulphur_interactions,
            add_cation_pi_interactions,
        ],
    )

    # (chain, expected number of nodes), checked in this order
    expected_sizes = (("A", 217), ("B", 219), ("C", 222), ("D", 219))
    for chain, n_nodes in expected_sizes:
        g = construct_graph(
            config=config, pdb_code="2vvi", chain_selection=chain
        )
        assert len(g) == n_nodes
コード例 #14
0
ファイル: test_geometry.py プロジェクト: a-r-j/graphein
def test_add_beta_carbon_vector():
    """Tests ``add_beta_carbon_vector`` on 1lds, scaled and unscaled.

    Scaled: every node has a 3-vector ``c_beta_vector`` that is zero for
    glycine and has unit norm otherwise.
    Unscaled: the vector is zero for glycine and otherwise points from the
    node coordinates to the CB atom found in the raw PDB dataframe.
    """
    coord_cols = ["x_coord", "y_coord", "z_coord"]
    zero_vector = np.array([0.0, 0.0, 0.0])

    config = ProteinGraphConfig(
        edge_construction_functions=[
            partial(add_beta_carbon_vector, scale=True)
        ],
    )
    g = construct_graph(pdb_code="1lds", config=config)

    raw_pdb = g.graph["raw_pdb_df"]
    for n, d in g.nodes(data=True):
        # Check that the node has the correct attributes
        assert "c_beta_vector" in d
        # Check the vector is of the correct dimensionality
        assert d["c_beta_vector"].shape == (3,)

        # check glycines are zero
        if d["residue_name"] == "GLY":
            np.testing.assert_equal(d["c_beta_vector"], zero_vector)
        else:
            # Check scaled vector has norm close 1
            np.testing.assert_almost_equal(
                np.linalg.norm(d["c_beta_vector"]), 1.0
            )

    # Test unscaled vector
    config = ProteinGraphConfig(
        edge_construction_functions=[
            partial(add_beta_carbon_vector, scale=False)
        ],
    )
    g = construct_graph(pdb_code="1lds", config=config)

    # Loop-invariant half of the row mask
    is_cb = raw_pdb["atom_name"] == "CB"
    for n, d in g.nodes(data=True):
        # check glycines are zero
        if d["residue_name"] == "GLY":
            np.testing.assert_equal(d["c_beta_vector"], zero_vector)
        else:
            # Check the vector is pointing in the correct direction.
            # One combined mask with .loc replaces chained ``df[m1][m2]``
            # indexing, whose second mask is indexed like the unfiltered
            # frame and so has to be reindexed by pandas (with a warning).
            cb_rows = raw_pdb.loc[
                (raw_pdb["node_id"] == n) & is_cb, coord_cols
            ]
            cb_true = np.array(cb_rows).T.squeeze()
            np.testing.assert_almost_equal(
                cb_true, d["coords"] + d["c_beta_vector"]
            )
コード例 #15
0
ファイル: test_geometry.py プロジェクト: a-r-j/graphein
def test_add_sidechain_vector():
    """Tests ``add_sidechain_vector`` on 1lds, scaled and unscaled.

    Scaled: every node has a 3-vector ``sidechain_vector`` that is zero for
    glycine and has unit norm otherwise.
    Unscaled: the vector is zero for glycine and otherwise points from the
    node coordinates to the mean coordinate of the node's rows in
    ``rgroup_df``.
    """
    coord_cols = ["x_coord", "y_coord", "z_coord"]
    zero_vector = np.array([0.0, 0.0, 0.0])

    config = ProteinGraphConfig(
        edge_construction_functions=[
            partial(add_sidechain_vector, scale=True)
        ],
    )
    g = construct_graph(pdb_code="1lds", config=config)

    for n, d in g.nodes(data=True):
        # Check that the node has the correct attributes
        assert "sidechain_vector" in d
        # Check the vector is of the correct dimensionality
        assert d["sidechain_vector"].shape == (3,)

        # check glycines are zero
        if d["residue_name"] == "GLY":
            np.testing.assert_equal(d["sidechain_vector"], zero_vector)
        else:
            # Check scaled vector has norm close 1
            np.testing.assert_almost_equal(
                np.linalg.norm(d["sidechain_vector"]), 1.0
            )

    # Test unscaled vector
    config = ProteinGraphConfig(
        edge_construction_functions=[
            partial(add_sidechain_vector, scale=False)
        ],
    )
    g = construct_graph(pdb_code="1lds", config=config)

    # Per-node mean coordinates, computed once instead of once per node.
    # Selecting the coordinate columns before ``mean`` keeps non-numeric
    # columns out of the aggregation, which newer pandas versions reject.
    centroids = g.graph["rgroup_df"].groupby("node_id")[coord_cols].mean()

    for n, d in g.nodes(data=True):
        # check glycines are zero
        if d["residue_name"] == "GLY":
            np.testing.assert_equal(d["sidechain_vector"], zero_vector)
        else:
            # Check the vector is pointing in the correct direction
            sc_true = np.array(centroids.loc[n])
            np.testing.assert_almost_equal(
                sc_true, d["coords"] + d["sidechain_vector"]
            )
コード例 #16
0
ファイル: test_utils.py プロジェクト: a-r-j/graphein
def test_save_rgroup_df_to_pdb():
    """Round-trip a graph's ``rgroup_df`` through ``save_rgroup_df_to_pdb``.

    Saves the data for the 4hhb graph to a PDB file and checks that the
    written ATOM records match the graph's ``rgroup_df``.

    The file is written to a private temporary directory rather than a fixed
    ``/tmp`` path, so the test is portable, leaves nothing behind and cannot
    collide with other tests or parallel runs.
    """
    import tempfile

    g = construct_graph(pdb_code="4hhb")

    with tempfile.TemporaryDirectory() as tmp_dir:
        out_path = os.path.join(tmp_dir, "test_rgroup.pdb")
        save_rgroup_df_to_pdb(g, out_path)

        # Check file exists (before parsing it, so a missing file is reported
        # by this assertion rather than by a reader error)
        assert os.path.isfile(out_path)

        a = read_pdb_to_dataframe(out_path).df["ATOM"]

    # We drop the line_idx columns as these will be renumbered
    assert_frame_equal(
        a.drop(["line_idx"], axis=1),
        g.graph["rgroup_df"].drop(["line_idx"], axis=1),
    )
コード例 #17
0
def test_construct_graph():
    """Example-based test that graph construction works correctly.

    Uses the 4hhb PDB file: the default graph must be a NetworkX graph with
    574 nodes and 570 edges whose kind is exactly ``{"peptide_bond"}``.
    """
    pdb_file = Path(__file__).parent / "test_data/4hhb.pdb"
    G = construct_graph(pdb_path=str(pdb_file))

    assert isinstance(G, nx.Graph)
    assert len(G) == 574

    # Count the edges annotated purely as peptide bonds
    n_peptide_bonds = sum(
        1
        for _, _, attrs in G.edges(data=True)
        if attrs["kind"] == {"peptide_bond"}
    )
    assert n_peptide_bonds == 570
コード例 #18
0
ファイル: test_subgraphs.py プロジェクト: a-r-j/graphein
def test_extract_k_hop_subgraph():
    """Check 1-hop subgraph extraction around a central node.

    The subgraph must consist of the central node plus exactly its
    neighbours in the original graph.
    """
    pdb_file = Path(__file__).parent / "test_data/4hhb.pdb"
    G = construct_graph(pdb_path=str(pdb_file))

    CENTRAL_NODE = "B:SER:49"
    K = 1
    s_g = extract_k_hop_subgraph(G, CENTRAL_NODE, K, filter_dataframe=True)

    neighbours = list(G.neighbors(CENTRAL_NODE))

    # Nothing but the centre and its neighbours was extracted
    assert all(
        node == CENTRAL_NODE or node in neighbours for node in s_g.nodes()
    )

    # Every neighbour was extracted
    assert all(node in s_g.nodes() for node in neighbours)
コード例 #19
0
ファイル: test_subgraphs.py プロジェクト: a-r-j/graphein
def test_surface_subgraph():
    """Tests surface subgraph extraction.

    Builds the 4hhb graph with the ``rsa`` metadata function, extracts the
    subgraph at ``RSA_THRESHOLD`` and checks that it holds exactly the nodes
    whose ``rsa`` value is at or above the threshold.
    """
    file_path = Path(__file__).parent / "test_data/4hhb.pdb"
    config = ProteinGraphConfig(
        graph_metadata_functions=[rsa], dssp_config=DSSPConfig()
    )
    G = construct_graph(pdb_path=str(file_path), config=config)

    RSA_THRESHOLD: float = 0.2
    s_g = extract_surface_subgraph(G, RSA_THRESHOLD, filter_dataframe=True)

    # Every extracted node meets the threshold
    for n, d in s_g.nodes(data=True):
        assert d["rsa"] >= RSA_THRESHOLD

    # Every node meeting the threshold was extracted
    for n, d in G.nodes(data=True):
        if d["rsa"] >= RSA_THRESHOLD:
            # A real message: ``print(n, d)`` evaluates to None, so the
            # AssertionError itself carried no information.
            assert (
                n in s_g.nodes()
            ), f"node {n} missing from surface subgraph: {d}"
コード例 #20
0
ファイル: test_subgraphs.py プロジェクト: a-r-j/graphein
def test_node_list_subgraphing():
    """Check subgraph extraction from an explicit node list and its inverse."""
    pdb_file = Path(__file__).parent / "test_data/4hhb.pdb"
    NODE_LIST = ["C:ALA:28", "C:ARG:31", "D:LEU:75", "A:THR:38"]
    node_pattern = "|".join(NODE_LIST)

    G = construct_graph(pdb_path=str(pdb_file))

    # --- Direct selection ---
    g = extract_subgraph_from_node_list(G, NODE_LIST, filter_dataframe=True)

    # We get back a graph holding exactly the requested nodes
    assert isinstance(g, nx.Graph)
    assert len(g) == len(NODE_LIST)
    assert all(node in NODE_LIST for node in g.nodes())

    # Every row of the filtered dataframe matches a requested node
    df_matches = g.graph["pdb_df"]["node_id"].str.contains(
        node_pattern, case=True
    )
    assert df_matches.all()

    # The returned node list holds only requested nodes
    returned_node_list = extract_subgraph_from_node_list(
        G, NODE_LIST, return_node_list=True
    )
    assert all(elem in NODE_LIST for elem in returned_node_list)

    # --- Inverse selection ---
    g = extract_subgraph_from_node_list(
        G, NODE_LIST, inverse=True, filter_dataframe=True
    )

    # There is no overlap with the requested nodes
    assert len(g) == len(G) - len(NODE_LIST)
    assert all(node not in NODE_LIST for node in g.nodes())

    # No row of the filtered dataframe matches a requested node
    df_matches = g.graph["pdb_df"]["node_id"].str.contains(
        node_pattern, case=True
    )
    assert not df_matches.any()

    returned_node_list = extract_subgraph_from_node_list(
        G, NODE_LIST, inverse=True, return_node_list=True
    )
    assert all(elem not in NODE_LIST for elem in returned_node_list)
コード例 #21
0
ファイル: cli.py プロジェクト: a-r-j/graphein
def main(config_path, pdb_path, output_path):
    """Build a graph for each PDB file and pickle it into ``output_path``.

    :param config_path: Path to a config file handed to ``parse_config``;
        when falsy, ``config=None`` is passed to ``construct_graph``.
    :param pdb_path: Path object pointing to a single PDB file or to a
        directory that is searched (non-recursively) for ``*.pdb`` files.
    :param output_path: Path object of the directory that receives one
        ``<stem>.pickle`` file per input structure.
    :raises NotImplementedError: If ``pdb_path`` is neither a file nor a
        directory.
    """
    config = parse_config(path=config_path) if config_path else None

    # Resolve the list of structures to process
    if pdb_path.is_file():
        targets = [pdb_path]
    elif pdb_path.is_dir():
        targets = list(pdb_path.glob("*.pdb"))
    else:
        raise NotImplementedError(
            "Given PDB path needs to point to either a pdb file or a directory with pdb files."
        )

    for pdb_file in targets:
        graph = construct_graph(config=config, pdb_path=str(pdb_file))
        destination = output_path / f"{pdb_file.stem}.pickle"
        nx.write_gpickle(graph, str(destination))
コード例 #22
0
ファイル: test_subgraphs.py プロジェクト: a-r-j/graphein
def test_secondary_structure_subgraph():
    """Check subgraph extraction by secondary-structure element.

    Builds the 4hhb graph with the ``secondary_structure`` metadata function
    and checks that the subgraph for ``"H"`` holds exactly the nodes whose
    ``ss`` value is in the selection.
    """
    pdb_file = Path(__file__).parent / "test_data/4hhb.pdb"
    config = ProteinGraphConfig(
        graph_metadata_functions=[secondary_structure],
        dssp_config=DSSPConfig(),
    )
    G = construct_graph(pdb_path=str(pdb_file), config=config)

    SS_ELEMENTS: List[str] = ["H"]
    s_g = extract_subgraph_from_secondary_structure(
        G, SS_ELEMENTS, filter_dataframe=True
    )

    # Only nodes with a selected element were extracted
    assert all(attrs["ss"] in SS_ELEMENTS for _, attrs in s_g.nodes(data=True))

    # Every node with a selected element was extracted
    for node, attrs in G.nodes(data=True):
        if attrs["ss"] in SS_ELEMENTS:
            assert node in s_g.nodes()
コード例 #23
0
ファイル: test_subgraphs.py プロジェクト: a-r-j/graphein
def test_successful_pickle():
    """Tests subgraphs can be successfully pickled and unpickled.

    Extracts an ALA/SER/MET subgraph of 4hhb, pickles it to a file, loads it
    back and checks the loaded graph is isomorphic to the original.

    An anonymous temporary file replaces the fixed ``/tmp/test_graph.p``
    path, so the test is portable, leaves nothing behind and cannot collide
    with other tests or parallel runs.
    """
    import tempfile

    file_path = Path(__file__).parent / "test_data/4hhb.pdb"
    config = ProteinGraphConfig(
        graph_metadata_functions=[secondary_structure],
        dssp_config=DSSPConfig(),
    )
    G = construct_graph(pdb_path=str(file_path), config=config)
    s_g = extract_subgraph_from_residue_types(
        G,
        residue_types=["ALA", "SER", "MET"],
        update_coords=True,
        filter_dataframe=True,
        recompute_distmat=True,
    )

    with tempfile.TemporaryFile() as f:
        pickle.dump(s_g, f)
        # Rewind so the graph is read back from the start of the same file
        f.seek(0)
        loaded_graph = pickle.load(f)

    assert nx.is_isomorphic(s_g, loaded_graph)
コード例 #24
0
ファイル: test_subgraphs.py プロジェクト: a-r-j/graphein
def test_extract_subgraph_from_residue_types():
    """Tests subgraph extraction from a list of residue types.

    Checks the direct and the inverse selection: node counts per residue
    type, the residue names on the extracted nodes, the filtered dataframe
    and the node lists returned with ``return_node_list=True``.
    """
    file_path = Path(__file__).parent / "test_data/4hhb.pdb"
    RESIDUE_TYPES = ["ALA", "SER", "GLY"]
    ALANINES = 72
    SERINES = 32
    GLYCINES = 40

    G = construct_graph(pdb_path=str(file_path))

    g = extract_subgraph_from_residue_types(
        G, RESIDUE_TYPES, filter_dataframe=True
    )

    # Check we get back a graph and it contains the correct nodes
    assert isinstance(g, nx.Graph)
    assert len(g) == ALANINES + SERINES + GLYCINES
    for n, d in g.nodes(data=True):
        assert d["residue_name"] in RESIDUE_TYPES
    assert (
        g.graph["pdb_df"]["residue_name"]
        .str.contains("|".join(RESIDUE_TYPES), case=True)
        .all()
    )

    assert (
        len([n for n, d in g.nodes(data=True) if d["residue_name"] == "ALA"])
        == ALANINES
    )
    assert (
        len([n for n, d in g.nodes(data=True) if d["residue_name"] == "GLY"])
        == GLYCINES
    )
    assert (
        len([n for n, d in g.nodes(data=True) if d["residue_name"] == "SER"])
        == SERINES
    )

    # Check the returned node list only holds nodes of the requested types.
    # This must call the residue-type extractor (the node-list extractor was
    # called before, with residue names passed as node IDs). The returned
    # elements are node IDs, so the residue name is looked up on the
    # original graph instead of comparing the ID with the residue types.
    returned_node_list = extract_subgraph_from_residue_types(
        G, RESIDUE_TYPES, return_node_list=True
    )
    assert all(
        G.nodes[elem]["residue_name"] in RESIDUE_TYPES
        for elem in returned_node_list
    )

    # Check there is no overlap when we inverse the selection
    g = extract_subgraph_from_residue_types(
        G, RESIDUE_TYPES, inverse=True, filter_dataframe=True
    )

    # assert len(g) == (len(G) - GLYCINES - ALANINES - SERINES)
    # Compare residue names, not node IDs: a node ID never equals a residue
    # type, so checking the ID could not fail.
    for n, d in g.nodes(data=True):
        assert d["residue_name"] not in RESIDUE_TYPES

    assert not (
        g.graph["pdb_df"]["residue_name"]
        .str.contains("|".join(RESIDUE_TYPES), case=True)
        .any()
    )

    returned_node_list = extract_subgraph_from_residue_types(
        G, RESIDUE_TYPES, inverse=True, return_node_list=True
    )

    assert all(
        G.nodes[elem]["residue_name"] not in RESIDUE_TYPES
        for elem in returned_node_list
    )
コード例 #25
0
        "granularity": "atom",
        "keep_hets": False,
        "deprotonate": True,
        "insertions": False,
        "verbose": False,
    }

    config = ProteinGraphConfig(**configs)
    config.edge_construction_functions = [
        add_atomic_edges,
        add_ring_status,
        add_bond_order,
    ]

    config.node_metadata_functions = [meiler_embedding, expasy_protein_scale]

    g = construct_graph(
        config=config, pdb_path="../examples/pdbs/3eiy.pdb", pdb_code="3eiy"
    )

    p = plotly_protein_structure_graph(
        g,
        30,
        (1000, 2000),
        colour_nodes_by="element_symbol",
        colour_edges_by="kind",
        label_node_ids=False,
    )

    p.show()
コード例 #26
0
ファイル: test_distance.py プロジェクト: a-r-j/graphein
def generate_graph():
    """Build the graph for the PDB file at the module-level ``DATA_PATH``.

    Helper function: calls ``construct_graph`` with only ``pdb_path`` set.
    """
    pdb_path = str(DATA_PATH)
    return construct_graph(pdb_path=pdb_path)
コード例 #27
0
    }

    config = ProteinGraphConfig(**configs)

    config.edge_construction_functions = [
        salt_bridge,
        hydrogen_bond,
        van_der_waals,
        pi_cation,
        pi_stacking,
        hydrophobic,
        t_stacking,
    ]
    # Test High-level API

    # Iterate over rows to produce Graph, pickle graph and label
    for row in tqdm(range(len(df))):
        example = df.iloc[row]
        file_path = f'pdbs/{example["Free PDB"]}.pdb'
        contact_file = f'contacts/{example["Free PDB"]}_contacts.tsv'

        g = construct_graph(config=config, pdb_code=example["Free PDB"])

        print(g)

    print("Successfully computed all graphs")

# Example Run:
# python make_rearrangement_data.py -o 'none' -n 'meiler' -s True -c '/home/arj39/Documents/github/getcontacts'
# python make_rearrangement_data.py -o 'none' -n 'meiler' -s True -c '/Users/arianjamasb/github/getcontacts'
コード例 #28
0
ファイル: conversion.py プロジェクト: a-r-j/graphein
    for i, (_, _, feat_dict) in enumerate(G.edges(data=True)):
        for key, value in feat_dict.items():
            data[str(key)] = (list(value) if i == 0 else data[str(key)] +
                              list(value))

    # Add graph-level features
    for feat_name in G.graph:
        data[str(feat_name)] = [G.graph[feat_name]]

    data["edge_index"] = edge_index.view(2, -1)
    data = Data.from_dict(data)
    data.num_nodes = G.number_of_nodes()

    return data


if __name__ == "__main__":
    # Manual smoke test, run only when this module is executed as a script:
    # build a protein graph and convert it from NetworkX to the "pyg" format.
    from graphein.protein.config import ProteinGraphConfig
    from graphein.protein.graphs import construct_graph

    # Graph for PDB code 3eiy, built with a default-constructed config
    g = construct_graph(pdb_code="3eiy", config=ProteinGraphConfig())
    assert type(g) is nx.Graph

    # print(SUPPORTED_FORMATS)

    # NOTE(review): ``verbose="gnn"`` presumably selects which attributes the
    # convertor keeps - confirm against GraphFormatConvertor.
    convertor = GraphFormatConvertor(src_format="nx",
                                     dst_format="pyg",
                                     verbose="gnn")
    pyg = convertor(g)
    assert type(pyg) is torch_geometric.data.Data