def test_rsp_num_rings():
    """Exercise ``RSPNumRings`` through ``_test_rule_min_max``.

    A fixed input molecule is parsed and handed to the helper together
    with the canonical SMILES expected for the 'min' and 'max' variants.
    """
    mol = Chem.MolFromSmiles(
        'O=C(c1c2ccccc2cc2ccccc12)N1CCC(N2CCC[C@@H](C(=O)N3CCOCC3)C2)CC1')
    expected_min = canonize_smiles(
        'O=C(c1c2ccccc2cc2ccccc12)N1CCC(N2CCCCC2)CC1')
    expected_max = canonize_smiles(
        'O=C(c1cccc2ccccc12)N1CCC(N2CCC[C@@H](C(=O)N3CCOCC3)C2)CC1')
    _test_rule_min_max(mol, RSPNumRings, expected_min, expected_max)
def test_scp_delta():
    """Exercise ``SCPDelta`` through ``_test_rule_min_max``.

    The two expected results are annotated in the original test as the
    spiro-retaining ('min') and bridged-retaining ('max') scaffolds.
    """
    mol = Chem.MolFromSmiles('OC5CC31CC5CC1C4CCc2occc2C4CC3')
    # retain spiro
    expected_min = canonize_smiles('OC1CCC2(CCC3c4ccoc4CCC3C2)C1')
    # retain bridged
    expected_max = canonize_smiles('OC1CC23CCC4C=CCCC4C2CC1C3')
    _test_rule_min_max(mol, SCPDelta, expected_min, expected_max)
def test_scp_num_linker_bonds():
    """Exercise ``SCPNumLinkerBonds`` through ``_test_rule_min_max``.

    A fixed input molecule is parsed and handed to the helper together
    with the canonical SMILES expected for the 'min' and 'max' variants.
    """
    mol = Chem.MolFromSmiles(
        'O=C(NCCCCN1CCN(c2ccccc2)CC1)c1ccc2c(c1)Cc1ccccc1-2')
    expected_min = canonize_smiles(
        'O=C(NCCCCN1CCNCC1)c1ccc2c(c1)Cc1ccccc1-2')
    expected_max = canonize_smiles(
        'O=C(NCCCCN1CCN(c2ccccc2)CC1)c1ccc2c(c1)CC=C2')
    _test_rule_min_max(mol, SCPNumLinkerBonds, expected_min, expected_max)
def test_rrp_het_atom_linked():
    """Exercise ``RRPHetAtomLinked`` through ``_test_rule_min_max``.

    A fixed input molecule is parsed and handed to the helper together
    with the canonical SMILES expected for the 'min' and 'max' variants.
    """
    mol = Chem.MolFromSmiles(
        'O=C(NCc1ccccc1)N1CCN2C(=O)OC(c3ccccc3)(c3ccccc3)[C@@H]2C1')
    expected_min = canonize_smiles(
        'O=C(NCc1ccccc1)N1CCN2C(=O)OC(c3ccccc3)[C@@H]2C1')
    expected_max = canonize_smiles(
        'O=C1OC(c2ccccc2)(c2ccccc2)[C@@H]2CNCCN12')
    _test_rule_min_max(mol, RRPHetAtomLinked, expected_min, expected_max)
def get_molecules_for_scaffold(self, scaffold_smiles, data=False, default=None):
    """Collect the molecule nodes reachable from a scaffold node.

    The graph is walked breadth-first (forward direction) from the query
    scaffold and every visited node whose ``'type'`` attribute equals
    ``'molecule'`` is reported. In the case of a scaffold tree the
    results represent the rules used to prioritize the scaffolds.

    Parameters
    ----------
    scaffold_smiles : str
        SMILES of query scaffold. If it is not a node of the graph as
        given, it is canonized (``failsafe=True``) and looked up again.
    data : str or bool, optional (default=False)
        If False, return just the molecule nodes ``n``.
        If True, return ``(n, attribute_dict)`` tuples.
        Otherwise, return ``(n, attribute_dict.get(data, default))``.
    default : value, optional (default=None)
        Value used for molecule nodes that don't have the requested
        attribute. Only relevant if data is not True or False.

    Returns
    -------
    list
        Molecule nodes (or 2-tuples, see ``data``); empty if the
        scaffold is not found in the graph.
    """
    if scaffold_smiles not in self:
        scaffold_smiles = canonize_smiles(scaffold_smiles, failsafe=True)
        if scaffold_smiles not in self:
            return []

    found = []
    for node in nx.bfs_tree(self, scaffold_smiles, reverse=False):
        attrs = self.nodes[node]
        if attrs.get('type') != 'molecule':
            continue
        # Identity checks are deliberate: only the bool singletons select
        # the "bare node" / "whole dict" modes, anything else is a key.
        if data is False:
            entry = node
        elif data is True:
            entry = (node, attrs)
        else:
            entry = (node, attrs.get(data, default))
        found.append(entry)
    return found
def _subgraph_from_scf(self, scaffold, traversal):
    """Private: Build the subgraph rooted at a scaffold node.

    Parameters
    ----------
    scaffold : str
        Scaffold node identifier; it is canonized before lookup.
    traversal : str {'parent', 'child', 'bidirectional'}
        Which neighbours of the query to include: its parent scaffolds,
        its descendants, or both.

    Returns
    -------
    subgraph : ScaffoldGraph
        A subgraph starting at `scaffold`.

    Raises
    ------
    ValueError
        If the canonized scaffold is not in the graph, or if
        `traversal` is not one of the accepted values.
    """
    graph = self._graph
    query = canonize_smiles(scaffold)
    # Membership is validated before the traversal argument, so an unknown
    # scaffold is reported even when traversal is also invalid.
    if not graph.scaffold_in_graph(query):
        raise ValueError(f'scaffold: {query} not in graph {graph}')
    if traversal not in ('parent', 'child', 'bidirectional'):
        raise ValueError(
            'traversal must be one of {child, parent, bidirectional}')

    members = [query]
    if traversal != 'child':
        members.extend(graph.get_parent_scaffolds(query))
    if traversal != 'parent':
        members.extend(nx.descendants(graph, query))
    return graph.subgraph(members)
def _get_scaffold_hierarchy(self, scaffold_smiles, data=False, default=None,
                            max_levels=-1, traversal='parent'):
    """Private: Return a list of parent/child scaffolds for a query scaffold.

    Parameters
    ----------
    scaffold_smiles : str
        SMILES of query scaffold. If it is not a node of the graph as
        given, it is canonized (``failsafe=True``) and looked up again.
    data : str, bool, optional
        The scaffold node attribute returned in 2-tuple (n, dict[data]).
        If True, return entire node attribute dict as (n, dict).
        If False, return just the nodes n. The default is False.
    default : value, optional
        Value used for nodes that don't have the requested attribute.
        Only relevant if data is not True or False.
    max_levels : int, optional
        If >= 0, only return scaffolds whose hierarchy differs from the
        query scaffold by at most `max_levels`, measured in the direction
        of traversal. A negative value (the default) disables the limit.
    traversal : {'parent', 'child'}, optional
        Direction of traversal.

    Returns
    -------
    list
        A list of scaffold parent/child nodes; empty if the query
        scaffold is not found in the graph.
    """
    assert traversal in {'parent', 'child'}, \
        "traversal must be 'parent' or 'child'"
    reverse = traversal == 'parent'

    if scaffold_smiles not in self:
        scaffold_smiles = canonize_smiles(scaffold_smiles, failsafe=True)
        if scaffold_smiles not in self:
            return []

    # A query node without a 'hierarchy' attribute is treated as infinitely
    # deep.
    level = self.nodes[scaffold_smiles].get('hierarchy', float('inf'))
    bfs = iter(nx.bfs_tree(self, scaffold_smiles, reverse=reverse).nodes)
    next(bfs)  # first entry is the query node

    next_hiers = []
    for succ in bfs:
        attrs = self.nodes[succ]
        if attrs.get('type') != 'scaffold':
            continue
        node_level = attrs.get('hierarchy', 0)
        # Measure the hierarchy gap along the traversal direction. Using
        # `level - node_level` for children is never positive, so it could
        # not exceed a non-negative max_levels and the limit had no effect.
        if reverse:
            distance = level - node_level
        else:
            distance = node_level - level
        if not (max_levels < 0 or distance <= max_levels):
            continue
        if data is False:
            next_hiers.append(succ)
        elif data is True:
            next_hiers.append((succ, attrs))
        else:
            next_hiers.append((succ, attrs.get(data, default)))
    return next_hiers
def scaffold_in_graph(self, scaffold_smiles):
    """Returns True if specified scaffold SMILES is in the scaffold graph.

    Parameters
    ----------
    scaffold_smiles : str
        SMILES of query scaffold.

    Returns
    -------
    bool
        True only if the key is a node of the graph and that node's
        ``'type'`` attribute equals ``'scaffold'``.

    Notes
    -----
    If the SMILES is not found as given, it is canonized
    (``failsafe=True``) and the graph is searched again with the
    canonized key.
    """
    if scaffold_smiles not in self:
        scaffold_smiles = canonize_smiles(scaffold_smiles, failsafe=True)
        if scaffold_smiles not in self:
            return False
    # Membership alone is not enough: other node types (e.g. molecules)
    # share the same graph, so the node type must be checked as well.
    return self.nodes[scaffold_smiles].get('type') == 'scaffold'
def get_molecules_for_scaffold(self, scaffold_smiles):
    """Return a list of molecule IDs which are represented by a scaffold in the graph.

    Note: This is determined by traversing the graph. In the case of a
    scaffold tree the results represent the rules used to prioritize the
    scaffolds.

    Parameters
    ----------
    scaffold_smiles : str
        SMILES of query scaffold. If it is not a node of the graph as
        given, it is canonized (``failsafe=True``) and looked up again.

    Returns
    -------
    list
        Nodes reachable from the scaffold (breadth-first, forward
        direction) whose ``'type'`` attribute equals ``'molecule'``;
        empty if the scaffold is not found in the graph.
    """
    if scaffold_smiles not in self:
        scaffold_smiles = canonize_smiles(scaffold_smiles, failsafe=True)
        if scaffold_smiles not in self:
            return []
    # `.get` keeps the traversal from raising KeyError on a node that has
    # no 'type' attribute; such a node is simply not a molecule.
    return [
        node
        for node in nx.bfs_tree(self, scaffold_smiles, reverse=False)
        if self.nodes[node].get('type') == 'molecule'
    ]
def get_child_scaffolds(self, scaffold_smiles, data=False, default=None,
                        max_levels=-1):
    """Return a list of child scaffolds for a query scaffold.

    The graph is walked breadth-first (forward direction) from the query
    scaffold; the query node itself is skipped and only nodes whose
    ``'type'`` attribute equals ``'scaffold'`` are reported.

    Parameters
    ----------
    scaffold_smiles : str
        SMILES of query scaffold. If it is not a node of the graph as
        given, it is canonized (``failsafe=True``) and looked up again.
    data : str or bool, optional (default=False)
        If False, return just the scaffold nodes ``n``.
        If True, return ``(n, attribute_dict)`` tuples.
        Otherwise, return ``(n, attribute_dict.get(data, default))``.
    default : value, optional (default=None)
        Value used for scaffold nodes that don't have the requested
        attribute. Only relevant if data is not True or False.
    max_levels : int, optional (default=-1)
        When not negative, only scaffolds whose ``'hierarchy'`` exceeds
        the query's by at most ``max_levels`` are returned.

    Returns
    -------
    list
        Child scaffold nodes (or 2-tuples, see ``data``); empty if the
        scaffold is not found in the graph.
    """
    if scaffold_smiles not in self:
        scaffold_smiles = canonize_smiles(scaffold_smiles, failsafe=True)
        if scaffold_smiles not in self:
            return []

    query_level = self.nodes[scaffold_smiles].get('hierarchy', float('inf'))
    walk = iter(nx.bfs_tree(self, scaffold_smiles, reverse=False).nodes)
    next(walk)  # first entry is the query node

    found = []
    for node in walk:
        attrs = self.nodes[node]
        if attrs.get('type') != 'scaffold':
            continue
        unlimited = max_levels < 0
        if not (unlimited
                or attrs.get('hierarchy', 0) - query_level <= max_levels):
            continue
        if data is False:
            entry = node
        elif data is True:
            entry = (node, attrs)
        else:
            entry = (node, attrs.get(data, default))
        found.append(entry)
    return found
def scaffold_in_graph(self, scaffold_smiles):
    """Returns True if the specified scaffold SMILES is in the scaffold graph.

    Parameters
    ----------
    scaffold_smiles : str
        SMILES of query scaffold.

    Returns
    -------
    bool
        True if the scaffold is in the graph, i.e. the key is a node and
        that node's ``'type'`` attribute equals ``'scaffold'``.

    Notes
    -----
    If not initially found the SMILES is canonized and the graph is
    searched with the canonized SMILES key.
    """
    if scaffold_smiles not in self:
        scaffold_smiles = canonize_smiles(scaffold_smiles, failsafe=True)
        if scaffold_smiles not in self:
            return False
    # `.get` so that a node lacking a 'type' attribute yields False instead
    # of raising KeyError from a membership query.
    return self.nodes[scaffold_smiles].get('type') == 'scaffold'
def test_scp_num_sulphur_atoms():
    """Exercise ``SCPNumSAtoms`` through ``_test_rule_min_max``.

    A fixed input molecule is parsed and handed to the helper together
    with the canonical SMILES expected for the 'min' and 'max' variants.
    """
    mol = Chem.MolFromSmiles('c1csc2c(NCN3CCN(CCc4ccccc4)CC3)ncnc12')
    expected_min = canonize_smiles('c1ccc(CCN2CCN(CNc3ccncn3)CC2)cc1')
    expected_max = canonize_smiles('c1nc(NCN2CCNCC2)c2sccc2n1')
    _test_rule_min_max(mol, SCPNumSAtoms, expected_min, expected_max)
def test_rsp_num_sulphur_atoms():
    """Exercise ``RSPNumSAtoms`` through ``_test_rule_min_max``.

    A fixed input molecule is parsed and handed to the helper together
    with the canonical SMILES expected for the 'min' and 'max' variants.
    """
    mol = Chem.MolFromSmiles('c1nc2ccc3nc(NC(=O)C(c4ccccc4)c4ccccc4)sc3c2s1')
    expected_min = canonize_smiles('O=C(Cc1ccccc1)Nc1nc2ccc3ncsc3c2s1')
    expected_max = canonize_smiles('O=C(Nc1nc2ccccc2s1)C(c1ccccc1)c1ccccc1')
    _test_rule_min_max(mol, RSPNumSAtoms, expected_min, expected_max)
def test_rrp_linker_length():
    """Exercise ``RRPLinkerLength`` through ``_test_rule_min_max``.

    A fixed input molecule is parsed and handed to the helper together
    with the canonical SMILES expected for the 'min' and 'max' variants.
    """
    mol = Chem.MolFromSmiles(
        'O=C1c2ccccc2-c2c(NCCc3ccccc3)c(=O)[nH]c3cccc1c23')
    expected_min = canonize_smiles(
        'O=C1C=Cc2c(NCCc3ccccc3)c(=O)[nH]c3cccc1c23')
    expected_max = canonize_smiles('O=C1c2ccccc2-c2cc(=O)[nH]c3cccc1c23')
    _test_rule_min_max(mol, RRPLinkerLength, expected_min, expected_max)
def test_rrp_ring_size():
    """Exercise ``RRPRingSize`` through ``_test_rule_min_max``.

    A fixed input molecule is parsed and handed to the helper together
    with the canonical SMILES expected for the 'min' and 'max' variants.
    """
    mol = Chem.MolFromSmiles('n1nc(-c2ccccc2)nc1=S')
    expected_min = canonize_smiles('c1ccccc1')
    expected_max = canonize_smiles('S=C1N=CN=N1')
    _test_rule_min_max(mol, RRPRingSize, expected_min, expected_max)
def test_scp_num_oxygen_atoms():
    """Exercise ``SCPNumOAtoms`` through ``_test_rule_min_max``.

    A fixed input molecule is parsed and handed to the helper together
    with the canonical SMILES expected for the 'min' and 'max' variants.
    """
    mol = Chem.MolFromSmiles('C2Oc3ccccc3C(=O)C2=O')
    expected_min = canonize_smiles('c1ccccc1')
    expected_max = canonize_smiles('O=C1C=COCC1=O')
    _test_rule_min_max(mol, SCPNumOAtoms, expected_min, expected_max)
def test_scp_num_nitrogen_atoms():
    """Exercise ``SCPNumNAtoms`` through ``_test_rule_min_max``.

    A fixed input molecule is parsed and handed to the helper together
    with the canonical SMILES expected for the 'min' and 'max' variants.
    """
    mol = Chem.MolFromSmiles('N1CCCC(OC(=O)C(O)(c2ccccc2)c2ccccc2)C1')
    expected_min = canonize_smiles('OC(c1ccccc1)c1ccccc1')
    expected_max = canonize_smiles('O=C(OC1CCCNC1)C(O)c1ccccc1')
    _test_rule_min_max(mol, SCPNumNAtoms, expected_min, expected_max)
def test_scp_abs_delta():
    """Exercise ``SCPAbsDelta('max')`` through ``_test_rule``.

    Only the 'max' variant is checked here: the rule instance is built
    directly and passed with the single expected canonical SMILES.
    """
    mol = Chem.MolFromSmiles('C1CC2CN3C(CC=CC3=O)C4C2N(C1)CCC4')
    expected_max = canonize_smiles('C1CC2CNCC3CCCN(C1)C23')
    rule = SCPAbsDelta('max')
    _test_rule(mol, rule, expected_max)
def test_scp_num_aromatic_rings():
    """Exercise ``SCPNumAromaticRings`` through ``_test_rule_min_max``.

    A fixed input molecule is parsed and handed to the helper together
    with the canonical SMILES expected for the 'min' and 'max' variants.
    """
    mol = Chem.MolFromSmiles('N1CCCC(OC(=O)C(O)(c2ccccc2)c2ccccc2)C1')
    expected_min = canonize_smiles('O=C(OC1CCCNC1)C(O)c1ccccc1')
    expected_max = canonize_smiles('OC(c1ccccc1)c1ccccc1')
    _test_rule_min_max(mol, SCPNumAromaticRings, expected_min, expected_max)
def test_rrp_num_nitrogen_atoms():
    """Exercise ``RRPNumNAtoms`` through ``_test_rule_min_max``.

    A fixed input molecule is parsed and handed to the helper together
    with the canonical SMILES expected for the 'min' and 'max' variants.
    """
    mol = Chem.MolFromSmiles('c1cccc2c(=O)[nH][nH]c(=O)c12')
    expected_min = canonize_smiles('O=c1ccc(=O)[nH][nH]1')
    expected_max = canonize_smiles('c1ccccc1')
    _test_rule_min_max(mol, RRPNumNAtoms, expected_min, expected_max)
def test_rrp_num_oxygen_atoms():
    """Exercise ``RRPNumOAtoms`` through ``_test_rule_min_max``.

    A fixed input molecule is parsed and handed to the helper together
    with the canonical SMILES expected for the 'min' and 'max' variants.
    """
    mol = Chem.MolFromSmiles('C1OC(=O)C2=C1CCC=C2')
    expected_min = canonize_smiles('O=C1C=CCO1')
    expected_max = canonize_smiles('C1=CCCC=C1')
    _test_rule_min_max(mol, RRPNumOAtoms, expected_min, expected_max)
def test_rrp_num_sulphur_atoms():
    """Exercise ``RRPNumSAtoms`` through ``_test_rule_min_max``.

    A fixed input molecule is parsed and handed to the helper together
    with the canonical SMILES expected for the 'min' and 'max' variants.
    """
    mol = Chem.MolFromSmiles('C1CSC(=NNC(=O)C(=O)CC2CCOCC2)N1')
    expected_min = canonize_smiles('N=C1NCCS1')
    expected_max = canonize_smiles('C1CCOCC1')
    _test_rule_min_max(mol, RRPNumSAtoms, expected_min, expected_max)
def test_rsp_abs_delta():
    """Exercise ``RSPAbsDelta`` through ``_test_rule_min_max``.

    A fixed input molecule is parsed and handed to the helper together
    with the canonical SMILES expected for the 'min' and 'max' variants.
    """
    mol = Chem.MolFromSmiles('O=C1N(CCCC3CCNCC3)CCC12CCN1CCCC12')
    expected_min = canonize_smiles('O=C1NCCC12CCN1CCCC12')
    expected_max = canonize_smiles('O=C1N(CCCC2CCNCC2)CCC12CCNC2')
    _test_rule_min_max(mol, RSPAbsDelta, expected_min, expected_max)
def test_rsp_num_oxygen_atoms():
    """Exercise ``RSPNumOAtoms`` through ``_test_rule_min_max``.

    A fixed input molecule is parsed and handed to the helper together
    with the canonical SMILES expected for the 'min' and 'max' variants.
    """
    mol = Chem.MolFromSmiles('c1c2c(c3occ(-c4ccccc4)c(=O)c3c1)C=CCO2')
    expected_min = canonize_smiles('O=c1ccoc2c3c(ccc12)OCC=C3')
    expected_max = canonize_smiles('O=c1c(-c2ccccc2)coc2ccccc12')
    _test_rule_min_max(mol, RSPNumOAtoms, expected_min, expected_max)