Ejemplo n.º 1
0
def test_rsp_num_rings():
    """Run the shared min/max rule check for RSPNumRings on one fixed molecule."""
    mol = Chem.MolFromSmiles(
        'O=C(c1c2ccccc2cc2ccccc12)N1CCC(N2CCC[C@@H](C(=O)N3CCOCC3)C2)CC1'
    )
    # Expected scaffolds are canonized so the comparison uses canonical SMILES.
    expected_min = canonize_smiles(
        'O=C(c1c2ccccc2cc2ccccc12)N1CCC(N2CCCCC2)CC1'
    )
    expected_max = canonize_smiles(
        'O=C(c1cccc2ccccc12)N1CCC(N2CCC[C@@H](C(=O)N3CCOCC3)C2)CC1'
    )
    _test_rule_min_max(mol, RSPNumRings, expected_min, expected_max)
Ejemplo n.º 2
0
def test_scp_delta():
    """Run the shared min/max rule check for SCPDelta on one fixed molecule."""
    mol = Chem.MolFromSmiles('OC5CC31CC5CC1C4CCc2occc2C4CC3')
    # retain spiro
    expected_min = canonize_smiles('OC1CCC2(CCC3c4ccoc4CCC3C2)C1')
    # retain bridged
    expected_max = canonize_smiles('OC1CC23CCC4C=CCCC4C2CC1C3')
    _test_rule_min_max(mol, SCPDelta, expected_min, expected_max)
Ejemplo n.º 3
0
def test_scp_num_linker_bonds():
    """Run the shared min/max rule check for SCPNumLinkerBonds on one fixed molecule."""
    mol = Chem.MolFromSmiles(
        'O=C(NCCCCN1CCN(c2ccccc2)CC1)c1ccc2c(c1)Cc1ccccc1-2'
    )
    # Expected scaffolds are canonized so the comparison uses canonical SMILES.
    expected_min = canonize_smiles(
        'O=C(NCCCCN1CCNCC1)c1ccc2c(c1)Cc1ccccc1-2'
    )
    expected_max = canonize_smiles(
        'O=C(NCCCCN1CCN(c2ccccc2)CC1)c1ccc2c(c1)CC=C2'
    )
    _test_rule_min_max(mol, SCPNumLinkerBonds, expected_min, expected_max)
Ejemplo n.º 4
0
def test_rrp_het_atom_linked():
    """Run the shared min/max rule check for RRPHetAtomLinked on one fixed molecule."""
    mol = Chem.MolFromSmiles(
        'O=C(NCc1ccccc1)N1CCN2C(=O)OC(c3ccccc3)(c3ccccc3)[C@@H]2C1'
    )
    # Expected scaffolds are canonized so the comparison uses canonical SMILES.
    expected_min = canonize_smiles(
        'O=C(NCc1ccccc1)N1CCN2C(=O)OC(c3ccccc3)[C@@H]2C1'
    )
    expected_max = canonize_smiles(
        'O=C1OC(c2ccccc2)(c2ccccc2)[C@@H]2CNCCN12'
    )
    _test_rule_min_max(mol, RRPHetAtomLinked, expected_min, expected_max)
Ejemplo n.º 5
0
    def get_molecules_for_scaffold(self,
                                   scaffold_smiles,
                                   data=False,
                                   default=None):
        """Collect the molecule nodes reachable from a scaffold node.

        The graph is walked breadth-first from the scaffold (non-reversed), and
        every visited node whose 'type' attribute equals 'molecule' is reported.

        Parameters
        ----------
        scaffold_smiles : (string) SMILES of query scaffold. If it is not a node of the graph
            as given, it is canonized once (failsafe) and looked up again.
        data : (string or bool, optional (default=False)) Selects the shape of each entry.
            If False, entries are the molecule nodes n. If True, entries are 2-tuples
            (n, dict) carrying the whole node attribute dict. Otherwise entries are
            (n, dict[data]).
        default : (value, optional (default=None)) Value used for molecule nodes that do not
            have the requested attribute. Only relevant if data is not True or False.

        Returns
        -------
        A list with one entry per molecule node found; an empty list if the scaffold is not
        in the graph even after canonization.
        """
        if scaffold_smiles not in self:
            scaffold_smiles = canonize_smiles(scaffold_smiles, failsafe=True)
            if scaffold_smiles not in self:
                return []

        def _entry(node):
            # Shape one result entry according to the `data` argument.
            if data is False:
                return node
            if data is True:
                return (node, self.nodes[node])
            return (node, self.nodes[node].get(data, default))

        return [
            _entry(node)
            for node in nx.bfs_tree(self, scaffold_smiles, reverse=False)
            if self.nodes[node].get('type') == 'molecule'
        ]
Ejemplo n.º 6
0
    def _subgraph_from_scf(self, scaffold, traversal):
        """Private: Build the subgraph reachable from one scaffold node.

        Parameters
        ----------
        scaffold : str
            Scaffold node identifier; it is canonized before the lookup.
        traversal : str {'parent', 'child', 'bidirectional'}
            Which side of the scaffold to include. 'parent' adds the result
            of ``get_parent_scaffolds``, 'child' adds all descendants, and
            'bidirectional' adds both (parents first).

        Returns
        -------
        subgraph : ScaffoldGraph
            The subgraph induced by the query node plus the selected nodes.

        Raises
        ------
        ValueError
            If the canonized scaffold is not in the graph, or if `traversal`
            is not one of the accepted values. The graph check runs first.

        """
        graph = self._graph
        root = canonize_smiles(scaffold)
        if not graph.scaffold_in_graph(root):
            raise ValueError(f'scaffold: {root} not in graph {graph}')
        if traversal not in ('parent', 'child', 'bidirectional'):
            msg = 'traversal must be one of {child, parent, bidirectional}'
            raise ValueError(msg)

        # The query node always leads the selection.
        selected = [root]
        if traversal in ('parent', 'bidirectional'):
            selected.extend(graph.get_parent_scaffolds(root))
        if traversal in ('child', 'bidirectional'):
            selected.extend(nx.descendants(graph, root))
        return graph.subgraph(selected)
Ejemplo n.º 7
0
    def _get_scaffold_hierarchy(self,
                                scaffold_smiles,
                                data=False,
                                default=None,
                                max_levels=-1,
                                traversal='parent'):
        """Private: Return a list of parent/child scaffolds for a query scaffold.

        The graph is walked breadth-first from the query node (reversed for
        'parent'), the query node itself is skipped, and only nodes whose
        'type' attribute is 'scaffold' are reported.

        Parameters
        ----------
        scaffold_smiles : str
            SMILES of query scaffold. If it is not a node of the graph as
            given, it is canonized once (failsafe) and looked up again.
        data : str, bool, optional
            The scaffold node attribute returned in 2-tuple (n, ddict[data]).
            If True, return entire node attribute dict as (n, ddict).
            If False, return just the nodes n. The default is False.
        default : value, bool, optional
            Value used for nodes that don't have the requested attribute.
            Only relevant if data is not True or False.
        max_levels : int, optional
            If >= 0, only return scaffolds whose hierarchy differs from the
            query scaffold's by at most `max_levels`, measured in the
            direction of traversal. A negative value (the default) disables
            the limit.
        traversal : {'parent', 'child'}, optional
            Direction of traversal.

        Returns
        -------
        list
            A list of scaffold parent/child nodes; empty if the query scaffold
            is not in the graph.

        Raises
        ------
        ValueError
            If `traversal` is neither 'parent' nor 'child'.

        """
        # Explicit check instead of `assert`, which is stripped under -O and
        # would let an invalid value fall through as a 'child' traversal.
        if traversal not in {'parent', 'child'}:
            raise ValueError('traversal must be one of {child, parent}')
        towards_parents = traversal == 'parent'
        next_hiers = []
        if scaffold_smiles not in self:
            scaffold_smiles = canonize_smiles(scaffold_smiles, failsafe=True)
            if scaffold_smiles not in self:
                return next_hiers
        level = self.nodes[scaffold_smiles].get('hierarchy', float('inf'))
        bfs = iter(
            nx.bfs_tree(self, scaffold_smiles, reverse=towards_parents).nodes)
        next(bfs)  # first entry is the query node
        for succ in bfs:
            d = self.nodes[succ]
            if d.get('type') != 'scaffold':
                continue
            if max_levels >= 0:
                hierarchy = d.get('hierarchy', 0)
                # Measure the distance in the direction of travel. Using
                # `level - hierarchy` for children (assuming their hierarchy
                # is not lower than the query's) is never positive, which
                # made the limit a no-op for traversal='child'.
                if towards_parents:
                    distance = level - hierarchy
                else:
                    distance = hierarchy - level
                if not distance <= max_levels:
                    continue
            if data is False:
                next_hiers.append(succ)
            elif data is True:
                next_hiers.append((succ, d))
            else:
                next_hiers.append((succ, d.get(data, default)))
        return next_hiers
Ejemplo n.º 8
0
    def scaffold_in_graph(self, scaffold_smiles):
        """Returns True if specified scaffold SMILES is a scaffold node of the graph

        The SMILES is looked up as given; if it is not a node, it is canonized
        (failsafe) and looked up again. A matching node only counts when its
        'type' attribute is 'scaffold', so the identifier of a molecule node
        stored in the same graph does not yield True.

        Parameters
        ----------
        scaffold_smiles : (str) SMILES of query scaffold.

        Returns
        -------
        (bool) True if a scaffold node with this SMILES exists, False otherwise.
        """
        if scaffold_smiles not in self:
            scaffold_smiles = canonize_smiles(scaffold_smiles, failsafe=True)
            if scaffold_smiles not in self:
                return False
        # .get() so that a node without a 'type' attribute reads as "not a scaffold".
        return self.nodes[scaffold_smiles].get('type') == 'scaffold'
Ejemplo n.º 9
0
    def get_molecules_for_scaffold(self, scaffold_smiles):
        """Return a list of molecule IDs which are represented by a scaffold in the graph.

        Note: This is determined by traversing the graph. In the case of a scaffold tree
        the results represent the rules used to prioritize the scaffolds.

        The graph is walked breadth-first from the scaffold (non-reversed) and every visited
        node whose 'type' attribute equals 'molecule' is collected. Nodes without a 'type'
        attribute are skipped rather than raising KeyError.

        Parameters
        ----------
        scaffold_smiles : (str) SMILES of query scaffold. If it is not a node of the graph
            as given, it is canonized once (failsafe) and looked up again.

        Returns
        -------
        (list) Molecule nodes in traversal order; empty if the scaffold is not in the graph.
        """
        if scaffold_smiles not in self:
            scaffold_smiles = canonize_smiles(scaffold_smiles, failsafe=True)
            if scaffold_smiles not in self:
                return []
        return [
            node
            for node in nx.bfs_tree(self, scaffold_smiles, reverse=False)
            if self.nodes[node].get('type') == 'molecule'
        ]
Ejemplo n.º 10
0
    def get_child_scaffolds(self,
                            scaffold_smiles,
                            data=False,
                            default=None,
                            max_levels=-1):
        """Collect the scaffold nodes reachable below a query scaffold.

        The graph is walked breadth-first from the query node (non-reversed); the query node
        itself is skipped and only nodes whose 'type' attribute is 'scaffold' are reported.

        Parameters
        ----------
        scaffold_smiles : (string)  SMILES of query scaffold. If it is not a node of the graph
            as given, it is canonized once (failsafe) and looked up again.
        data : (string or bool, optional (default=False)) Selects the shape of each entry.
            If False, entries are the scaffold nodes n. If True, entries are 2-tuples
            (n, dict) carrying the whole node attribute dict. Otherwise entries are
            (n, dict[data]).
        default : (value, optional (default=None)) Value used for scaffold nodes that do not
            have the requested attribute. Only relevant if data is not True or False.
        max_levels : (integer, optional (default=-1)) If negative, no limit is applied.
            Otherwise a scaffold is kept only when its 'hierarchy' minus the query's
            'hierarchy' is at most max_levels.

        Returns
        -------
        A list with one entry per child scaffold found; an empty list if the query scaffold
        is not in the graph even after canonization.
        """
        if scaffold_smiles not in self:
            scaffold_smiles = canonize_smiles(scaffold_smiles, failsafe=True)
            if scaffold_smiles not in self:
                return []

        query_level = self.nodes[scaffold_smiles].get('hierarchy', float('inf'))
        visited = iter(nx.bfs_tree(self, scaffold_smiles, reverse=False).nodes)
        next(visited)  # drop the query node, which the traversal yields first

        found = []
        for node in visited:
            attrs = self.nodes[node]
            if attrs.get('type') != 'scaffold':
                continue
            within_limit = (
                max_levels < 0
                or attrs.get('hierarchy', 0) - query_level <= max_levels
            )
            if not within_limit:
                continue
            if data is False:
                entry = node
            elif data is True:
                entry = (node, attrs)
            else:
                entry = (node, attrs.get(data, default))
            found.append(entry)
        return found
Ejemplo n.º 11
0
    def scaffold_in_graph(self, scaffold_smiles):
        """Returns True if the specified scaffold SMILES is in the scaffold graph.

        Parameters
        ----------
        scaffold_smiles : str
            SMILES of query scaffold.

        Returns
        -------
        bool
            True if a node with this SMILES exists and its 'type' attribute
            is 'scaffold'; False otherwise.

        Notes
        -----
        If not initially found the SMILES is canonized and the graph is searched
        with the canonized SMILES key. A node that has no 'type' attribute is
        treated as not being a scaffold instead of raising KeyError.

        """
        if scaffold_smiles not in self:
            scaffold_smiles = canonize_smiles(scaffold_smiles, failsafe=True)
            if scaffold_smiles not in self:
                return False
        # .get() keeps this tolerant of nodes that carry no 'type' attribute.
        return self.nodes[scaffold_smiles].get('type') == 'scaffold'
Ejemplo n.º 12
0
def test_scp_num_sulphur_atoms():
    """Run the shared min/max rule check for SCPNumSAtoms on one fixed molecule."""
    mol = Chem.MolFromSmiles('c1csc2c(NCN3CCN(CCc4ccccc4)CC3)ncnc12')
    # Expected scaffolds are canonized so the comparison uses canonical SMILES.
    expected_min = canonize_smiles('c1ccc(CCN2CCN(CNc3ccncn3)CC2)cc1')
    expected_max = canonize_smiles('c1nc(NCN2CCNCC2)c2sccc2n1')
    _test_rule_min_max(mol, SCPNumSAtoms, expected_min, expected_max)
Ejemplo n.º 13
0
def test_rsp_num_sulphur_atoms():
    """Run the shared min/max rule check for RSPNumSAtoms on one fixed molecule."""
    mol = Chem.MolFromSmiles('c1nc2ccc3nc(NC(=O)C(c4ccccc4)c4ccccc4)sc3c2s1')
    # Expected scaffolds are canonized so the comparison uses canonical SMILES.
    expected_min = canonize_smiles('O=C(Cc1ccccc1)Nc1nc2ccc3ncsc3c2s1')
    expected_max = canonize_smiles('O=C(Nc1nc2ccccc2s1)C(c1ccccc1)c1ccccc1')
    _test_rule_min_max(mol, RSPNumSAtoms, expected_min, expected_max)
Ejemplo n.º 14
0
def test_rrp_linker_length():
    """Run the shared min/max rule check for RRPLinkerLength on one fixed molecule."""
    mol = Chem.MolFromSmiles('O=C1c2ccccc2-c2c(NCCc3ccccc3)c(=O)[nH]c3cccc1c23')
    # Expected scaffolds are canonized so the comparison uses canonical SMILES.
    expected_min = canonize_smiles('O=C1C=Cc2c(NCCc3ccccc3)c(=O)[nH]c3cccc1c23')
    expected_max = canonize_smiles('O=C1c2ccccc2-c2cc(=O)[nH]c3cccc1c23')
    _test_rule_min_max(mol, RRPLinkerLength, expected_min, expected_max)
Ejemplo n.º 15
0
def test_rrp_ring_size():
    """Run the shared min/max rule check for RRPRingSize on one fixed molecule."""
    mol = Chem.MolFromSmiles('n1nc(-c2ccccc2)nc1=S')
    # Expected scaffolds are canonized so the comparison uses canonical SMILES.
    expected_min = canonize_smiles('c1ccccc1')
    expected_max = canonize_smiles('S=C1N=CN=N1')
    _test_rule_min_max(mol, RRPRingSize, expected_min, expected_max)
Ejemplo n.º 16
0
def test_scp_num_oxygen_atoms():
    """Run the shared min/max rule check for SCPNumOAtoms on one fixed molecule."""
    mol = Chem.MolFromSmiles('C2Oc3ccccc3C(=O)C2=O')
    # Expected scaffolds are canonized so the comparison uses canonical SMILES.
    expected_min = canonize_smiles('c1ccccc1')
    expected_max = canonize_smiles('O=C1C=COCC1=O')
    _test_rule_min_max(mol, SCPNumOAtoms, expected_min, expected_max)
Ejemplo n.º 17
0
def test_scp_num_nitrogen_atoms():
    """Run the shared min/max rule check for SCPNumNAtoms on one fixed molecule."""
    mol = Chem.MolFromSmiles('N1CCCC(OC(=O)C(O)(c2ccccc2)c2ccccc2)C1')
    # Expected scaffolds are canonized so the comparison uses canonical SMILES.
    expected_min = canonize_smiles('OC(c1ccccc1)c1ccccc1')
    expected_max = canonize_smiles('O=C(OC1CCCNC1)C(O)c1ccccc1')
    _test_rule_min_max(mol, SCPNumNAtoms, expected_min, expected_max)
Ejemplo n.º 18
0
def test_scp_abs_delta():
    """Run the shared single-rule check for SCPAbsDelta('max') on one fixed molecule."""
    mol = Chem.MolFromSmiles('C1CC2CN3C(CC=CC3=O)C4C2N(C1)CCC4')
    # Only the 'max' configuration of the rule is exercised here.
    expected_max = canonize_smiles('C1CC2CNCC3CCCN(C1)C23')
    _test_rule(mol, SCPAbsDelta('max'), expected_max)
Ejemplo n.º 19
0
def test_scp_num_aromatic_rings():
    """Run the shared min/max rule check for SCPNumAromaticRings on one fixed molecule."""
    mol = Chem.MolFromSmiles('N1CCCC(OC(=O)C(O)(c2ccccc2)c2ccccc2)C1')
    # Expected scaffolds are canonized so the comparison uses canonical SMILES.
    expected_min = canonize_smiles('O=C(OC1CCCNC1)C(O)c1ccccc1')
    expected_max = canonize_smiles('OC(c1ccccc1)c1ccccc1')
    _test_rule_min_max(mol, SCPNumAromaticRings, expected_min, expected_max)
Ejemplo n.º 20
0
def test_rrp_num_nitrogen_atoms():
    """Run the shared min/max rule check for RRPNumNAtoms on one fixed molecule."""
    mol = Chem.MolFromSmiles('c1cccc2c(=O)[nH][nH]c(=O)c12')
    # Expected scaffolds are canonized so the comparison uses canonical SMILES.
    expected_min = canonize_smiles('O=c1ccc(=O)[nH][nH]1')
    expected_max = canonize_smiles('c1ccccc1')
    _test_rule_min_max(mol, RRPNumNAtoms, expected_min, expected_max)
Ejemplo n.º 21
0
def test_rrp_num_oxygen_atoms():
    """Run the shared min/max rule check for RRPNumOAtoms on one fixed molecule."""
    mol = Chem.MolFromSmiles('C1OC(=O)C2=C1CCC=C2')
    # Expected scaffolds are canonized so the comparison uses canonical SMILES.
    expected_min = canonize_smiles('O=C1C=CCO1')
    expected_max = canonize_smiles('C1=CCCC=C1')
    _test_rule_min_max(mol, RRPNumOAtoms, expected_min, expected_max)
Ejemplo n.º 22
0
def test_rrp_num_sulphur_atoms():
    """Run the shared min/max rule check for RRPNumSAtoms on one fixed molecule."""
    mol = Chem.MolFromSmiles('C1CSC(=NNC(=O)C(=O)CC2CCOCC2)N1')
    # Expected scaffolds are canonized so the comparison uses canonical SMILES.
    expected_min = canonize_smiles('N=C1NCCS1')
    expected_max = canonize_smiles('C1CCOCC1')
    _test_rule_min_max(mol, RRPNumSAtoms, expected_min, expected_max)
Ejemplo n.º 23
0
def test_rsp_abs_delta():
    """Run the shared min/max rule check for RSPAbsDelta on one fixed molecule."""
    mol = Chem.MolFromSmiles('O=C1N(CCCC3CCNCC3)CCC12CCN1CCCC12')
    # Expected scaffolds are canonized so the comparison uses canonical SMILES.
    expected_min = canonize_smiles('O=C1NCCC12CCN1CCCC12')
    expected_max = canonize_smiles('O=C1N(CCCC2CCNCC2)CCC12CCNC2')
    _test_rule_min_max(mol, RSPAbsDelta, expected_min, expected_max)
Ejemplo n.º 24
0
def test_rsp_num_oxygen_atoms():
    """Run the shared min/max rule check for RSPNumOAtoms on one fixed molecule."""
    mol = Chem.MolFromSmiles('c1c2c(c3occ(-c4ccccc4)c(=O)c3c1)C=CCO2')
    # Expected scaffolds are canonized so the comparison uses canonical SMILES.
    expected_min = canonize_smiles('O=c1ccoc2c3c(ccc12)OCC=C3')
    expected_max = canonize_smiles('O=c1c(-c2ccccc2)coc2ccccc12')
    _test_rule_min_max(mol, RSPNumOAtoms, expected_min, expected_max)