Example #1
0
def _fragment_mol(mol, fragment_filter, num_heavies=None):
    """Yield the distinct fragmentations of ``mol`` permitted by the filter.

    Args:
        mol: the molecule to fragment. It is handed unchanged to the rank,
            chirality and cut helpers.
        fragment_filter: object providing ``get_cut_lists(mol)``; it is also
            forwarded to ``make_single_cut`` / ``make_multiple_cuts``.
        num_heavies: heavy-atom count of ``mol``. When omitted it is computed
            with ``count_num_heavies``.

    Yields:
        Every fragmentation whose ``get_unique_key()`` has not already been
        produced during this call. Nothing is yielded when the filter returns
        no cut lists.
    """
    cut_lists = fragment_filter.get_cut_lists(mol)

    if not cut_lists:
        return

    seen = set()

    # Not consumed further down in this function; the call is retained so the
    # behaviour (including any error it raises) stays as before.
    if num_heavies is None:
        num_heavies = count_num_heavies(mol)

    # Identify atoms that are chiral (assigned and unassigned) in the parent
    # compound: 0 means not chiral, 1 means assigned, 2 means unassigned.
    atom_ranks = Chem.CanonicalRankAtoms(mol, breakTies=False)
    chiral_flags = get_chiral_flags(mol, atom_ranks)

    for cut_list in cut_lists:
        if len(cut_list) == 1:
            fragmentations = make_single_cut(mol, cut_list[0], chiral_flags,
                                             fragment_filter)
        else:
            fragmentations = make_multiple_cuts(mol, cut_list, chiral_flags,
                                                fragment_filter)
            # ``None`` signals that the fragmentation has been filtered out.
            # Identity test: ``== None`` would dispatch to a custom ``__eq__``.
            if fragmentations is None:
                continue

        for fragmentation in fragmentations:
            key = fragmentation.get_unique_key()  # XXX + "012" + YYY
            if key not in seen:
                seen.add(key)
                yield fragmentation
Example #2
0
def get_parity(Mol, Atm_idx):
    """Return 0 or 1: the parity of the swaps needed to sort neighbour ranks.

    The canonical ranks (ties not broken) of the atoms bonded to atom
    ``Atm_idx`` are collected in bond order and sorted ascending with an
    exchange sort; the number of exchanges modulo 2 is returned.

    See also http://www.dalkescientific.com/writings/diary/archive/2016/08/14/fragment_chiral_molecules.html
    """
    ranks = [int(r) for r in Chem.CanonicalRankAtoms(Mol, breakTies=False)]

    centre = Mol.GetAtomWithIdx(Atm_idx)
    nbr_ranks = [ranks[b.GetOtherAtomIdx(Atm_idx)] for b in centre.GetBonds()]

    swaps = 0
    count = len(nbr_ranks)
    for left in range(count):
        for right in range(left + 1, count):
            if nbr_ranks[right] < nbr_ranks[left]:
                nbr_ranks[left], nbr_ranks[right] = (nbr_ranks[right],
                                                     nbr_ranks[left])
                swaps += 1
    return swaps & 1
Example #3
0
def reorder_atoms(
    mol: Chem.rdchem.Mol,
    break_ties: bool = True,
    include_chirality: bool = True,
    include_isotopes: bool = True,
) -> Optional[Chem.rdchem.Mol]:
    """Renumber the atoms of a molecule by ascending canonical rank.

    The goal is a single atom order for the same molecule, regardless of how
    it was originally written.

    Args:
        mol: a molecule.
        break_ties: Force breaking of ranked ties.
        include_chirality: Use chiral information when computing rank.
        include_isotopes: Use isotope information when computing rank.

    Returns:
        mol: the renumbered molecule. A molecule without atoms is returned
            as is.
    """
    if mol.GetNumAtoms() == 0:
        return mol

    ranks = Chem.CanonicalRankAtoms(
        mol,
        breakTies=break_ties,
        includeChirality=include_chirality,
        includeIsotopes=include_isotopes,
    )

    # Order atom indices by (rank, original index); the index settles ties.
    indexed_ranks = list(enumerate(ranks))
    indexed_ranks.sort(key=lambda pair: (pair[1], pair[0]))
    return Chem.RenumberAtoms(mol, [atom_idx for atom_idx, _ in indexed_ranks])
Example #4
0
    def check_nei_bonds(bond):
        """Return True when ``bond`` passes the neighbour-bond checks.

        False is returned when either end atom has exactly two other
        neighbours that are both singly bonded and share a canonical rank
        (identical substituents, so no stereoisomers are possible), or when
        either end atom has no other bonds, or any non-single other bond.
        The molecule ``m`` comes from the enclosing scope.
        """
        single = Chem.BondType.SINGLE
        this_bond_idx = bond.GetIdx()
        begin, end = bond.GetBeginAtom(), bond.GetEndAtom()

        def other_bonds_are_single(atom):
            # One flag per bond on ``atom`` other than ``bond`` itself.
            return [
                b.GetBondType() == single for b in atom.GetBonds()
                if b.GetIdx() != this_bond_idx
            ]

        begin_flags = other_bonds_are_single(begin)
        end_flags = other_bonds_are_single(end)

        ranks = list(Chem.CanonicalRankAtoms(m, breakTies=False))

        def has_identical_substituents(atom, partner):
            # Two singly bonded substituents with the same canonical rank.
            nbrs = [
                n.GetIdx() for n in atom.GetNeighbors()
                if n.GetIdx() != partner.GetIdx()
            ]
            if len(nbrs) != 2:
                return False
            if any(
                    m.GetBondBetweenAtoms(i, atom.GetIdx()).GetBondType() !=
                    single for i in nbrs):
                return False
            return ranks[nbrs[0]] == ranks[nbrs[1]]

        if has_identical_substituents(begin, end):
            return False
        if has_identical_substituents(end, begin):
            return False

        # An empty flag list means a terminal atom, e.g. O in C=O.
        if not begin_flags or not end_flags:
            return False
        return all(begin_flags) and all(end_flags)
Example #5
0
def get_symmetry_groups(mol):
    """
    Computes the symmetry class (canonical rank with ties kept) of each atom
    and returns one representative atom index per class.

    Parameters
    ----------
    mol : rdkit molecule object.
          Fragment from custom-made library.

    Returns
    -------
    symmetry_list : list
                    Atom indices, in atom iteration order, of the first atom
                    encountered for every distinct symmetry rank.
    """
    # The ranking does not depend on the atom being visited, so compute it
    # once instead of once per atom.
    ranks = list(Chem.CanonicalRankAtoms(mol, breakTies=False))

    seen_ranks = set()
    symmetry_list = []
    for position, atom in enumerate(mol.GetAtoms()):
        symmetry_rank = ranks[position]
        if symmetry_rank not in seen_ranks:
            seen_ranks.add(symmetry_rank)
            symmetry_list.append(atom.GetIdx())
    return symmetry_list
Example #6
0
def get_symmetry_classes(molecule: off.Molecule) -> List[int]:
    """Return one symmetry class per atom, using whichever toolkit imports.

    RDKit is tried first (canonical ranks with ties kept). If that path
    raises ``ImportError``, OpenEye's symmetry perception is used instead.
    """
    try:
        from rdkit import Chem

        ranks = Chem.CanonicalRankAtoms(molecule.to_rdkit(), breakTies=False)
        return list(ranks)

    except ImportError:  # ModuleNotFoundError is a subclass of ImportError
        from openeye import oechem

        oe_mol = molecule.to_openeye()
        oechem.OEPerceiveSymmetry(oe_mol)

        class_by_index = {}
        for oe_atom in oe_mol.GetAtoms():
            class_by_index[oe_atom.GetIdx()] = oe_atom.GetSymmetryClass()

        # Report the classes in the atom order of the input molecule.
        return [class_by_index[i] for i in range(molecule.n_atoms)]
Example #7
0
def get_assm_cands(mol, atoms, inter_label, cluster, inter_size):
    """Return the attachment candidates of ``cluster``.

    The first candidate is always the labelled position taken from
    ``inter_label``; further candidates from ``cluster`` are kept only when
    their canonical rank signature differs from that of the labelled one.
    Ranks are computed on the sub-molecule built from ``atoms`` and keyed by
    ``idxfunc`` of each of its atoms.
    """
    unique_atoms = list(set(atoms))
    clique = get_clique_mol(mol, unique_atoms)
    original_ids = [idxfunc(atom) for atom in clique.GetAtoms()]
    clique = set_atommap(clique)
    rank = dict(
        zip(original_ids, Chem.CanonicalRankAtoms(clique, breakTies=False)))

    pos, icls = zip(*inter_label)

    if inter_size == 1:
        return [pos[0]] + [x for x in cluster if rank[x] != rank[pos[0]]]

    symmetric = icls[0] == icls[1]
    # ``cluster`` rotated so that each atom is paired with a partner.
    shift = cluster[inter_size - 1:] + cluster[:inter_size - 1]
    cands = [pos]

    if symmetric:
        # Orientation does not matter: compare rank pairs ordered by atom id.
        for x, y in zip(cluster, shift):
            if (rank[min(x, y)], rank[max(x, y)]) != (rank[min(pos)],
                                                      rank[max(pos)]):
                cands.append((x, y))
    else:
        # Both orientations of every pair are considered.
        for x, y in zip(cluster + shift, shift + cluster):
            if (rank[x], rank[y]) != (rank[pos[0]], rank[pos[1]]):
                cands.append((x, y))

    return cands
Example #8
0
def canonicalize(mol):
    """Return the canonical rank of every atom as a list (ties broken).

    Example: a result of ``[2, 3, 1, 0]`` means the atom indexed 0 in ``mol``
    is mapped to canonical order 2, atom 1 to order 3, atom 2 to order 1 and
    atom 3 to order 0.
    """
    canonical_ranks = Chem.CanonicalRankAtoms(mol, breakTies=True)
    return list(canonical_ranks)
Example #9
0
def canonicalize_tautomers(rank_list, mol):
    """Overwrite ``rank_list`` with the ranks of the canonical tautomer.

    ``mol`` is passed through ``tautomer.TautomerCanonicalizer`` and the
    canonical atom ranks (ties kept) of the result replace the contents of
    ``rank_list`` in place, so callers that ignore the return value still see
    the update. Previously the parameter was rebound to a fresh local list,
    which left the caller's list untouched and made the call a no-op.

    Args:
        rank_list: mutable list that receives the integer ranks.
        mol: molecule whose canonical tautomer is ranked.

    Returns:
        The same ``rank_list`` object, for convenience.
    """
    canon = tautomer.TautomerCanonicalizer()
    mol_t = canon.canonicalize(mol)
    # NOTE(review): assumes the canonical tautomer keeps the atom count and
    # atom order of ``mol`` so ranks stay index-compatible — confirm.
    rank_list[:] = [
        int(rank) for rank in Chem.CanonicalRankAtoms(mol_t, breakTies=False)
    ]
    return rank_list
Example #10
0
 def ranks(m):
     """Return ``[name, 1-based atom position, rank]`` for every atom of ``m``.

     Ranks are canonical ranks with ties kept, ignoring chirality and
     isotopes.
     """
     # NOTE(review): the name is read from ``mol``, which is resolved outside
     # this function, and not from the ``m`` parameter — confirm that both
     # refer to the same molecule.
     atom_ranks = Chem.CanonicalRankAtoms(m,
                                          breakTies=False,
                                          includeChirality=False,
                                          includeIsotopes=False)
     return [[mol.GetProp("_Name"), position + 1, rank]  # 1based!
             for position, rank in enumerate(list(atom_ranks))]
Example #11
0
def find_identical_atoms(smi, atom_list):
    """Extend ``atom_list`` in place with atoms symmetric to its members.

    For every atom index initially present in ``atom_list``, all atoms of the
    molecule parsed from ``smi`` that share its canonical rank (ties kept)
    and are not yet listed are appended. The same list object is returned.
    """
    # Only the indices present on entry act as seeds; later additions do not.
    seeds = atom_list[:len(atom_list)]

    rdkit_mol = Chem.MolFromSmiles(smi)
    ranks = list(Chem.CanonicalRankAtoms(rdkit_mol, breakTies=False))

    for position, atom in enumerate(rdkit_mol.GetAtoms()):
        if atom.GetIdx() not in seeds:
            continue
        wanted = ranks[position]
        twins = []
        for other_idx, other_rank in enumerate(ranks):
            if other_rank == wanted and other_idx not in atom_list:
                twins.append(int(other_idx))
        atom_list.extend(twins)
    return atom_list
Example #12
0
def canonicalize_mol(mol, inplace=False, **kwargs):
    r"""Take a PLAMS molecule and sort its atoms based on their canonical rank.

    Atoms are placed in order of descending rank. Atoms with equal rank
    (possible when ``breakTies=False`` is passed) keep their relative order.

    Example:

    .. code:: python

        >>> from scm.plams import Molecule, canonicalize_mol

        # Methane
        >>> mol: Molecule = ...
        >>> print(mol)
        Atoms:
            1         H      0.640510      0.640510     -0.640510
            2         H      0.640510     -0.640510      0.640510
            3         C      0.000000      0.000000      0.000000
            4         H     -0.640510      0.640510      0.640510
            5         H     -0.640510     -0.640510     -0.640510

        >>> print(canonicalize_mol(mol))
        Atoms:
            1         C      0.000000      0.000000      0.000000
            2         H     -0.640510     -0.640510     -0.640510
            3         H     -0.640510      0.640510      0.640510
            4         H      0.640510     -0.640510      0.640510
            5         H      0.640510      0.640510     -0.640510

    :parameter mol: The to-be canonicalized molecule.
    :type mol: |Molecule|
    :parameter bool inplace: Whether to sort the atoms inplace or to return a new molecule.
    :parameter \**kwargs: Further keyword arguments for rdkit.Chem.CanonicalRankAtoms_.
    :return: Either ``None`` or a newly sorted molecule, depending on the value of ``inplace``.
    :rtype: None or |Molecule|
    :raises TypeError: If ``mol`` is not a PLAMS Molecule.

    .. _rdkit.Chem.CanonicalRankAtoms: https://www.rdkit.org/docs/source/rdkit.Chem.rdmolfiles.html#rdkit.Chem.rdmolfiles.CanonicalRankAtoms

    """
    if not isinstance(mol, Molecule):
        raise TypeError("`mol` expected a plams Molecule")
    rdmol = to_rdmol(mol)
    idx_rank = Chem.CanonicalRankAtoms(rdmol, **kwargs)

    target = mol if inplace else mol.copy()

    # Sort on the rank alone. Comparing whole (rank, atom) tuples falls back
    # to comparing the atom objects themselves whenever two ranks are equal.
    ranked = sorted(zip(idx_rank, target.atoms),
                    key=lambda pair: pair[0],
                    reverse=True)
    target.atoms = [at for _, at in ranked]

    return None if inplace else target
Example #13
0
def remove_identical_atoms(rdkit_mol, atom_list):
    """Return ``atom_list`` reduced to one atom per canonical rank.

    Ranks are computed with ties kept; for every rank only the first atom in
    ``atom_list`` carrying it survives. The input list is not modified.
    """
    ranks = list(Chem.CanonicalRankAtoms(rdkit_mol, breakTies=False))

    seen_ranks = set()
    keep_positions = []
    for position, atom_idx in enumerate(atom_list):
        rank = ranks[atom_idx]
        if rank in seen_ranks:
            continue
        seen_ranks.add(rank)
        keep_positions.append(position)

    return np.array(atom_list)[keep_positions].tolist()
Example #14
0
def _sample_ordering(mol, scaffold_nodes, k, p, ms=MoleculeSpec.get_default()):
    """Sample ``k`` decoding routes for the molecule ``mol``.

    Args:
        mol (Chem.Mol): the given molecule (type: Chem.Mol)
        scaffold_nodes (np.ndarray): the nodes marked as scaffold
        k (int): The number of importance samples
        p (float): Degree of uncertainty during route sampling, should be in (0, 1)
        ms (mol_spec.MoleculeSpec)

    Returns:
        route_list (np.ndarray): route_list[i][j] the index of the atom reached at step j in sample i
        step_ids_list (np.ndarray): step_ids_list[i][j] the step at which atom j is reach at sample i
        logp_list (np.ndarray): logp_list[i] - the log-likelihood value of route i
    """
    # Atom types are looked up for every atom but are not used further below.
    atom_types = [ms.get_atom_type(atom) for atom in mol.GetAtoms()]

    atom_ranks = np.array(list(Chem.CanonicalRankAtoms(mol)))

    # Undirected molecular graph: one node per atom, one edge per bond.
    edges = [[b.GetBeginAtomIdx(), b.GetEndAtomIdx()] for b in mol.GetBonds()]
    graph = nx.Graph()
    graph.add_nodes_from(range(len(atom_ranks)))
    graph.add_edges_from(edges)

    routes, steps, log_likelihoods = [], [], []
    for _ in range(k):
        step_ids, log_p = _traverse(graph=graph,
                                    atom_ranks=atom_ranks,
                                    scaffold_nodes=scaffold_nodes,
                                    p=p)
        steps.append(step_ids)
        # argsort turns "step at which each atom is reached" into a route.
        routes.append(np.argsort(step_ids))
        log_likelihoods.append(log_p)

    return (
        np.array(routes, dtype=np.int32),
        np.array(steps, dtype=np.int32),
        np.array(log_likelihoods, dtype=np.float32),
    )
Example #15
0
def combine_core_env_to_rxn_smarts(core, env, keep_h=True):
    """Join a core fragment and its environment into one SMARTS string.

    Atoms carrying an atom map number are treated as attachment points: the
    neighbours of attachment points with the same map number are bonded to
    each other and the attachment atoms are removed. Unmapped environment
    atoms receive their canonical rank + 1 as map number.

    Args:
        core: core fragment, as SMILES string or RDKit molecule.
        env: environment, as SMILES string or RDKit molecule.
        keep_h: forwarded to ``mol_to_smarts``; when false, ``patt_remove_h``
            is additionally applied to the result.

    Returns:
        The combined SMARTS string.

    Each attachment map number has to occur on exactly two atoms across
    ``core`` and ``env``; otherwise unpacking the link pairs fails.
    """
    # Strings are parsed. Anything else is taken to be an RDKit molecule and
    # copied, because atom map numbers are rewritten below. Previously a
    # non-string argument left ``m_env`` / ``m_frag`` undefined.
    if isinstance(env, str):
        m_env = Chem.MolFromSmiles(env, sanitize=False)
    else:
        m_env = Chem.Mol(env)
    if isinstance(core, str):
        m_frag = Chem.MolFromSmiles(core, sanitize=False)
    else:
        m_frag = Chem.Mol(core)

    backup_atom_map = "backupAtomMap"

    # put all atom maps to atom property and remove them
    for part in (m_env, m_frag):
        for a in part.GetAtoms():
            atom_map = a.GetAtomMapNum()
            if atom_map:
                a.SetIntProp(backup_atom_map, atom_map)
                a.SetAtomMapNum(0)

    # set canonical ranks for atoms in env without maps
    m_env.UpdatePropertyCache()
    for atom_id, rank in enumerate(list(Chem.CanonicalRankAtoms(m_env))):
        a = m_env.GetAtomWithIdx(atom_id)
        if not a.HasProp(backup_atom_map):
            a.SetAtomMapNum(rank + 1)  # because ranks start from 0

    m = Chem.RWMol(Chem.CombineMols(m_frag, m_env))

    links = defaultdict(list)  # pairs of atom ids to create bonds
    att_to_remove = []  # ids of att points to remove
    for a in m.GetAtoms():
        if a.HasProp(backup_atom_map):
            i = a.GetIntProp(backup_atom_map)
            links[i].append(a.GetNeighbors()[0].GetIdx())
            att_to_remove.append(a.GetIdx())

    for i, j in links.values():
        m.AddBond(i, j, Chem.BondType.SINGLE)

    # Remove from the highest index down so the remaining ids stay valid.
    for i in sorted(att_to_remove, reverse=True):
        m.RemoveAtom(i)

    comb_sma = mol_to_smarts(m, keep_h)
    if not keep_h:  # remove H only in mapped env part
        comb_sma = patt_remove_h.sub('', comb_sma)
    return comb_sma
Example #16
0
def get_symmetry_class(smi):
    """Return the symmetry class of every zero-mass atom in ``smi``.

    Zero-mass atoms are the attachment points ("stars"). The result follows
    atom order: the first star is at index zero, the second at index one,
    and so on. Classes are canonical ranks with ties kept.
    """
    m = Chem.MolFromSmiles(smi)
    symmetry_classes = Chem.CanonicalRankAtoms(m, breakTies=False)

    return [
        symmetry_class
        for atom, symmetry_class in zip(m.GetAtoms(), symmetry_classes)
        if atom.GetMass() == 0
    ]
Example #17
0
    def __init__(self, rdmol, root_atm_idx=0):
        """Set up the atom ordering of ``rdmol`` starting at a root atom.

        Args:
            rdmol: molecule providing ``GetNumAtoms()``; it is ranked with
                ``Chem.CanonicalRankAtoms`` (ties kept).
            root_atm_idx: index of the atom the ordering starts from.

        Raises:
            ValueError: if ``root_atm_idx`` is not a valid atom index, i.e.
                negative or not smaller than the number of atoms.
        """
        n_atoms = rdmol.GetNumAtoms()

        # Negative indices used to slip through the former upper-bound-only
        # check; the message now states the bounds that are enforced.
        if not 0 <= root_atm_idx < n_atoms:
            raise ValueError("root_atm_idx must be 0<=root_atm_idx<N_atms")

        self.rdmol = rdmol
        self.ordered_atom_list = [None] * n_atoms
        self.z = dict()
        self.N_atms = 0
        self.rank = list(Chem.CanonicalRankAtoms(rdmol, breakTies=False))
        self.n_non_deadends = 0

        self.add_atom(root_atm_idx)
        self.order_atoms(root_atm_idx)
        self.zzit()
Example #18
0
    def __init__(self, Mol, Verbose=False):
        """Store the molecule, rank its atoms and create empty bookkeeping.

        Args:
            Mol: the molecule to work on.
            Verbose: when true, an additional ``process_list`` is created.
        """
        self.mol = Mol

        # Integer canonical ranks with ties kept, one per atom.
        self.canonical_rank = [
            int(rank)
            for rank in Chem.CanonicalRankAtoms(self.mol, breakTies=False)
        ]
        # Called with the rank list and the molecule; no return value is used.
        canonicalize_tautomers(self.canonical_rank, self.mol)

        # Connector instances.
        self.connectors = list()

        # RDKit Mol instances of the final capped fragments ...
        self.frag_list = list()
        # ... and the atom indices of each fragment in the numbering scheme
        # of the original molecule.
        self.frag_list_map = list()

        # Right/left anchor atom indices of each fragment, in the numbering
        # scheme of the fragment molecule.
        self.ranc_list = list()
        self.lanc_list = list()

        # Right/left cap atom indices of each fragment, in the numbering
        # scheme of the fragment molecule.
        self.rcap_list_map = list()
        self.lcap_list_map = list()

        # Connector index belonging to each right/left cap.
        self.rcap_conn_idx = list()
        self.lcap_conn_idx = list()

        # Fragment-to-fragment cross couplings: the atom indices of each
        # coupling and the fragment indices of each coupling.
        self.frag2frag_atms = list()
        self.frag2frag_frgs = list()

        self.__frag_count = 0
        self.__connector_count = 0

        self.verbose = Verbose
        if self.verbose:
            self.process_list = list()
Example #19
0
def getSymmClasses(mol):
    """Group atom indices by canonical rank (ties kept).

    Prints the ranks. When every atom has its own rank, "no equivalents" is
    printed and an empty list is returned. Otherwise one list of atom
    indices is returned per distinct rank, single-member groups included.
    """
    ranks = list(Chem.CanonicalRankAtoms(mol, breakTies=False))
    print('ranks: ', ranks)

    distinct_ranks = set(ranks)
    if len(distinct_ranks) == len(ranks):
        print("no equivalents")
        return []

    return [[idx for idx, rank in enumerate(ranks) if rank == wanted]
            for wanted in distinct_ranks]
Example #20
0
def canonical_order_atoms(molecule, h_last=True):
    """
    Canonical order atoms in RDKit molecule. Each atom in the molecule is
    given a map index that corresponds to the RDKit rank for that atom (+1).
    RDKit atom ranking ranks hydrogens first and then the heavy atoms. When
    h_last is set to True, the map indices are shifted to put hydrogens after
    the heavy atoms.

    Parameters
    ----------
    molecule: rdkit mol
    h_last: bool, optional, default is True

    Returns
    -------
    molecule: rdkit molecule (with explicit hydrogens added) whose map
        indices correspond to the atom canonical rank
    """

    # An existing atom map is removed first because Chem.CanonicalRankAtoms
    # uses map indices in ranking.
    if has_atom_map(molecule):
        remove_atom_map(molecule)

    # Add explicit hydrogen
    molecule = Chem.AddHs(molecule)
    ranks = list(Chem.CanonicalRankAtoms(molecule, breakTies=True))

    hydrogens = 0
    for atom_idx, rank in enumerate(ranks):
        atom = molecule.GetAtomWithIdx(atom_idx)
        atom.SetAtomMapNum(rank + 1)
        if atom.GetAtomicNum() == 1:
            hydrogens += 1
    heavy_atoms = len(ranks) - hydrogens

    if h_last:
        # Shift heavy atoms down by the hydrogen count and hydrogens up by
        # the heavy-atom count.
        for atom in molecule.GetAtoms():
            if atom.GetAtomicNum() == 1:
                shift = heavy_atoms
            else:
                shift = -hydrogens
            atom.SetAtomMapNum(atom.GetAtomMapNum() + shift)
    return molecule
Example #21
0
def get_graph_from_smiles(smiles):
    """Build a networkx graph plus atom/bond descriptors from a SMILES.

    Returns:
        ``(graph, atom_types, atom_ranks, bonds, bond_types)``: an undirected
        graph with one node per atom and one edge per bond, the
        ``meta.atom_to_index`` value and canonical rank of every atom, the
        ``[begin, end]`` atom index pair of every bond and its
        ``meta.bond_to_index`` value.
    """
    mol = Chem.MolFromSmiles(smiles)

    atom_ranks = list(Chem.CanonicalRankAtoms(mol))
    atom_types = [meta.atom_to_index(atom) for atom in mol.GetAtoms()]

    bonds, bond_types = [], []
    for bond in mol.GetBonds():
        bonds.append([bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()])
        bond_types.append(meta.bond_to_index(bond))

    graph = nx.Graph()
    graph.add_nodes_from(range(len(atom_types)))
    graph.add_edges_from(bonds)

    return graph, atom_types, atom_ranks, bonds, bond_types
Example #22
0
File: crem.py Project: DrrDom/crem
def __extend_output_by_equivalent_atoms(mol, output):
    """
    Generate additional fragments which cover equivalent atoms, so that
    replacements are made for equivalent atoms as well.

    Atoms are equivalent when they share a canonical rank computed with ties
    kept and without chirality or isotope information.

    :param mol: molecule the fragments were taken from
    :param output: items indexed as (item[0], fragment SMILES, atom ids)
    :return: list of new (item[0], item[1], sorted tuple of equivalent ids)
             entries; the input items themselves are not included
    """

    atom_ranks = list(
        Chem.CanonicalRankAtoms(mol,
                                breakTies=False,
                                includeChirality=False,
                                includeIsotopes=False))

    # rank -> indices of all atoms carrying that rank
    members = defaultdict(list)
    for atom_idx, rank in enumerate(atom_ranks):
        members[rank].append(atom_idx)

    # atom index -> the other atoms of its rank (only for shared ranks)
    atom_eq = dict()
    for group in members.values():
        if len(group) < 2:
            continue
        for atom_idx in group:
            atom_eq[atom_idx] = [other for other in group if other != atom_idx]

    extended_output = []
    for item in output:
        frag_ids = item[2]
        # Only fragments whose atoms all have equivalents can be extended.
        if not all(i in atom_eq for i in frag_ids):
            continue

        smi = patt_remove_brackets.sub('', patt_remove_map.sub('', item[1]))
        query = Chem.MolFromSmarts(smi)
        matched_sets = [set(match) for match in mol.GetSubstructMatches(query)]

        for ids_matched in matched_sets:
            # enumerate all combinations of equivalent atoms
            for ids_eq in product(*(atom_eq[i] for i in frag_ids)):
                if set(ids_eq) == ids_matched:
                    extended_output.append(
                        (item[0], item[1], tuple(sorted(ids_eq))))
    return extended_output
Example #23
0
def canonicalize_atom_order(m, reverse=True, add_hs=True):
    """Renumber the atoms of a molecule by canonical rank using RDKit.

    Args:
        m (rdkit.Chem.Mol): Mol object for RDKit
        reverse (bool): order atoms by descending rank when true, ascending
            otherwise
        add_hs (bool): add explicit hydrogens before ranking; when false the
            input molecule itself receives the 2D coordinates

    Returns:
        rdkit.Chem.Mol: New canonicalized RDKit mol
    """
    mH = Chem.AddHs(m) if add_hs else m
    Compute2DCoords(mH)

    # (rank, atom index) pairs ordered by rank; the index column of the
    # sorted pairs is the new atom order.
    rank_index_pairs = [
        (rank, idx) for idx, rank in enumerate(Chem.CanonicalRankAtoms(mH))
    ]
    rank_index_pairs.sort(reverse=reverse)
    m_neworder = tuple(zip(*rank_index_pairs))[1]

    m_canon = Chem.RenumberAtoms(mH, m_neworder)
    add_atom_indices(m_canon)
    return m_canon
    def convert(self):
        """ Convert atom order.

        A new molecule is assembled from ``self.mol``: atoms are added in
        canonical rank order (chirality included, ties broken), bonds are
        re-created between the renumbered atoms with their bond type, and the
        positions of conformer 0 are copied over when such a conformer exists.

        Returns:
            RDKit Mol object: An RDKit Mol object with canonical atom order.
        """
        source = self.mol

        # old2new[old index] -> new index, and the inverse lookup.
        old2new = Chem.CanonicalRankAtoms(source,
                                          includeChirality=True,
                                          breakTies=True)
        new2old = {new: old for old, new in enumerate(old2new)}
        n_atoms = len(old2new)

        new_mol = Chem.rdchem.RWMol(Chem.Mol())
        for new_idx in range(n_atoms):
            new_mol.AddAtom(source.GetAtomWithIdx(new2old[new_idx]))

        for bond in source.GetBonds():
            new_mol.AddBond(
                old2new[bond.GetBeginAtomIdx()],
                old2new[bond.GetEndAtomIdx()],
                bond.GetBondType(),
            )

        # Atom positions: a ValueError from GetConformer is taken to mean
        # that there is no conformer to copy.
        try:
            source_conformer = source.GetConformer(0)
        except ValueError:
            source_conformer = None
        if source_conformer is None:
            return new_mol

        target_conformer = Chem.Conformer(new_mol.GetNumAtoms())
        for new_idx in range(n_atoms):
            target_conformer.SetAtomPosition(
                new_idx, source_conformer.GetAtomPosition(new2old[new_idx]))
        new_mol.AddConformer(target_conformer)
        return new_mol
Example #25
0
def __standardize_smiles_with_att_points(mol, keep_stereo=False):
    """
    to avoid different order of atoms in SMILES with different map number of attachment points

    smi = ["ClC1=C([*:1])C(=S)C([*:2])=C([*:3])N1",
           "ClC1=C([*:1])C(=S)C([*:3])=C([*:2])N1",
           "ClC1=C([*:2])C(=S)C([*:1])=C([*:3])N1",
           "ClC1=C([*:2])C(=S)C([*:3])=C([*:1])N1",
           "ClC1=C([*:3])C(=S)C([*:1])=C([*:2])N1",
           "ClC1=C([*:3])C(=S)C([*:2])=C([*:1])N1"]

    these will produce different output with RDKit MolToSmiles():
        S=c1c([*:1])c(Cl)[nH]c([*:3])c1[*:2]
        S=c1c([*:1])c(Cl)[nH]c([*:2])c1[*:3]
        S=c1c([*:1])c([*:3])[nH]c(Cl)c1[*:2]
        S=c1c([*:2])c(Cl)[nH]c([*:1])c1[*:3]
        S=c1c([*:1])c([*:2])[nH]c(Cl)c1[*:3]
        S=c1c([*:2])c([*:1])[nH]c(Cl)c1[*:3]

    output of this function
        S=c1c([*:2])c([*:3])[nH]c(Br)c1[*:1]
        S=c1c([*:3])c([*:2])[nH]c(Br)c1[*:1]
        S=c1c([*:1])c([*:3])[nH]c(Br)c1[*:2]
        S=c1c([*:3])c([*:1])[nH]c(Br)c1[*:2]
        S=c1c([*:1])c([*:2])[nH]c(Br)c1[*:3]
        S=c1c([*:2])c([*:1])[nH]c(Br)c1[*:3]

    https://sourceforge.net/p/rdkit/mailman/message/35862258/

    :param mol: molecule with mapped attachment points. It is modified: the
                original map numbers are stored in the "backupAtomMap" atom
                property and the map numbers are replaced by canonical ones.
    :param keep_stereo: passed to MolToSmiles as ``isomericSmiles``
    :return: SMILES string carrying the original map numbers. A molecule
             without mapped atoms yields its plain SMILES.
    """

    # update property cache if needed
    if mol.NeedsUpdatePropertyCache():
        mol.UpdatePropertyCache()

    # store original maps and remove map numbers from mol
    backup_atom_map = "backupAtomMap"
    for a in mol.GetAtoms():
        atom_map = a.GetAtomMapNum()
        if atom_map:
            a.SetIntProp(backup_atom_map, atom_map)
            a.SetAtomMapNum(0)

    # get canonical ranks for atoms for a mol without maps
    atoms = sorted(
        (rank, atom_idx)
        for atom_idx, rank in enumerate(list(Chem.CanonicalRankAtoms(mol))))

    # set new atom maps based on canonical order
    rep = {}
    atom_map = 1
    for _, atom_idx in atoms:
        a = mol.GetAtomWithIdx(atom_idx)
        if a.HasProp(backup_atom_map):
            a.SetAtomMapNum(atom_map)
            rep["[*:%i]" % atom_map] = "[*:%i]" % a.GetIntProp(backup_atom_map)
            atom_map += 1

    # get SMILES and relabel with original map numbers
    s = Chem.MolToSmiles(mol, isomericSmiles=keep_stereo)

    # Without mapped atoms there is nothing to relabel. An empty alternation
    # would compile to a pattern matching the empty string everywhere and the
    # lookup below would raise KeyError('').
    if not rep:
        return s

    # Single pass, so a freshly inserted label is never substituted again.
    patt = re.compile("|".join(re.escape(k) for k in rep))
    return patt.sub(lambda m: rep[m.group(0)], s)
import math
import pickle

# Script fragment: reads molecules line by line from the input file and
# builds a SMILES "shingle" for the bond environment around each atom.
# NOTE(review): the fragment ends right after ``shingle`` is computed;
# ``p_out``, ``db_shingles`` and ``sh_count`` are not used in the visible
# part — presumably the remainder of the script consumes them.

# First command-line argument: input path; second: output path.
p_in = sys.argv[1]
p_out = sys.argv[2]

db_shingles = {}
sh_count = 0
with open(p_in, 'r') as fi_in:
    #fi_in.readline() # header
    for i, line in enumerate(fi_in):
        # The SMILES is the first tab-separated field of the line.
        smi = line.split('\t')[0].rstrip()
        mol = Chem.MolFromSmiles(smi)

        # Lines whose SMILES does not yield a usable molecule are skipped.
        if mol:
            # NOTE(review): the values produced by CanonicalRankAtoms are
            # used as atom indices below — confirm this is intended.
            for atm in Chem.CanonicalRankAtoms(mol):
                # Environments of radius 1, 2 and 3 around the atom.
                for N in range(1, 4):
                    bonds = AllChem.FindAtomEnvironmentOfRadiusN(mol, N, atm)

                    # No bonds at this radius: stop growing the environment.
                    if not bonds:
                        break

                    # the faster method...
                    # Collect the atoms touched by the environment's bonds.
                    atoms = set()
                    for bond_id in bonds:
                        bond = mol.GetBondWithIdx(bond_id)
                        atoms.add(bond.GetBeginAtomIdx())
                        atoms.add(bond.GetEndAtomIdx())
                    # NOTE(review): the positional arguments follow the
                    # parameter order of MolFragmentToSmiles; presumably
                    # ``atm`` is the root atom — verify against the
                    # installed RDKit version.
                    shingle = Chem.rdmolfiles.MolFragmentToSmiles(
                        mol, list(atoms), bonds, 0, 0, False, False, atm, True,
                        False, False)
Example #27
0
 def __init__(self, inchiStr):
     """Build the molecule from an InChI string and rank its atoms.

     Args:
         inchiStr: InChI string parsed with ``Chem.inchi.MolFromInchi``.

     The ``self`` parameter was missing before, so the body's ``self``
     references could not resolve when the class was instantiated.
     """
     self.mol = Chem.inchi.MolFromInchi(inchiStr)
     # NOTE(review): CanonicalRankAtoms is called with its default
     # arguments; if symmetry-equivalent atoms are meant to share a rank,
     # ``breakTies=False`` may be needed — confirm.
     self.symmEquivalence = Chem.CanonicalRankAtoms(self.mol)
Example #28
0
    def get_mol(self):
        """
        Return section containing element types, fitting weight, molecule
        title, number of atoms and atom equivalencing.

        For every molecule the section holds the fitting weight, the title,
        a ``2I5`` record with the rounded charge and the atom count, and then
        one ``2I5`` record per atom: the atomic number followed by 0, -1
        (written for non-hydrogen atoms when ``noh_frozen`` is set) or the
        1-based index of the first atom sharing the same canonical rank.

        groups_frozen: Freeze charges in groups to the values in qin file,
                       typically obtained from previous resp run.

        h_equiv      : Fit charges of degenerate hydrogen atoms together

        all_equiv    : Freeze charges of all degenerate atoms together. If
                       this is activated, and h_equiv is deactivated, only
                       heavy-atom atomic centers will be fitted together.

        unfreeze_all : Every atom gets a single ``0`` record. Earlier this
                       branch did not mark the atom as handled, so a second
                       record was appended for the same atom.

        Returns the whole section as one string.
        """

        line_2I5 = fortranformat.FortranRecordWriter('2I5')

        def equiv_record(at_num, atom_i, canonical_rank):
            # ``first`` is the lowest atom index with the same canonical
            # rank as atom_i (possibly atom_i itself).
            first = canonical_rank.index(canonical_rank[atom_i])
            # First encounter of this rank in the molecule: the atom center
            # varies independently.
            if first == atom_i:
                return line_2I5.write([at_num, 0])
            # Otherwise tie atom_i to the first atom of that rank. Note that
            # resp expects atom counting to start at 1, *not* 0.
            return line_2I5.write([at_num, first + 1])

        _tmp_str = list()

        for mol_i in range(self._mol_count):
            mol = self._mol_list[mol_i]

            _tmp_str.append('  %f\n' % self._mol_weight_list[mol_i])
            _tmp_str.append('  %s\n' % self._mol_name_list[mol_i])

            _charge = int(round(float(self._mol_charge_list[mol_i])))
            _natoms = mol.GetNumAtoms()
            _tmp_str.append(line_2I5.write([_charge, _natoms]))
            _tmp_str.append('\n')

            canonical_rank = [
                int(rank)
                for rank in Chem.CanonicalRankAtoms(mol, breakTies=False)
            ]
            ### This really never worked perfectly...
            canonicalize_tautomers(canonical_rank, mol)

            index_list = np.arange(_natoms)

            if mol_i not in self._intermol1:
                for atom_i in index_list:
                    at_num = mol.GetAtomWithIdx(int(atom_i)).GetAtomicNum()
                    _tmp_str.append(line_2I5.write([at_num, 0]))
                    _tmp_str.append('\n')

            else:
                for atom_i in index_list:
                    at_num = mol.GetAtomWithIdx(int(atom_i)).GetAtomicNum()

                    # Atoms on the free list always get a plain 0 record.
                    if mol_i in self._free_list_mol:
                        mol_i_idx = self._free_list_mol.index(mol_i)
                        if atom_i in self._free_list[mol_i_idx]:
                            _tmp_str.append(line_2I5.write([at_num, 0]))
                            _tmp_str.append('\n')
                            continue

                    placed_frozen = False

                    if self.unfreeze_all:
                        _tmp_str.append(line_2I5.write([at_num, 0]))
                        # Mark as handled so that no second record follows.
                        placed_frozen = True

                    elif self.groups_frozen:
                        ### Check if atom itself is in group
                        for index, atom_j in enumerate(self._group_atom_list):
                            if atom_j == atom_i \
                            and self._group_mol_list[index] == mol_i:
                                if self.noh_frozen and at_num != 1:
                                    _tmp_str.append(
                                        line_2I5.write([at_num, -1]))
                                elif not self.h_groups_frozen \
                                and at_num == 1 and self.h_equiv:
                                    _tmp_str.append(
                                        equiv_record(at_num, atom_i,
                                                     canonical_rank))
                                else:
                                    _tmp_str.append(
                                        line_2I5.write([at_num, 0]))
                                placed_frozen = True
                                break

                    if not placed_frozen:
                        if self.noh_frozen and at_num != 1:
                            _tmp_str.append(line_2I5.write([at_num, -1]))
                        elif (self.h_equiv and at_num == 1) \
                        or (self.all_equiv and not self.h_equiv and at_num != 1):
                            _tmp_str.append(
                                equiv_record(at_num, atom_i, canonical_rank))
                        else:
                            _tmp_str.append(line_2I5.write([at_num, 0]))

                    _tmp_str.append('\n')

            if self._mol_count > 1:
                _tmp_str.append('\n')

        return ''.join(_tmp_str)
Example #29
0
def make_multiple_cuts(mol, atom_pairs, chiral_flags, fragment_filter):
    """Yield Fragmentation records for cutting `mol` at two or more atom pairs.

    The molecule is cut with `fragment_on_atom_pairs`. Every resulting
    fragment must carry either exactly one wildcard atom (it is then part of
    the constant part) or exactly `num_cuts` wildcard atoms (it is then the
    variable part). The generator finishes without yielding anything when

      * a one-wildcard fragment has fewer non-wildcard atoms than
        `fragment_filter.min_heavies_per_const_frag`, or
      * some fragment has any other number of wildcard atoms.

    Otherwise one Fragmentation is yielded for every distinct SMILES obtained
    from the (label, chiral assignments) pairs produced by `up_enumerate`.

    Args:
        mol: the parent molecule.
        atom_pairs: the atom pairs to cut; there must be at least two
            (checked with an assert).
        chiral_flags: chiral flags of the parent molecule, forwarded to
            `up_enumerate`.
        fragment_filter: object providing `min_heavies_per_const_frag`.

    Note:
        `fragmented_mol` is modified in place (chiral tags are set) while the
        generator is being consumed.
    """
    num_cuts = len(atom_pairs)
    assert num_cuts >= 2, num_cuts
    fragmented_mol, other_atom_table = fragment_on_atom_pairs(mol, atom_pairs)

    # Figure out which atoms are in the variable part and which atoms are in the constant part.

    constant_atom_indices = []
    variable_atom_indices = []
    for atom_indices in Chem.GetMolFrags(fragmented_mol):
        # Atoms with atomic number 0 are the wildcard ("*") atoms.
        non_wildcard_indices = []
        for atom_index in atom_indices:
            if fragmented_mol.GetAtomWithIdx(atom_index).GetAtomicNum() != 0:
                non_wildcard_indices.append(atom_index)
        num_wildcard_atoms = len(atom_indices) - len(non_wildcard_indices)
        if num_wildcard_atoms == 1:
            # Filter out fragmentations with too small fragments in the constant
            if len(non_wildcard_indices
                   ) < fragment_filter.min_heavies_per_const_frag:
                # Bare return in a generator: the caller sees an empty iteration.
                return
            constant_atom_indices.extend(non_wildcard_indices)
        elif num_wildcard_atoms == num_cuts:
            variable_atom_indices.extend(non_wildcard_indices)
        else:
            # Did not cut into core+rgroups
            return


#    # Filter out fragmentations with too small fragments in the constant
#    if fragment_filter.min_heavies_per_const_frag > 0:
#        for frag in Chem.GetMolFrags(fragmented_mol, asMols=True):
#            num_wildcards = 0
#            for atom in frag.GetAtoms():
#                if atom.GetAtomicNum() == 0:
#                    num_wildcards += 1
#            if num_wildcards == 1 and frag.GetNumHeavyAtoms() < fragment_filter.min_heavies_per_const_frag:
#                yield None
#                return

    # Determine the symmetry of the variable part
    fragmented_mol.UpdatePropertyCache(
        strict=False)  # XXX magic; without it I get a RuntimeError
    Chem.AssignStereochemistry(fragmented_mol, cleanIt=True, force=True)

    #   "getNumImplicitHs() called without preceding call to calcImplicitValence()"
    new_atom_ranks = Chem.CanonicalRankAtoms(fragmented_mol, breakTies=False)
    # NOTE(review): the ranks come from fragmented_mol but the molecule passed
    # here is the parent `mol` -- confirm this pairing is intended.
    new_chiral_flags = get_chiral_flags(mol, new_atom_ranks)

    # Canonical SMILES already emitted; used to drop duplicate enumerations.
    seen_smiles = set()
    #
    for enumeration_label, chiral_assignments in up_enumerate(
            fragmented_mol, constant_atom_indices, variable_atom_indices,
            chiral_flags, new_chiral_flags):
        if enumeration_label == EnumerationLabel.NO_ENUMERATION:
            # Nothing was changed on the molecule, so the ranks computed
            # above can be reused.
            assert chiral_assignments is None
            atom_ranks = new_atom_ranks
            ## print("reused:", list(atom_ranks))
        else:
            # Apply this enumeration's chiral tags in place, then re-rank.
            for (atom_index, chiral_tag) in chiral_assignments:
                fragmented_mol.GetAtomWithIdx(atom_index).SetChiralTag(
                    chiral_tag)
            fragmented_mol.ClearComputedProps()  # XXX Do I need this?
            atom_ranks = Chem.CanonicalRankAtoms(fragmented_mol,
                                                 breakTies=False)
            ## print("computed:", list(atom_ranks))

        # Work in SMILES space so we find a canonical mapping between the
        # unlabeled canonical variable and canonical constant parts.
        smiles = cansmiles(fragmented_mol)
        #print("smiles", smiles)

        # The up-enumeration may have several ways to generate the same structure.
        # For example, flipping two "@"s to "@@"s may leave the structure unchanged.
        if smiles in seen_smiles:
            continue
        seen_smiles.add(smiles)

        # Figure out which is the variable/core structure.
        # It's the one with the most "*"s on it (must equal the number of cuts)
        frag_smiles_list = smiles.split(".")
        assert len(frag_smiles_list) == num_cuts + 1, smiles
        variable_component_index = _get_variable_index(frag_smiles_list)
        if variable_component_index is None:
            # 3 cuts but no fragment with three "*"s
            raise AssertionError(("I already checked for this", smiles))

        #print("core is at", variable_component_index)

        # Get the mapping from position in the SMILES string to atom index in the molecule
        smiles_index_to_atom_index = get_atom_order_in_smiles(fragmented_mol)

        # Determine the constant part (the rgroups)
        # i.e. every component index except the variable one, in SMILES order.
        constant_component_indices = list(range(num_cuts + 1))
        del constant_component_indices[variable_component_index]
        constant_smiles_list = [
            frag_smiles_list[i] for i in constant_component_indices
        ]
        assert len(constant_smiles_list) == num_cuts

        # Find the connection points on the variable part
        component_atom_symbols = get_component_atom_symbols(smiles)
        variable_connection_atom_indices = []
        variable_atom_indices2 = []
        for smiles_index, smiles_symbol in component_atom_symbols[
                variable_component_index]:
            atom_index = smiles_index_to_atom_index[smiles_index]
            if "*" in smiles_symbol:
                variable_connection_atom_indices.append(atom_index)
            else:
                variable_atom_indices2.append(atom_index)  # XXX Remove
        # Cross-check: the SMILES-derived variable atoms must match the ones
        # found from the fragment scan at the top of the function.
        assert sorted(variable_atom_indices) == sorted(
            variable_atom_indices2), (sorted(variable_atom_indices),
                                      sorted(variable_atom_indices2))

        assert len(variable_connection_atom_indices) == num_cuts

        #print("variable_connection_atom_indices", variable_connection_atom_indices)
        # Symmetry class computed from the ranks of the variable part's
        # wildcard (attachment) atoms.
        variable_symmetry_class = get_symmetry_class(
            *(atom_ranks[atom_index]
              for atom_index in variable_connection_atom_indices))

        # Determine the symmetry of the constant part (the rgroups)

        constant_symmetry_class = get_symmetry_class(*constant_smiles_list)

        # Figure out which R-groups in the constant part correspond to the
        # attachment points in the core/variable part.
        atom_index_to_rgroup_label = {}
        # This rebinds the name to a fresh list; the list object handed to
        # up_enumerate() above is not modified. The new list is not read by
        # any live code further down (only by commented-out code).
        constant_atom_indices = []
        for rgroup_id, component_i in enumerate(constant_component_indices):
            rgroup_label = str(rgroup_id)
            for (smiles_index,
                 smiles_symbol) in component_atom_symbols[component_i]:
                atom_index = smiles_index_to_atom_index[smiles_index]
                atom_index_to_rgroup_label[atom_index] = rgroup_label
                if "*" not in smiles_symbol:
                    constant_atom_indices.append(atom_index)

        # For each attachment atom of the variable part (in SMILES order),
        # look up its partner atom via other_atom_table and record the label
        # of the R-group that partner belongs to.
        attachment_order = "".join(
            atom_index_to_rgroup_label[other_atom_table[atom_index]]
            for atom_index in variable_connection_atom_indices)
        # Figure the canonical attachment order
        canonical_attachment_order = CANONICAL_ATTACHMENT_ORDER[
            variable_symmetry_class, constant_symmetry_class, attachment_order]

        # Figure out which atoms in the variable part are still chiral
        ## fragmented_chiral_flags = get_chiral_flags(fragmented_mol, atom_ranks)
        ## variable_num_chirals, variable_num_lost_chirals, variable_num_new_stereocenters = \
        ##   get_chiral_difference(variable_atom_indices2, chiral_flags, fragmented_chiral_flags)

        ## constant_num_chirals, constant_num_lost_chirals, constant_num_new_stereocenters = \
        ##   get_chiral_difference(constant_atom_indices2, chiral_flags, fragmented_chiral_flags)

        variable_smiles = frag_smiles_list[variable_component_index]
        constant_smiles = ".".join(constant_smiles_list)
        ## print("variable_smiles:", variable_smiles)
        ## print("constant_smiles:", constant_smiles)

        # Test that I can reconnect
        # (disabled debugging check; the `if 0:` body never runs)
        if 0:
            offsets = [int(c) for c in canonical_attachment_order]
            var_part = smiles_syntax.convert_wildcards_to_closures(
                variable_smiles, offsets)
            const_part = smiles_syntax.convert_wildcards_to_closures(
                constant_smiles, list(range(num_cuts)))
            smi = Chem.CanonSmiles(var_part + "." + const_part, 0)
            expected_smi = Chem.MolToSmiles(mol)
            if smi != expected_smi:
                print("     Got:", smi)
                print("Expected:", expected_smi)
            assert smi == expected_smi, (smi, expected_smi)

        ## print("Fragmentation")
        ## print(get_num_heavies_from_smiles(variable_smiles), variable_symmetry_class, variable_smiles)

        # The final None fills the slot where make_single_cut passes the
        # constant SMILES with its wildcard replaced by H.
        yield Fragmentation(
            num_cuts,
            enumeration_label,
            get_num_heavies_from_smiles(variable_smiles),
            variable_symmetry_class,
            variable_smiles,
            canonical_attachment_order,
            get_num_heavies_from_smiles(constant_smiles),
            constant_symmetry_class,
            constant_smiles,
            None,
        )
Example #30
0
def make_single_cut(mol, atom_pair, chiral_flags, fragment_filter):
    """Yield the Fragmentation records for cutting `mol` at one atom pair.

    The molecule is split into exactly two fragments. Each fragment is used
    once as the constant part and once as the variable part; an orientation
    is skipped when its constant part has fewer heavy atoms than
    `fragment_filter.min_heavies_per_const_frag`.

    For every orientation that is kept, this yields:

      * one NO_ENUMERATION record using the fragment SMILES as generated,
      * one CONSTANT_UP_ENUMERATION record per up-enumerated constant SMILES
        (variable part unchanged),
      * one VARIABLE_UP_ENUMERATION record per up-enumerated variable SMILES
        (constant part unchanged).

    Args:
        mol: the parent molecule.
        atom_pair: the atom pair to cut, passed to `fragment_on_atom_pairs`.
        chiral_flags: chiral flags of the parent molecule.
        fragment_filter: object providing `min_heavies_per_const_frag`.

    Note:
        Chiral tags on the fragmented copy are modified in place while the
        up-enumerated SMILES are generated.
    """
    fragmented_mol, _ = fragment_on_atom_pairs(mol, [atom_pair])
    frag1_indices, frag2_indices = Chem.GetMolFrags(fragmented_mol)

    # The wildcard atoms should be the last two atoms in the molecule.
    num_atoms = fragmented_mol.GetNumAtoms()
    a1, a2 = num_atoms - 1, num_atoms - 2
    assert fragmented_mol.GetAtomWithIdx(a1).GetAtomicNum() == 0
    assert fragmented_mol.GetAtomWithIdx(a2).GetAtomicNum() == 0

    frag1_smiles = Chem.MolFragmentToSmiles(fragmented_mol,
                                            frag1_indices,
                                            isomericSmiles=True)
    frag2_smiles = Chem.MolFragmentToSmiles(fragmented_mol,
                                            frag2_indices,
                                            isomericSmiles=True)

    frag1_num_atoms = get_num_heavies_from_smiles(frag1_smiles)
    frag2_num_atoms = get_num_heavies_from_smiles(frag2_smiles)

    # Determine the symmetry of both parts
    fragmented_mol.UpdatePropertyCache(
        strict=False)  # XXX magic; without it I get a RuntimeError

    # Need to clear chiral tags which are no longer relevant because the new
    # wildcards are symmetric. The canonical SMILES output is affected by an
    # atom's chiral tag, even if the output doesn't denote chirality for that
    # atom. I need to clear the tags to get a truly canonical output.
    # See https://sourceforge.net/p/rdkit/mailman/message/35420297/ , from Greg
    # Landrum, on 2016-10-11 05:39:12 titled "identify chiral atoms which
    # became achiral after fragmenting".
    Chem.AssignStereochemistry(fragmented_mol, cleanIt=True, force=True)

    #   "getNumImplicitHs() called without preceding call to calcImplicitValence()"
    new_atom_ranks = Chem.CanonicalRankAtoms(fragmented_mol, breakTies=False)
    new_chiral_flags = get_chiral_flags(mol, new_atom_ranks)

    # For each fragment, collect the SMILES obtained by assigning chirality
    # to the stereocenters that are new in that fragment.
    up_enumerations = []
    for frag_indices in (frag1_indices, frag2_indices):
        # Drop the two trailing wildcard atoms (indices a2 and a1). This must
        # filter the fragment of the current iteration, not always fragment 1,
        # so that the stereocenters match the SMILES written below.
        frag_indices_without_wildcard = [a for a in frag_indices if a < a2]
        chiral_indices = get_new_stereocenter_indices(
            frag_indices_without_wildcard, chiral_flags, new_chiral_flags)
        up_enumeration = set()
        for chiral_assignment in chiral_enumerate(chiral_indices):
            for (atom_index, chiral_tag) in chiral_assignment:
                fragmented_mol.GetAtomWithIdx(atom_index).SetChiralTag(
                    chiral_tag)
            up_smiles = Chem.MolFragmentToSmiles(fragmented_mol,
                                                 frag_indices,
                                                 isomericSmiles=True)
            up_enumeration.add(up_smiles)
        up_enumerations.append(up_enumeration)

    frag1_up_enumerations, frag2_up_enumerations = up_enumerations

    # First pass: fragment 1 is the constant part and 2 is variable.
    # Second pass: the roles are swapped.
    for ((constant_num_atoms, constant_smiles, constant_up_enumerations,
          variable_num_atoms, variable_smiles, variable_up_enumerations)) in (
              (frag1_num_atoms, frag1_smiles, frag1_up_enumerations,
               frag2_num_atoms, frag2_smiles, frag2_up_enumerations),
              (frag2_num_atoms, frag2_smiles, frag2_up_enumerations,
               frag1_num_atoms, frag1_smiles, frag1_up_enumerations),
          ):

        if constant_num_atoms < fragment_filter.min_heavies_per_const_frag:
            continue

        constant_smiles_with_H = replace_wildcard_with_H(constant_smiles)
        yield Fragmentation(1, EnumerationLabel.NO_ENUMERATION,
                            variable_num_atoms, "1", variable_smiles, "0",
                            constant_num_atoms, "1", constant_smiles,
                            constant_smiles_with_H)

        # up-enumeration in the constant part
        for constant_up_smiles in constant_up_enumerations:
            yield Fragmentation(1, EnumerationLabel.CONSTANT_UP_ENUMERATION,
                                variable_num_atoms, "1", variable_smiles, "0",
                                constant_num_atoms, "1", constant_up_smiles,
                                replace_wildcard_with_H(constant_up_smiles))

        # up-enumeration in the variable part
        # The constant part is the un-enumerated one here, so it must be
        # `constant_smiles` (matching constant_smiles_with_H), not the loop
        # variable left over from the constant up-enumeration above.
        for variable_up_smiles in variable_up_enumerations:
            yield Fragmentation(1, EnumerationLabel.VARIABLE_UP_ENUMERATION,
                                variable_num_atoms, "1", variable_up_smiles,
                                "0", constant_num_atoms, "1",
                                constant_smiles, constant_smiles_with_H)