def fix_bond_order(mol: Chem.Mol) -> Chem.Mol:
    """
    Guess bond orders on a molecule in which hydrogens are explicitly present.

    An N, C or O atom with fewer than 3, 4 or 2 bonds respectively is treated
    as unsaturated ("sp2"). For every such atom:

    * exactly one unsaturated neighbour: the bond to it is set to DOUBLE;
    * several unsaturated neighbours: *each* bond to them is set to AROMATIC
      and flagged aromatic;
    * no unsaturated neighbour: a message is printed and nothing is changed.

    :param mol: molecule to fix; its bonds are modified in place.
    :return: the same ``mol`` object.
    :raises ValueError: if no bond can be found between an atom and one of
        its unsaturated neighbours.
    """
    # Number of bonds at which each element is considered saturated.
    saturated_at = {'N': 3, 'C': 4, 'O': 2}

    def is_sp2(atom: Chem.Atom) -> bool:
        """True for N/C/O atoms with fewer bonds than their saturated count."""
        limit = saturated_at.get(atom.GetSymbol())
        return limit is not None and len(atom.GetBonds()) < limit

    def get_other(bond: Chem.Bond, atom: Chem.Atom) -> Chem.Atom:
        """Given a bond and one of its atoms return the other atom."""
        # Compare indices: `atom == itself` gives False.
        if bond.GetEndAtomIdx() == atom.GetIdx():
            return bond.GetBeginAtom()
        return bond.GetEndAtom()

    def find_bonders(atom: Chem.Atom) -> List[Chem.Atom]:
        """All atoms bonded to ``atom``."""
        return [get_other(bond, atom) for bond in atom.GetBonds()]

    def find_sp2_bonders(atom: Chem.Atom) -> List[Chem.Atom]:
        """The bonded atoms that are themselves unsaturated."""
        return [neigh for neigh in find_bonders(atom) if is_sp2(neigh)]

    def descr(atom: Chem.Atom) -> str:
        """Short label (symbol + index) used in messages."""
        return f'{atom.GetSymbol()}{atom.GetIdx()}'

    ## main body of function
    for atom in mol.GetAtoms():
        if not is_sp2(atom):
            continue
        partners = find_sp2_bonders(atom)
        if not partners:
            print(descr(atom), ' is underbonded!')
            continue
        aromatic = len(partners) > 1
        for partner in partners:
            # Every partner bond is handled inside the loop; previously only
            # the bond to the last partner was updated in the aromatic case.
            bond = mol.GetBondBetweenAtoms(atom.GetIdx(), partner.GetIdx())
            if bond is None:
                raise ValueError('Issue with:', descr(atom), descr(partner))
            if aromatic:
                bond.SetBondType(Chem.rdchem.BondType.AROMATIC)
                bond.SetIsAromatic(True)
            else:
                bond.SetBondType(Chem.rdchem.BondType.DOUBLE)
    return mol
예제 #2
0
 def make_pair_by_split(self, conjoined: Chem.Mol,
                        atom_idx: int) -> Tuple[Chem.Mol, Chem.Mol]:
     """
     Make two overlapping molecules by splitting a single molecule twice.

     The molecule is cut once at the bond ``atom_idx``--``atom_idx + 1`` and
     once at the bond ``atom_idx - 1``--``atom_idx``. The first fragment of
     the first cut and the second fragment of the second cut are returned.

     Original author's note: this gives more control than
     ``Chem.rdMolAlign.AlignMol``, which may overlap other atoms
     (negative weights do not work).

     :param conjoined: the molecule to split (not modified here).
     :param atom_idx: index of the atom next to both cut bonds.
     :return: ``(fore, aft)``
     """

     def fragments_after_cut(idx_a: int, idx_b: int):
         # Sever the bond between the two atoms without adding dummy atoms.
         cut = conjoined.GetBondBetweenAtoms(idx_a, idx_b)
         pieces = Chem.FragmentOnBonds(conjoined, [cut.GetIdx()],
                                       addDummies=False)
         return Chem.GetMolFrags(pieces, asMols=True)

     fore = fragments_after_cut(atom_idx, atom_idx + 1)[0]
     aft = fragments_after_cut(atom_idx - 1, atom_idx)[1]
     return fore, aft
예제 #3
0
def substructure_to_feature(mol: Chem.Mol,
                            substructure: FrozenSet[int],
                            fg_features: List[List[int]] = None) -> str:
    """
    Turn a substructure into an order-independent feature string.

    The feature vectors of every atom in the substructure and of every bond
    joining two of its atoms are stringified, sorted and the sorted list is
    itself stringified.

    :param mol: A molecule.
    :param substructure: Atom indices making up the substructure.
    :param fg_features: Per-atom functional-group vectors, indexed by atom
        index; when omitted, ``None`` is passed to ``atom_features`` for
        every atom.
    :return: A string representing the featurization of the substructure.
    """
    if fg_features is None:
        fg_features = [None] * mol.GetNumAtoms()

    indices = list(substructure)

    atom_strings = []
    for idx in indices:
        atom = Chem.Mol.GetAtomWithIdx(mol, idx)
        atom_strings.append(
            str(atom_features(atom, fg_features[atom.GetIdx()])))

    # Collect the bonds between every unordered pair of substructure atoms.
    bond_strings = []
    for position, first in enumerate(indices):
        for second in indices[position + 1:]:
            bond = mol.GetBondBetweenAtoms(first, second)
            if bond is not None:
                bond_strings.append(str(bond_features(bond)))

    # Sorting makes the result independent of atom/bond ordering.
    return str(sorted(atom_strings + bond_strings))
예제 #4
0
 def _are_rings_bonded(self, mol: Chem.Mol, ringA: Tuple[int], ringB: Tuple[int]):
     """
     Tell whether any atom of ``ringA`` is bonded to any atom of ``ringB``.

     :param mol: molecule the atom indices refer to
     :param ringA: atom indices of the first ring
     :param ringB: atom indices of the second ring
     :return: True as soon as one bonded pair is found, otherwise False
     """
     return any(
         mol.GetBondBetweenAtoms(i, j) is not None
         for i in ringA
         for j in ringB
     )
예제 #5
0
def _GetBurdenMatrix(mol: Chem.Mol, propertylabel: str = 'm') -> numpy.matrix:
    """
    Build the weighted Burden matrix of ``mol`` and return its eigenvalues.

    Hydrogens are added first. The matrix B is filled as follows:

    * B[i, i]: the relative atomic property ``propertylabel`` of atom i
      (looked up with ``GetRelativeAtomicProperty``), rounded to 3 decimals;
    * B[i, j] for bonded atoms: square root of the bond order (1, 2, 3 or 1.5
      for aromatic), rounded to 3 decimals; other bond types keep the
      adjacency-matrix value;
    * B[i, j] for non-bonded pairs (i != j): 0.001.

    :param mol: molecule to describe.
    :param propertylabel: property name passed to ``GetRelativeAtomicProperty``.
    :return: real parts of the eigenvalues of B, as produced by
        ``numpy.linalg.eigvals``.
    """
    mol = Chem.AddHs(mol)
    adjacency = Chem.GetAdjacencyMatrix(mol)
    burden = numpy.array(adjacency, dtype=numpy.float32)

    # Diagonal: carbon-normalised atomic property of each atom.
    for idx in range(mol.GetNumAtoms()):
        symbol = mol.GetAtomWithIdx(idx).GetSymbol()
        weight = GetRelativeAtomicProperty(element=symbol,
                                           propertyname=propertylabel)
        burden[idx, idx] = round(weight, 3)

    # Bonded off-diagonal entries: sqrt(bond order).
    sqrt_bond_order = {
        'SINGLE': round(numpy.sqrt(1), 3),
        'DOUBLE': round(numpy.sqrt(2), 3),
        'TRIPLE': round(numpy.sqrt(3), 3),
        'AROMATIC': round(numpy.sqrt(1.5), 3),
    }
    for row, col in numpy.argwhere(adjacency):
        bond = mol.GetBondBetweenAtoms(int(row), int(col))
        type_name = bond.GetBondType().name
        if type_name in sqrt_bond_order:
            burden[row, col] = sqrt_bond_order[type_name]

    # Non-bonded off-diagonal entries get a small constant.
    for row, col in numpy.argwhere(adjacency == 0):
        if row != col:
            burden[row, col] = 0.001

    return numpy.real(numpy.linalg.eigvals(burden))
예제 #6
0
    def _categorise(self, mol: Chem.Mol,
                    uniques: set) -> Dict[str, Union[set, Dict]]:
        """
        Classify the novel atoms of ``mol`` by how they connect to the rest.

        Each atom of ``mol`` also gets a ``_Category`` property:
        ``internal`` (novel, bonded only to novel atoms),
        ``overlapping-attachment`` (not novel, bonded to a novel atom),
        ``internal-attachment`` (novel, bonded to a non-novel atom) or
        ``overlapping`` (everything else).

        The returned dict (called ``categories`` in the methods) holds:

        * ``uniques``: the set given as input
        * ``internals``: unique atoms connected solely to unique atoms
        * ``attachments``: non-unique atoms to which a unique atom connects
        * ``pairs``: unique atom idx --> list of dicts, each with ``idx``
          (attachment idx) and ``type`` (bond type)
        * ``dummies``: unique atoms whose symbol is ``self.dummy_symbol``

        :param mol: molecule to describe
        :param uniques: set of indices that are new to this molecule
        :return: the dict described above
        """
        pairs = {}
        internals = set()
        attachments = set()
        dummies = set()
        for idx in uniques:  # novel atoms
            novel_atom = mol.GetAtomWithIdx(idx)
            if novel_atom.GetSymbol() == self.dummy_symbol:
                dummies.add(idx)
            # neighbours that are not themselves novel
            outside = {n.GetIdx() for n in novel_atom.GetNeighbors()} - uniques
            if not outside:
                internals.add(idx)
                continue
            attachments.update(outside)
            pairs[idx] = [{
                'idx': j,
                'type': mol.GetBondBetweenAtoms(idx, j).GetBondType()
            } for j in outside]
        # store the classification on the atoms for safekeeping
        for atom in mol.GetAtoms():
            idx = atom.GetIdx()
            if idx in internals:  # novel and not connected
                label = 'internal'
            elif idx in attachments:  # not novel but connected
                label = 'overlapping-attachment'
            elif idx in pairs:  # membership test on the dict keys
                label = 'internal-attachment'
            else:
                label = 'overlapping'
            atom.SetProp('_Category', label)
        return dict(uniques=uniques,
                    internals=internals,
                    attachments=attachments,
                    pairs=pairs,
                    dummies=dummies)
예제 #7
0
    def get_conjugate_group_with_halogen(m: Mol):
        """
        Extract each conjugated group of ``m`` together with attached halogens.

        Atoms are grouped by the conjugated-group index reported by
        ``ResonanceMolSupplier``; atoms whose index is not below 1e5 are left
        out (presumably the marker for "no conjugated group" -- confirm
        against the RDKit documentation). Any I/F/Cl/Br atom bonded to a
        member of a group is added to that group.

        :param m: molecule to analyse
        :return: list of ``[sub_mol, old_id_2_new_id]`` pairs as produced by
            ``RdFunc.get_sub_rdmol``, largest sub-molecule first
        """
        halogens = ("I", "F", "Cl", "Br")
        natoms = len(m.GetAtoms())

        # Symmetric boolean adjacency matrix.
        adjmat = np.zeros((natoms, natoms), dtype=bool)
        for row in range(natoms):
            for col in range(row + 1, natoms):
                if isinstance(m.GetBondBetweenAtoms(row, col), Bond):
                    adjmat[row][col] = True
                    adjmat[col][row] = True

        supp = ResonanceMolSupplier(m, )
        cg_dict = {}
        for member in m.GetAtoms():
            member_id = member.GetIdx()
            group_id = supp.GetAtomConjGrpIdx(member_id)
            if group_id < 1e5:
                cg_dict[member_id] = group_id

        cgs = []
        for group_id in set(cg_dict.values()):
            group = [aid for aid, gid in cg_dict.items() if gid == group_id]
            for candidate in m.GetAtoms():
                cand_id = candidate.GetIdx()
                if cand_id in group:
                    continue
                touches_group = any(adjmat[cand_id][aid] for aid in group)
                if touches_group and candidate.GetSymbol() in halogens:
                    group.append(cand_id)
            cgmol, old_id_2_new_id = RdFunc.get_sub_rdmol(m, group)
            cgs.append([cgmol, old_id_2_new_id])
        return sorted(cgs, key=lambda x: x[0].GetNumAtoms(), reverse=True)
예제 #8
0
def get_edge_infos(molecule: Chem.Mol, graph: Graph):
    """
    Build an ``EdgeInfo`` for every edge of ``graph``.

    Every edge carries the atom distance, the atom-id pair and its ``kind``.
    Edges of kind 1 are looked up as bonds in ``molecule`` and additionally
    carry stereo, bond type, aromaticity, conjugation and ring-size flags
    (one 0/1 entry per value of ``RING_SIZES``).

    :param molecule: molecule whose atom indices the graph nodes refer to
    :param graph: graph whose edges have a ``'kind'`` attribute
    :return: list of ``EdgeInfo`` in the graph's edge order
    """
    edge_infos = []
    for source, sink in graph.edges:
        kind = graph.edges[source, sink]['kind']
        fields = dict(
            distance=tools.get_atom_distance(molecule, source, sink),
            atom_ids=(source, sink),
            kind=kind,
        )
        if kind == 1:
            bond = molecule.GetBondBetweenAtoms(source, sink)
            fields.update(
                stereo=bond.GetStereo(),
                bond_type=bond.GetBondType(),
                is_aromatic=bond.GetIsAromatic(),
                is_conjugated=bond.GetIsConjugated(),
                is_in_ring_size=tuple(
                    int(bond.IsInRingSize(size)) for size in RING_SIZES),
            )
        edge_infos.append(EdgeInfo(**fields))
    return edge_infos
def construct_discrete_edge_matrix(mol: Chem.Mol):
    """
    Build one adjacency matrix per bond type.

    The result has shape ``(4, MAX_NUMBER_ATOM, MAX_NUMBER_ATOM)`` and dtype
    float32. Channels 0 to 3 hold SINGLE, DOUBLE, TRIPLE and AROMATIC bonds;
    an entry is 1.0 where such a bond joins the two atoms (set symmetrically)
    and 0.0 elsewhere.

    :param mol: molecule to encode, or ``None``.
    :return: the stacked matrices, or ``None`` when ``mol`` is ``None``.
    :raises AssertionError: if a bond has any other bond type. This is raised
        explicitly so the check also holds when Python runs with ``-O``.
    """
    if mol is None:
        return None
    channel_of = {'SINGLE': 0, 'DOUBLE': 1, 'TRIPLE': 2, 'AROMATIC': 3}
    size = MAX_NUMBER_ATOM
    adjs = numpy.zeros((len(channel_of), size, size), dtype=numpy.float32)
    # Walk the bonds directly instead of probing every atom pair.
    for bond in mol.GetBonds():  # type: Chem.Bond
        bond_type = str(bond.GetBondType())
        if bond_type not in channel_of:
            print("[ERROR] Unknown bond type", bond_type)
            raise AssertionError(f"Unknown bond type {bond_type}")
        begin, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
        channel = channel_of[bond_type]
        adjs[channel, begin, end] = 1.0
        adjs[channel, end, begin] = 1.0
    return adjs
예제 #10
0
 def _prevent_allene(self, mol: Chem.Mol) -> Chem.Mol:
     """
     Downgrade cumulated multiple bonds on light atoms to single bonds.

     Only atoms with atomic number below 14 are inspected. For each of them
     the DOUBLE and TRIPLE bonds are counted:

     * more than two: all of them are set to SINGLE;
     * exactly two: the bond to the neighbour with the highest summed bond
       order is set to SINGLE;
     * fewer: nothing is done.

     :param mol: molecule to fix; converted to ``Chem.RWMol`` if it is not
         one already (an ``RWMol`` argument is modified in place).
     :return: the ``Chem.RWMol`` that was worked on.
     """
     if not isinstance(mol, Chem.RWMol):
         mol = Chem.RWMol(mol)
     for atom in mol.GetAtoms():
         if atom.GetAtomicNum() >= 14:
             continue
         n = [bond for bond in atom.GetBonds()
              if bond.GetBondType().name in ('DOUBLE', 'TRIPLE')]
         if len(n) > 2:
             # this is a mess!
             log.info(f'Allene issue: {n} double bonds on {atom.GetSymbol()} atom {atom.GetIdx()}!')
             for bond in n:
                 # Use the enum member directly, as in the two-bond branch.
                 bond.SetBondType(Chem.BondType.SINGLE)
         elif len(n) == 2:
             # downgrade the higher bonded one!
             centre_idx = atom.GetIdx()
             others = [a for bond in n
                       for a in (bond.GetBeginAtom(), bond.GetEndAtom())
                       if a.GetIdx() != centre_idx]
             # Ascending by total bond order, so the last one is the most bonded.
             others = sorted(others,
                             key=lambda other: sum(b.GetBondTypeAsDouble() for b in other.GetBonds()))
             log.info(f'Allene removed between {atom.GetIdx()} and {[a.GetIdx() for a in others]}')
             mol.GetBondBetweenAtoms(centre_idx, others[-1].GetIdx()).SetBondType(Chem.BondType.SINGLE)
     return mol
    def _categorise(self, mol: Chem.Mol,
                    uniques: set) -> Dict[str, Union[set, Dict]]:
        """
        Classify the novel atoms of ``mol`` by how they connect to the rest.

        The returned dict (called ``categories`` in the methods) holds:

        * ``uniques``: the set given as input
        * ``internals``: unique atoms connected solely to unique atoms
        * ``attachments``: non-unique atoms to which a unique atom connects
        * ``pairs``: unique atom idx --> list of dicts, each with ``idx``
          (attachment idx) and ``type`` (bond type)
        * ``dummies``: unique atoms whose symbol is ``self.dummy_symbol``

        When ``self._debug_draw`` is set the molecule is drawn with
        internals, attachments and anchors (uniques that are not internal)
        highlighted in different colours.

        :param mol: molecule to describe
        :param uniques: set of indices that are new to this molecule
        :return: the dict described above
        """
        pairs = {}
        internals = set()
        attachments = set()
        dummies = set()
        for idx in uniques:
            novel_atom = mol.GetAtomWithIdx(idx)
            if novel_atom.GetSymbol() == self.dummy_symbol:
                dummies.add(idx)
            # neighbours that are not themselves novel
            outside = {n.GetIdx() for n in novel_atom.GetNeighbors()} - uniques
            if not outside:
                internals.add(idx)
                continue
            attachments.update(outside)
            pairs[idx] = [{
                'idx': j,
                'type': mol.GetBondBetweenAtoms(idx, j).GetBondType()
            } for j in outside]
        if self._debug_draw:
            anchors = uniques - internals
            high = [*internals, *attachments, *anchors]
            # Later groups override earlier ones for indices in several groups.
            color = {}
            for group, rgb in ((internals, (0, 0.8, 0)),
                               (attachments, (0, 0, 0.8)),
                               (anchors, (0.8, 0, 0.8))):
                for i in group:
                    color[i] = rgb
            self.draw_nicely(mol,
                             highlightAtoms=high,
                             highlightAtomColors=color)
        return dict(uniques=uniques,
                    internals=internals,
                    attachments=attachments,
                    pairs=pairs,
                    dummies=dummies)
예제 #12
0
    def process(mol: Mol, device: torch.device, **kwargs):
        """
        Convert a molecule into an ``MPNNData`` graph with one extra node 0.

        Atom ``i`` of the molecule becomes node ``i + 1``. The edge list
        starts with one edge per atom pairing node ``i + 1`` with node 0
        (all-zero bond features), followed by two entries per bond (one for
        each direction) sharing the same bond feature vector.

        :param mol: molecule to convert
        :param device: torch device on which the tensors are created
        :param kwargs: accepted but not used
        :return: ``MPNNData(n, vec, edge_index, cnt, edge_attr)`` where ``n``
            is the node count and ``cnt`` the number of edge entries
        """
        n = mol.GetNumAtoms() + 1

        # Edges between every atom node and the extra node 0.
        heads = list(range(1, n))
        tails = [0] * len(heads)
        f_bonds = [[0] * feature.BOND_FDIM for _ in heads]

        # Two directed entries per chemical bond.
        for e in mol.GetBonds():
            u, v = e.GetBeginAtomIdx(), e.GetEndAtomIdx()
            heads += [u + 1, v + 1]
            tails += [v + 1, u + 1]
            f_bond = feature.bond_features(mol.GetBondBetweenAtoms(u, v))
            f_bonds += [f_bond, f_bond]
        cnt = len(heads)

        edge_index = torch.tensor([heads, tails], dtype=torch.long, device=device)

        # Node features: a zero row for node 0 on top of the atom features.
        v, m = feature.mol_feature(mol)
        vec = torch.cat([torch.zeros((1, m)), v]).to(device)

        edge_attr = torch.tensor(f_bonds, dtype=torch.float32, device=device)

        return MPNNData(n, vec, edge_index, cnt, edge_attr)
예제 #13
0
 def _prevent_weird_rings(self, mol: Chem.Mol):
     """
     Inspect every pair of rings and fix ring fusions deemed erroneous.

     For each pair of rings (from ``self._get_ring_info``) the shared atoms
     decide what happens:

     * 0 or 1 shared atoms (separate / spiro): only logged;
     * 2 shared atoms that are bonded (fused): if the smaller ring has 4 or 3
       atoms, ``self._place_between`` is called to expand it; otherwise left;
     * 2 shared atoms that are not bonded: an ``Exception`` is raised for a
       6 + 6 system, anything else is only printed;
     * more shared atoms but fewer than ``self.atoms_in_bridge_cutoff``:
       left alone with a warning;
     * otherwise: ``self._prevent_bridge_ring`` is applied and the whole
       check restarts from scratch on the result.

     :param mol: molecule to check; converted to ``Chem.RWMol`` if needed
     :return: the result of ``GetMol()`` on the working molecule
     """
     if not isinstance(mol, Chem.RWMol):
         mol = Chem.RWMol(mol)
     rings = self._get_ring_info(mol)  # GetRingInfo().AtomRings()
     for ring_A, ring_B in itertools.combinations(rings, r=2):
         shared = set(ring_A).intersection(set(ring_B))
         n_shared = len(shared)
         if n_shared == 0:
             log.debug('This molecule has some separate rings')
             continue
         if n_shared == 1:
             log.debug('This molecule has a spiro bicycle')
             continue
         if n_shared == 2:
             log.debug('This molecule has a fused ring')
             if mol.GetBondBetweenAtoms(*shared) is None:
                 if (len(ring_A), len(ring_B)) == (6, 6):
                     raise Exception('This is utterly impossible')
                 print(f'mysterious ring system {len(ring_A)} + {len(ring_B)}')
                 continue
             # Bonded shared pair: the indole/naphthalene kind of fusion.
             small, big = sorted([ring_A, ring_B], key=len)
             if len(small) == 4:
                 log.warning('This molecule has a benzo-azetine–kind-of-thing: expanding to indole')
                 # e.g. Chem.MolFromSmiles('C12CCCCC1CC2')
                 # benzo-azetine is likely an error: add an extra atom
                 a, b = set(small).difference(big)
                 self._place_between(mol, a, b)
             elif len(small) == 3:
                 log.warning('This molecule has a benzo-cyclopropane–kind-of-thing: expanding to indole')
                 # e.g. Chem.MolFromSmiles('C12CCCCC1C2')
                 # benzo-cyclopropane is actually impossible at this stage.
                 a = list(set(small).difference(big))[0]
                 for b in shared:
                     self._place_between(mol, a, b)
             continue
         if n_shared < self.atoms_in_bridge_cutoff:
             # adamantane/norbornane/tropinone kind of thing
             # (ideally one would check whether it is planar)
             log.warning('This molecule has a bridge: leaving')
             continue
         log.warning('This molecule has a bridge that will be removed')
         mol = self._prevent_bridge_ring(mol, ring_A)
         # ring info is stale now: start from scratch.
         return self._prevent_weird_rings(mol)
     return mol.GetMol()
 def merge(self, scaffold: Chem.Mol, fragmentanda: Chem.Mol,
           anchor_index: int, attachment_details: List[Dict]) -> Chem.Mol:
     """
     Graft the part of ``fragmentanda`` containing the anchor onto ``scaffold``.

     For every entry of ``attachment_details`` the bond between the anchor and
     the entry's ``idx_F`` atom is cut in ``fragmentanda``, the fragment that
     holds the anchor is combined with the current scaffold, and a bond of
     type ``type`` is added between the anchor and scaffold atom ``idx_S``.
     The combined molecule becomes the scaffold for the next entry.

     :param scaffold: molecule to extend
     :param fragmentanda: molecule providing the fragment
     :param anchor_index: index (in ``fragmentanda``) of the attaching atom
     :param attachment_details: dicts with keys ``idx_F``, ``idx_S``, ``type``
     :return: the final combined molecule, or ``scaffold`` unchanged when
         ``attachment_details`` is empty
     :raises Exception: if no fragment contains ``anchor_index``
     """
     for detail in attachment_details:
         attachment_index = detail['idx_F']  # fragmentanda attachment_index
         scaffold_attachment_index = detail['idx_S']
         bond_type = detail['type']
         cut = fragmentanda.GetBondBetweenAtoms(anchor_index, attachment_index)
         severed = Chem.FragmentOnBonds(fragmentanda, [cut.GetIdx()],
                                        addDummies=False)
         frag_split = []
         fragmols = Chem.GetMolFrags(severed,
                                     asMols=True,
                                     fragsMolAtomMapping=frag_split,
                                     sanitizeFrags=False)
         if self._debug_draw:
             print(frag_split)
         # Get the fragment of interest: the first one holding the anchor.
         match = next(((position, members)
                       for position, members in enumerate(frag_split)
                       if anchor_index in members), None)
         if match is None:
             raise Exception
         mol_N, indices = match
         frag = fragmols[mol_N]
         if self._debug_draw:
             self.draw_nicely(frag)
         combo = Chem.RWMol(rdmolops.CombineMols(scaffold, frag))
         # Fragment atoms come after the scaffold atoms in the combination.
         scaffold_anchor_index = indices.index(anchor_index) + scaffold.GetNumAtoms()
         if self._debug_draw:
             print(scaffold_anchor_index, scaffold_attachment_index,
                   anchor_index, scaffold.GetNumAtoms())
             self.draw_nicely(combo)
         combo.AddBond(scaffold_anchor_index, scaffold_attachment_index,
                       bond_type)
         Chem.SanitizeMol(
             combo,
             sanitizeOps=Chem.rdmolops.SanitizeFlags.SANITIZE_ADJUSTHS +
             Chem.rdmolops.SanitizeFlags.SANITIZE_SETAROMATICITY,
             catchErrors=True)
         if self._debug_draw:
             self.draw_nicely(combo)
         scaffold = combo
     return scaffold
예제 #15
0
def total_bond_feature(mol: Mol) -> Tuple[torch.Tensor, int]:
    '''
    Extract the feature vectors of all bonds of a molecule.

    The first row is an all-zero vector of length ``BOND_FDIM``; it is
    followed by one ``bond_features`` row per bonded atom pair, in order of
    increasing atom indices.

    Returns: (feature_vec, feature_dim)
    '''
    n_atoms = len(list(mol.GetAtoms()))

    candidate_bonds = (
        mol.GetBondBetweenAtoms(first, second)
        for first in range(n_atoms)
        for second in range(first + 1, n_atoms)
    )

    rows = [[0] * BOND_FDIM]
    rows.extend(bond_features(bond)
                for bond in candidate_bonds
                if bond is not None)

    return torch.tensor(rows), BOND_FDIM
예제 #16
0
    def get_sub_rdmol(m: Mol, atomids: [int]):
        """
        Build a new molecule from a subset of the atoms of ``m``.

        The selected atoms are copied (by atomic number only) in the order in
        which they occur in ``m``; the first one is created from a SMARTS
        pattern, the others as plain atoms. Every bond of ``m`` joining two
        selected atoms is re-created with the same bond type.

        :param m: source molecule
        :param atomids: indices of the atoms to keep
        :return: ``(mol, old_id_2_new_id)`` -- the new molecule and a dict
            mapping each kept atom's index in ``m`` to its index in ``mol``
        """
        kept_atoms = [atom for atom in m.GetAtoms() if atom.GetIdx() in atomids]
        atomic_numbers = [atom.GetAtomicNum() for atom in kept_atoms]
        old_id_2_new_id = {
            atom.GetIdx(): position
            for position, atom in enumerate(kept_atoms)
        }

        # Seed the editable molecule with the first atom, then add the rest.
        rwmol = Chem.RWMol(Chem.MolFromSmarts(f"[#{atomic_numbers[0]}]"))
        for number in atomic_numbers[1:]:
            rwmol.AddAtom(Chem.Atom(number))

        # Copy over the bonds between kept atoms.
        for old_i, old_j in combinations(atomids, 2):
            bond = m.GetBondBetweenAtoms(old_i, old_j)
            if not isinstance(bond, Bond):
                continue
            rwmol.AddBond(old_id_2_new_id[old_i],
                          old_id_2_new_id[old_j],
                          bond.GetBondType())
        return rwmol.GetMol(), old_id_2_new_id
예제 #17
0
 def _prevent_conjoined_ring(self, mol: Chem.Mol) -> Chem.Mol:
     """
     Remove bonds that directly join two atoms each present in 3+ rings.

     Atoms appearing in at least three of the rings given by
     ``self._get_ring_info`` are collected. Among the bonded pairs of such
     atoms, the one with the highest combined ring count has its bond
     removed; the method then calls itself until no such pair is left.

     Original author's note: this kills bridging bonds with no atoms in the
     bridge, so it is bridged, fused and spiro safe; it removes only one
     bond at a time, so adamantane/norbornane are safe.

     :param mol: molecule to check
     :return: a ``Chem.Mol`` (via ``GetMol()`` when ``mol`` is an ``RWMol``,
         otherwise ``mol`` itself)
     """
     ring_count = Counter(idx
                          for ring in self._get_ring_info(mol)
                          for idx in ring)
     nested = [idx for idx, count in ring_count.items() if count >= 3]
     bonded_pairs = [
         pair for pair in itertools.combinations(nested, r=2)
         if mol.GetBondBetweenAtoms(*pair) is not None
     ]
     if not bonded_pairs:
         return mol.GetMol() if isinstance(mol, Chem.RWMol) else mol
     # First pair with the highest combined ring membership.
     idx_a, idx_b = max(bonded_pairs,
                        key=lambda pair: ring_count[pair[0]] + ring_count[pair[1]])
     if not isinstance(mol, Chem.RWMol):
         mol = Chem.RWMol(mol)
     mol.RemoveBond(idx_a, idx_b)  # SetBoolProp('_IsRingBond') is not important
     log.info(f'Zero-atom bridged ring issue: bond between {idx_a}-{idx_b} removed')
     return self._prevent_conjoined_ring(mol)
예제 #18
0
    def _merge_part(self, scaffold: Chem.Mol, fragmentanda: Chem.Mol,
                    anchor_index: int, attachment_details: List[Dict],
                    other_attachments: List[int],
                    other_attachment_details: List[List[Dict]]) -> Chem.Mol:
        """
        Do the messy work for merge_pair: graft one fragment onto the scaffold.

        ``fragmentanda`` is cut at the anchor's attachment bonds and at the
        first attachment bond of every other attachment atom. The fragment
        holding the anchor is combined with ``scaffold`` and the severed
        bonds are re-created towards the scaffold atoms named in the details.

        :param scaffold: the Chem.Mol molecule onto whose copy the fragment gets added
        :param fragmentanda: the other Chem.Mol molecule
        :param anchor_index: the to-be-added fragment's internal atom that attaches (fragmentanda index)
        :param attachment_details: see the example below for an entry
        :type attachment_details: List[Dict]
        :param other_attachments: further fragmentanda atom indices that attach to the scaffold
        :param other_attachment_details: one list of detail dicts per entry of
            ``other_attachments``; only the first dict of each list is used
        :return: a new Chem.Mol molecule
        :raises Exception: if no fragment contains ``anchor_index``

        Details object example:

            [{'idx': 5,
              'type': rdkit.Chem.rdchem.BondType.SINGLE,
              'idx_F': 5, # fragmentanda index
              'idx_S': 1  # scaffold index
              }, ...]
        """
        # get bit to add: collect every bond to sever.
        bonds_to_frag = [
            fragmentanda.GetBondBetweenAtoms(anchor_index,
                                             detail['idx_F']).GetIdx()
            for detail in attachment_details
        ]
        bonds_to_frag.extend(
            fragmentanda.GetBondBetweenAtoms(oi, oad[0]['idx_F']).GetIdx()
            for oi, oad in zip(other_attachments, other_attachment_details))
        severed = Chem.FragmentOnBonds(fragmentanda, bonds_to_frag,
                                       addDummies=False)
        frag_split = []
        fragmols = Chem.GetMolFrags(severed,
                                    asMols=True,
                                    fragsMolAtomMapping=frag_split,
                                    sanitizeFrags=False)
        # Get the fragment of interest: the first one holding the anchor.
        match = next(((position, members)
                      for position, members in enumerate(frag_split)
                      if anchor_index in members), None)
        if match is None:
            raise Exception
        mol_N, indices = match
        frag = fragmols[mol_N]
        frag_anchor_index = indices.index(anchor_index)
        # offset collapsed to avoid clashes.
        self.offset(frag)
        # TODO (original author): renumbering of the fragment's original
        # indices (`_ori_i`) before combining is experimental and unfinished.
        combo = Chem.RWMol(rdmolops.CombineMols(scaffold, frag))
        # Fragment atoms come after the scaffold atoms in the combination.
        n_scaffold_atoms = scaffold.GetNumAtoms()
        scaffold_anchor_index = frag_anchor_index + n_scaffold_atoms
        for detail in attachment_details:
            # The anchor was originally bonded to detail['idx_F'] in
            # fragmentanda; that atom is not kept, so the anchor is bonded to
            # the scaffold atom detail['idx_S'] instead.
            scaffold_attachment_index = detail['idx_S']
            bond_type = detail['type']
            combo.AddBond(scaffold_anchor_index, scaffold_attachment_index,
                          bond_type)
            # Currently unused; the original has a disabled
            # BondProvenance.set_bond(new_bond, '???') call at this point.
            new_bond = combo.GetBondBetweenAtoms(scaffold_anchor_index,
                                                 scaffold_attachment_index)
        for oi, oad in zip(other_attachments, other_attachment_details):
            first_detail = oad[0]
            bond_type = first_detail['type']
            scaffold_attachment_index = first_detail['idx_S']
            scaffold_anchor_index = indices.index(oi) + n_scaffold_atoms
            combo.AddBond(scaffold_anchor_index, scaffold_attachment_index,
                          bond_type)
            # Currently unused, as above.
            new_bond = combo.GetBondBetweenAtoms(scaffold_anchor_index,
                                                 scaffold_attachment_index)
        Chem.SanitizeMol(
            combo,
            sanitizeOps=Chem.rdmolops.SanitizeFlags.SANITIZE_ADJUSTHS +
            Chem.rdmolops.SanitizeFlags.SANITIZE_SETAROMATICITY,
            catchErrors=True)
        self._prevent_two_bonds_on_dummy(combo)
        return combo.GetMol()
예제 #19
0
    def collapse_ring(self, mol: Chem.Mol) -> Chem.Mol:
        """
        Collapses a ring(s) into a single dummy atom(s).
        Stores data as JSON in the atom.

        For every ring reported by ``mol.GetRingInfo().AtomRings()`` a new carbon
        atom is added at the mean position of the ring atoms. The ring's atom
        indices, elements, coordinates, neighbours and bond types are stored on that
        atom as JSON string properties (``_ori_is``, ``_elements``, ``_xs``, ``_ys``,
        ``_zs``, ``_neighbors``, ``_bonds``), alongside ``_ori_name`` and
        ``_ori_i`` (set to -1). Bonds that left the ring are re-created from the
        new central atom, and finally all ring atoms are removed.

        :param mol: molecule to collapse; its first conformer is read for coordinates.
        :return: a new ``Chem.Mol`` in which each ring is replaced by one atom.
        :raises ValueError: if a neighbouring ring atom cannot be mapped to the
            central atom of a different ring.
        """
        self.store_positions(mol)
        mol = Chem.RWMol(mol)
        conf = mol.GetConformer()
        # The molecule name is the same for every ring: look it up only once.
        name = mol.GetProp('_Name') if mol.HasProp('_Name') else '???'
        rings = mol.GetRingInfo().AtomRings()
        center_idxs = []
        # Indices of every ring atom (all removed at the end). A set gives O(1)
        # membership tests in the bonding loop below.
        morituri = set()
        # ring atom index -> indices of the central atoms of the rings it belongs to
        old2center = defaultdict(list)
        for atomset in rings:
            morituri.update(atomset)
            neighs = []
            bonds = []
            xs = []
            ys = []
            zs = []
            elements = []
            # add elemental ring
            c = mol.AddAtom(Chem.Atom('C'))
            center_idxs.append(c)
            central = mol.GetAtomWithIdx(c)
            central.SetProp('_ori_name', name)
            # get data for storage
            for i in atomset:
                old2center[i].append(c)
                atom = mol.GetAtomWithIdx(i)
                neigh_i = [a.GetIdx() for a in atom.GetNeighbors()]
                neighs.append(neigh_i)
                # bond type names, in the same order as neigh_i
                bonds.append([mol.GetBondBetweenAtoms(i, j).GetBondType().name for j in neigh_i])
                pos = conf.GetAtomPosition(i)
                xs.append(pos.x)
                ys.append(pos.y)
                zs.append(pos.z)
                elements.append(atom.GetSymbol())
            # store data in elemental ring
            central.SetIntProp('_ori_i', -1)
            central.SetProp('_ori_is', json.dumps(atomset))
            central.SetProp('_neighbors', json.dumps(neighs))
            central.SetProp('_xs', json.dumps(xs))
            central.SetProp('_ys', json.dumps(ys))
            central.SetProp('_zs', json.dumps(zs))
            central.SetProp('_elements', json.dumps(elements))
            central.SetProp('_bonds', json.dumps(bonds))
            # place the central atom at the mean position of the ring atoms
            conf.SetAtomPosition(c, Point3D(*[sum(axis) / len(axis) for axis in (xs, ys, zs)]))
        for atomset, center_i in zip(rings, center_idxs):
            # bond to elemental ring
            central = mol.GetAtomWithIdx(center_i)
            neighss = json.loads(central.GetProp('_neighbors'))
            bondss = json.loads(central.GetProp('_bonds'))
            for neighs, bonds in zip(neighss, bondss):
                for neigh, bond in zip(neighs, bonds):
                    if neigh in atomset:
                        # bond within this ring: it disappears with the ring
                        continue
                    bt = getattr(Chem.BondType, bond)
                    if neigh not in morituri:
                        # neighbour survives the collapse: bond it to the centre directly
                        mol.AddBond(center_i, neigh, bt)
                        continue
                    # neighbour belongs to another ring: bond to that ring's centre,
                    # unless the two centres are already bonded.
                    for other_center_i in old2center[neigh]:
                        if center_i != other_center_i:
                            if not mol.GetBondBetweenAtoms(center_i, other_center_i):
                                mol.AddBond(center_i, other_center_i, bt)
                            break
                    else:
                        raise ValueError(f'Cannot find what {neigh} became')
        # Remove ring atoms from the highest original index downwards.
        for i in sorted(morituri, reverse=True):
            mol.RemoveAtom(self._get_new_index(mol, i))
        return mol.GetMol()
예제 #20
0
    def _merge_part(self, scaffold: Chem.Mol, fragmentanda: Chem.Mol,
                    anchor_index: int, attachment_details: List[Dict],
                    other_attachments: List[int],
                    other_attachment_details: List[List[Dict]]) -> Chem.Mol:
        """
        This does the messy work for merge_pair.

        The bonds listed in the attachment details are cut in ``fragmentanda``, the
        resulting fragment that contains ``anchor_index`` is appended to ``scaffold``
        and the cut bonds are re-created between the appended fragment and the
        scaffold atoms given in the details.

        :param scaffold: molecule that receives the fragment.
        :param fragmentanda: molecule from which the fragment is cut.
        :param anchor_index: index (in ``fragmentanda``) of the atom that identifies the fragment to keep.
        :param attachment_details: one dict per bond from the anchor atom, read for the keys
            ``'idx_F'`` (fragmentanda atom index), ``'idx_S'`` (scaffold atom index) and ``'type'`` (bond type).
        :param other_attachments: indices (in ``fragmentanda``) of further atoms of the fragment to bond.
        :param other_attachment_details: details for each entry of ``other_attachments``,
            same keys as above; only the first dict of each inner list is used.
        :return: the combined molecule.
        :raises ValueError: if ``anchor_index`` is in none of the fragments obtained after cutting.
        """
        # get bit to add.
        bonds_to_frag = [
            fragmentanda.GetBondBetweenAtoms(anchor_index, detail['idx_F']).GetIdx()
            for detail in attachment_details
        ]
        bonds_to_frag += [
            fragmentanda.GetBondBetweenAtoms(oi, oad[0]['idx_F']).GetIdx()
            for oi, oad in zip(other_attachments, other_attachment_details)
        ]
        if self._debug_draw and other_attachments:
            print('ring!', other_attachments)
            print('ring!', other_attachment_details)
        f = Chem.FragmentOnBonds(fragmentanda, bonds_to_frag, addDummies=False)
        # frag_split is filled by GetMolFrags with, for each fragment, the original
        # atom indices it is made of.
        frag_split = []
        fragmols = Chem.GetMolFrags(f,
                                    asMols=True,
                                    fragsMolAtomMapping=frag_split,
                                    sanitizeFrags=False)
        if self._debug_draw:
            print('Fragment splits')
            print(frag_split)
        # Get the fragment of interest.
        for mol_N, indices in enumerate(frag_split):
            if anchor_index in indices:
                break
        else:
            raise ValueError(
                f'Anchor atom {anchor_index} is not in any of the fragments obtained by splitting'
            )
        frag = fragmols[mol_N]
        frag_anchor_index = indices.index(anchor_index)
        # pre-emptively fix atom ori_i
        # offset collapsed to avoid clashes.
        self._offset_collapsed_ring(frag)
        self._offset_origins(frag)
        # TODO: finish the experimental renumbering of the original indices of ``frag``
        # (an old2future mapping passed to self._renumber_original_indices).
        if self._debug_draw:
            print('Fragment to add')
            self.draw_nicely(frag)
        combo = Chem.RWMol(rdmolops.CombineMols(scaffold, frag))
        # Atoms of frag come after those of scaffold in the combined molecule.
        index_offset = scaffold.GetNumAtoms()
        scaffold_anchor_index = frag_anchor_index + index_offset
        if self._debug_draw:
            print('Pre-merger')
            print(scaffold_anchor_index, attachment_details, anchor_index,
                  scaffold.GetNumAtoms())
            self.draw_nicely(combo)
        for detail in attachment_details:
            combo.AddBond(scaffold_anchor_index, detail['idx_S'], detail['type'])
        for oi, oad in zip(other_attachments, other_attachment_details):
            bond_type = oad[0]['type']
            scaffold_attachment_index = oad[0]['idx_S']
            frag_other_index = indices.index(oi)  # position of oi within the fragment
            other_anchor_index = frag_other_index + index_offset
            combo.AddBond(other_anchor_index, scaffold_attachment_index,
                          bond_type)
            if self._debug_draw:
                print(
                    f"Added additional {bond_type.name} bond between {scaffold_attachment_index} and {other_anchor_index} " + \
                    f"(formerly {frag_other_index})")
        # Partial sanitisation; catchErrors=True means a failure does not raise here.
        Chem.SanitizeMol(
            combo,
            sanitizeOps=Chem.rdmolops.SanitizeFlags.SANITIZE_ADJUSTHS +
            Chem.rdmolops.SanitizeFlags.SANITIZE_SETAROMATICITY,
            catchErrors=True)
        if self._debug_draw:
            print('Merged')
            self.draw_nicely(combo)
        self._prevent_two_bonds_on_dummy(combo)
        scaffold = combo.GetMol()
        return scaffold