Exemple #1
0
    def from_unannotated_mols(cls, moved_followup: Chem.Mol,
                              hits: Sequence[Chem.Mol],
                              placed_followup: Chem.Mol):
        """
        Build an instance from molecules that carry no mapping annotation.

        One positional mapping is computed per hit between that hit and
        ``placed_followup``; the mappings are then handed to the constructor
        together with ``moved_followup``.

        :param moved_followup: The mol to be scored
        :param hits: the hits to score against
        :param placed_followup: the mol to determine how to score
        :return: ``cls(moved_followup, hits, mappings)``, where ``mappings`` holds one
            list of mapping items per hit
        """
        # Atoms matching the '*' query are deleted from both followup molecules.
        dummy_query = Chem.MolFromSmiles('*')
        moved_followup = AllChem.DeleteSubstructs(moved_followup, dummy_query)
        placed_followup = AllChem.DeleteSubstructs(placed_followup, dummy_query)
        if moved_followup.GetNumAtoms() != placed_followup.GetNumAtoms():
            # The counts may differ only because of hydrogens: add them to the
            # placed molecule and insist that the counts agree afterwards.
            placed_followup = Chem.AddHs(placed_followup)
            n_moved = moved_followup.GetNumAtoms()
            n_placed = placed_followup.GetNumAtoms()
            assert n_moved == n_placed, 'moved and placed are different!'
        mappings = [
            list(
                Fragmenstein.get_positional_mapping(hit,
                                                    placed_followup).items())
            for hit in hits
        ]
        return cls(moved_followup, hits, mappings)
Exemple #2
0
    def get_possible_map(
            self,
            other: Chem.Mol,
            label: str,
            o_map: Dict[int, int],  # followup -> other
            inter_map: Dict[int, int],  # other -> combined
            combined: Chem.Mol,
            combined_map: Dict[int, int]) -> Dict[int, int]:
        """
        Vet one candidate map (``o_map``) against the atoms already combined.

        Every inconsistency between ``o_map`` and the existing maps counts as a
        strike. The candidate is rejected (empty dict) once the strikes reach
        ``self.max_strikes`` or when ``self.check_possible_distances`` fails.

        :param other: molecule that ``o_map`` points into
        :param label: name used in the debug log message
        :param o_map: followup -> other
        :param inter_map: other -> combined
        :param combined: the combined molecule
        :param combined_map: followup -> combined
        :return: followup index -> ``combined.GetNumAtoms() + other index`` for the
            atoms not yet accounted for, or ``{}`` if the candidate is rejected
        """
        possible_map = {}
        strikes = 0
        already_mapped = set(combined_map.keys())
        combined_targets = combined_map.values()  # live view of the combined indices in use
        for followup_idx, other_idx in o_map.items():
            overlaps = other_idx in inter_map
            if followup_idx in already_mapped:
                # The followup atom is accounted for: it must point at the same combined atom.
                if not overlaps:
                    strikes += 1
                    continue
                combined_idx = inter_map[other_idx]
                if combined_idx not in combined_targets:
                    # the overlapping atom does not contribute to the combined map
                    strikes += 1
                elif self.get_key(combined_map, combined_idx) != followup_idx:
                    # it contributes, but on behalf of a different followup atom
                    strikes += 1
            elif not overlaps or inter_map[other_idx] not in combined_targets:
                # A new atom: either no overlap, or the overlap was never counted.
                possible_map[followup_idx] = combined.GetNumAtoms() + other_idx
            else:
                log.debug(f'{label} - {followup_idx} mismatch')
                strikes += 1
        if strikes >= self.max_strikes:
            return {}
        distances_ok = self.check_possible_distances(other,
                                                     possible_map,
                                                     combined,
                                                     combined_map,
                                                     cutoff=self.distance_cutoff)
        if not distances_ok:
            return {}
        return possible_map
Exemple #3
0
def CalculateHeteroNumber(mol: Chem.Mol) -> float:
    """Calculate number of Heteroatoms.

    A heteroatom is counted here as any atom whose atomic number is neither
    1 (hydrogen) nor 6 (carbon).

    The earlier implementation counted those atoms and then subtracted the
    count from the total atom count, which returned the number of carbon and
    hydrogen atoms instead.

    :param mol: molecule whose atoms are inspected
    :return: number of atoms that are neither hydrogen nor carbon
    """
    return sum(1 for atom in mol.GetAtoms()
               if atom.GetAtomicNum() not in (1, 6))
    def to_dgl(self: GraphFeaturiser, mol: Mol) -> dgl.DGLGraph:
        """Convert a molecule into a featurised DGL graph.

        Atoms become nodes and each bond becomes an edge from its begin atom
        to its end atom. Reverse edges are added afterwards (edge data
        copied), followed by self loops when ``self.add_self_loops`` is set.

        Args:
            mol: The molecule to featurise.

        Returns:
            A DGL graph of the featurised molecule.
        """
        bond_list = mol.GetBonds()
        sources = torch.tensor([b.GetBeginAtomIdx() for b in bond_list])
        targets = torch.tensor([b.GetEndAtomIdx() for b in bond_list])
        graph = dgl.graph((sources, targets), num_nodes=mol.GetNumAtoms())

        # One node-data entry per configured atom featuriser.
        for name, featuriser in self.atom_featurisers.items():
            graph.ndata[name] = torch.tensor(featuriser.process_molecule(mol),
                                             dtype=torch.float)

        # One edge-data entry per configured bond featuriser, in bond order.
        for name, featuriser in self.bond_featurisers.items():
            per_bond = [featuriser.process_bond(b) for b in bond_list]
            graph.edata[name] = torch.tensor(per_bond, dtype=torch.float)

        graph = dgl.add_reverse_edges(graph, copy_edata=True)
        if self.add_self_loops:
            graph = dgl.add_self_loop(graph)
        return graph
Exemple #5
0
def _CalculateEState(mol: Chem.Mol, skipH: bool = True) -> numpy.ndarray:
    """Get the EState value of each atom in the molecule.

    The intrinsic state of every atom with at least one neighbour is computed
    first; each atom pair then exchanges a perturbation
    ``(I_i - I_j) / (d_ij + 1)**2``, where ``d_ij`` is the topological
    distance between the two atoms.

    :param mol: input molecule; hydrogens are added before processing
    :param skipH: when True the hydrogens are removed again, so the result
        has one entry per atom left after ``Chem.RemoveHs``
    :return: array of per-atom EState values, indexed like the processed molecule
    """
    mol = Chem.AddHs(mol)
    if skipH:
        mol = Chem.RemoveHs(mol)
    tb1 = Chem.GetPeriodicTable()
    nAtoms = mol.GetNumAtoms()
    # The builtin ``float`` replaces the ``numpy.float`` alias, which was
    # removed from NumPy; both mean float64.
    Is = numpy.zeros(nAtoms, float)
    for i in range(nAtoms):
        at = mol.GetAtomWithIdx(i)
        atNum = at.GetAtomicNum()
        d = at.GetDegree()
        if d > 0:  # isolated atoms keep an intrinsic state of 0
            h = at.GetTotalNumHs()
            dv = tb1.GetNOuterElecs(atNum) - h
            N = _GetPrincipleQuantumNumber(atNum)
            Is[i] = (4.0 / (N * N) * dv + 1) / d
    dists = Chem.GetDistanceMatrix(mol, useBO=0, useAtomWts=0)
    dists += 1
    accum = numpy.zeros(nAtoms, float)
    for i in range(nAtoms):
        for j in range(i + 1, nAtoms):
            p = dists[i, j]
            if p < 1e6:  # skip pairs whose distance entry is at least 1e6
                temp = (Is[i] - Is[j]) / (p * p)
                accum[i] += temp
                accum[j] -= temp
    return accum + Is
Exemple #6
0
    def copy_origins(cls, annotated: Chem.Mol, target: Chem.Mol):
        """
        Copy the ``_Origin`` atom annotation from ``annotated`` onto ``target``.

        Fragmenstein leaves a note of what it did. atom prop _Origin is a json of a list of mol _Name dot AtomIdx.
        The atom order seems to be maintained, but this is not relied upon: atoms are paired through the
        maximum common substructure of the two molecules instead. Also dummy atoms are stripped.

        The earlier implementation always returned an empty list because the
        list was never filled; it is now populated.

        :param annotated: molecule whose atoms carry the origin annotation
        :param target: molecule to annotate; its atoms get ``_Origin`` set in place
        :return: a list of origins, one entry per atom of ``target`` (same order);
            atoms outside the common substructure get an empty list
        """
        mcs = rdFMCS.FindMCS([target, annotated],
                             atomCompare=rdFMCS.AtomCompare.CompareElements,
                             bondCompare=rdFMCS.BondCompare.CompareAny,
                             ringMatchesRingOnly=True)
        common = Chem.MolFromSmarts(mcs.smartsString)
        # target atom index -> annotated atom index, paired through the common substructure
        dmapping = dict(
            zip(target.GetSubstructMatch(common),
                annotated.GetSubstructMatch(common)))
        origins = [[] for _ in range(target.GetNumAtoms())]
        for target_idx, annotated_idx in dmapping.items():
            origin = cls._get_origin(annotated.GetAtomWithIdx(annotated_idx))
            target.GetAtomWithIdx(target_idx).SetProp('_Origin',
                                                      json.dumps(origin))
            origins[target_idx] = origin
        return origins
def build_bond_features_and_mappings(
        mol: Chem.Mol, f_atoms: List) -> Tuple[list, list, list, list]:
    """Build directed-bond features and the atom/bond index mappings.

    Each bond contributes two directed entries: ``a1 -> a2`` followed by
    ``a2 -> a1``. The feature of a directed bond is the feature of its source
    atom concatenated with the bond feature.

    :param mol: molecule whose bonds are processed
    :param f_atoms: per-atom features, indexed by atom index
    :return: ``(f_bonds, a2b, b2a, b2revb)`` where ``a2b`` maps an atom index to
        its incoming bond indices, ``b2a`` maps a bond index to the atom the
        bond comes from, and ``b2revb`` maps a bond index to its reverse bond
    """
    f_bonds = []
    b2a = []
    b2revb = []
    a2b = [[] for _ in range(mol.GetNumAtoms())]

    for bond in mol.GetBonds():
        src = bond.GetBeginAtom().GetIdx()
        dst = bond.GetEndAtom().GetIdx()
        bond_feature = get_bond_features(bond)

        forward = len(f_bonds)  # index of src --> dst
        backward = forward + 1  # index of dst --> src
        f_bonds.extend(
            [f_atoms[src] + bond_feature, f_atoms[dst] + bond_feature])

        b2a.extend([src, dst])
        a2b[dst].append(forward)
        a2b[src].append(backward)
        b2revb.extend([backward, forward])

    return f_bonds, a2b, b2a, b2revb
Exemple #8
0
    def process(self, mol: chem.Mol, atom_map: Dict[int, int]) -> GCNGraph:
        """Turn a molecule into a ``GCNGraph`` with one extra graph-level node.

        The extra node takes the last index and is connected to every atom.
        The adjacency matrix holds the pairwise product of per-node
        coefficients at every edge position (both directions) and on the
        diagonal; all other entries are zero.

        :param mol: molecule to convert
        :param atom_map: atomic number -> embedding index; the extra node gets
            index ``len(atom_map)``
        :return: ``GCNGraph(n, adj, num)`` with ``n`` = number of atoms + 1
        """
        n_atoms = mol.GetNumAtoms()
        n = n_atoms + 1  # one additional node for the graph embedding

        # Edge endpoints: the real bonds, then extra node -> every atom.
        sources = [b.GetBeginAtomIdx() for b in mol.GetBonds()]
        sources += [n_atoms] * n_atoms
        targets = [b.GetEndAtomIdx() for b in mol.GetBonds()]
        targets += list(range(n_atoms))
        assert len(sources) == len(targets)
        diagonal = list(range(n))  # self-loops
        index = [sources + targets + diagonal, targets + sources + diagonal]

        # Per-node coefficient; the outer product gives the pairwise coefficients.
        node_coeff = [
            sqrt(1 / (len(atom.GetNeighbors()) + 2)) for atom in mol.GetAtoms()
        ]
        node_coeff.append(sqrt(1 / n))
        deg = torch.tensor(node_coeff, device=self.device)
        coeff = deg.reshape(-1, 1) @ deg[None, :]
        adj = torch.zeros((n, n), device=self.device)
        adj[index] = coeff[index]

        # Embedding index per node, with the extra node last.
        embedding_ids = [atom_map[atom.GetAtomicNum()] for atom in mol.GetAtoms()]
        embedding_ids.append(len(atom_map))
        num = torch.tensor(embedding_ids, device=self.device)

        return GCNGraph(n, adj, num)
Exemple #9
0
def _GetBurdenMatrix(mol: Chem.Mol, propertylabel: str = 'm') -> numpy.matrix:
    """Build the weighted Burden matrix and return the real part of its eigenvalues.

    Hydrogens are added first. Matrix entries:

    * diagonal: the relative atomic property of the atom, selected by
      ``propertylabel`` and rounded to 3 decimals;
    * bonded pairs: square root of the bond order (1, 2, 3, or 1.5 for
      aromatic bonds), rounded to 3 decimals; any other bond type keeps the
      adjacency value;
    * non-bonded pairs: 0.001.

    :param mol: input molecule
    :param propertylabel: property name passed to ``GetRelativeAtomicProperty``
    :return: real parts of the eigenvalues of the matrix
    """
    mol = Chem.AddHs(mol)
    n_atoms = mol.GetNumAtoms()
    adjacency = Chem.GetAdjacencyMatrix(mol)
    burden = numpy.array(adjacency, dtype=numpy.float32)

    # Diagonal: relative atomic property of each atom.
    for idx in range(n_atoms):
        symbol = mol.GetAtomWithIdx(idx).GetSymbol()
        weight = GetRelativeAtomicProperty(element=symbol,
                                           propertyname=propertylabel)
        burden[idx, idx] = round(weight, 3)

    # Bonded pairs: square root of the bond order.
    bond_weights = {
        'SINGLE': round(numpy.sqrt(1), 3),
        'DOUBLE': round(numpy.sqrt(2), 3),
        'TRIPLE': round(numpy.sqrt(3), 3),
        'AROMATIC': round(numpy.sqrt(1.5), 3),
    }
    for row, col in numpy.argwhere(adjacency):
        bond = mol.GetBondBetweenAtoms(int(row), int(col))
        weight = bond_weights.get(bond.GetBondType().name)
        if weight is not None:
            burden[row, col] = weight

    # Non-bonded, off-diagonal pairs get a small constant.
    for row, col in numpy.argwhere(adjacency == 0):
        if row != col:
            burden[row, col] = 0.001
    return numpy.real(numpy.linalg.eigvals(burden))
Exemple #10
0
def _compute_sas(mol: Mol, sa_model: Dict[int, float]) -> float:
    """Compute a synthetic accessibility score clamped to the range 1 to 10.

    The raw score is the sum of three parts: a fragment score (count-weighted
    mean of the ``sa_model`` contributions of the radius-2 Morgan fingerprint
    entries, -4 for unknown entries), a complexity penalty (size,
    stereocentres, spiro atoms, bridgeheads, macrocycles) and a correction for
    fingerprint density.

    :param mol: molecule to score
    :param sa_model: fingerprint entry id -> fragment contribution
    :return: score between 1.0 and 10.0
    """
    fingerprint = rdMolDescriptors.GetMorganFingerprint(mol, 2)
    fragment_counts = fingerprint.GetNonzeroElements()

    # fragment score: count-weighted mean contribution
    fragment_score = 0.
    total_count = 0
    for fragment_id, count in fragment_counts.items():
        total_count += count
        fragment_score += sa_model.get(fragment_id, -4) * count
    fragment_score /= total_count

    # features score
    n_atoms = mol.GetNumAtoms()
    n_chiral = len(FindMolChiralCenters(mol, includeUnassigned=True))
    ring_info = mol.GetRingInfo()
    n_spiro = rdMolDescriptors.CalcNumSpiroAtoms(mol)
    n_bridgeheads = rdMolDescriptors.CalcNumBridgeheadAtoms(mol)
    has_macrocycle = any(len(ring) > 8 for ring in ring_info.AtomRings())

    size_penalty = n_atoms**1.005 - n_atoms
    stereo_penalty = math.log10(n_chiral + 1)
    spiro_penalty = math.log10(n_spiro + 1)
    bridge_penalty = math.log10(n_bridgeheads + 1)
    # This differs from the paper, which defines the penalty as
    # log10(nMacrocycles + 1). A flat penalty generates better results when
    # 2 or more macrocycles are present.
    macrocycle_penalty = math.log10(2) if has_macrocycle else 0.

    complexity_score = (0. - size_penalty - stereo_penalty - spiro_penalty -
                        bridge_penalty - macrocycle_penalty)

    # correction for the fingerprint density
    # not in the original publication, added in version 1.1
    # to make highly symmetrical molecules easier to synthetise
    density_correction = 0.
    if n_atoms > len(fragment_counts):
        density_correction = math.log(
            float(n_atoms) / len(fragment_counts)) * .5

    raw_score = fragment_score + complexity_score + density_correction

    # transform the "raw" value into a scale between 1 and 10
    lower = -4.0
    upper = 2.5
    scaled = 11. - (raw_score - lower + 1) / (upper - lower) * 9.
    # smooth the 10-end
    if scaled > 8.:
        scaled = 8. + math.log(scaled + 1. - 9.)

    if scaled > 10.:
        return 10.0
    if scaled < 1.:
        return 1.0
    return scaled
def construct_discrete_edge_matrix(mol: Chem.Mol):
    """Build a one-hot, per-bond-type adjacency tensor for a molecule.

    The result has shape ``(4, MAX_NUMBER_ATOM, MAX_NUMBER_ATOM)`` and dtype
    float32. Channels 0 to 3 mark SINGLE, DOUBLE, TRIPLE and AROMATIC bonds
    respectively; entry ``[c, i, j]`` is 1.0 when atoms ``i`` and ``j`` share
    a bond of that type.

    :param mol: molecule to encode, or None
    :return: the adjacency tensor, or None when ``mol`` is None
    """
    if mol is None:
        return None
    channel_of = {'SINGLE': 0, 'DOUBLE': 1, 'TRIPLE': 2, 'AROMATIC': 3}
    n_atoms = mol.GetNumAtoms()
    adjs = numpy.zeros((4, MAX_NUMBER_ATOM, MAX_NUMBER_ATOM),
                       dtype=numpy.float32)
    for i in range(n_atoms):
        for j in range(n_atoms):
            bond = mol.GetBondBetweenAtoms(i, j)  # type: Chem.Bond
            if bond is None:
                continue
            bond_type = str(bond.GetBondType())
            channel = channel_of.get(bond_type)
            if channel is not None:
                adjs[channel, i, j] = 1.0
            else:
                print("[ERROR] Unknown bond type", bond_type)
                assert False  # Should not come here
    return adjs
Exemple #12
0
def subset_rdmol(rdmol: Chem.Mol,
                atom_indices: Iterable[int],
                check_bonds: bool = True,
                return_atom_indices: bool = False) -> Chem.Mol:
    """Return an editable copy of ``rdmol`` reduced to the selected atoms.

    Any iterable of indices is accepted. The earlier implementation only
    worked with lists when ``check_bonds`` was set (it concatenated with
    ``+``) and it consumed one-shot iterators during the first membership test.

    :param rdmol: source molecule; it is copied, not modified
    :param atom_indices: indices of the atoms to keep
    :param check_bonds: when True, an unselected atom bonded to more than one
        selected atom is kept as well
    :param return_atom_indices: when True, also return the atom indices
    :return: the reduced ``Chem.RWMol``, or ``(mol, atom_indices)`` when
        ``return_atom_indices`` is True. With ``check_bonds`` the indices are
        a sorted list that includes the additionally kept atoms; otherwise
        they are returned as passed in (a one-shot iterator is returned as a list)
    """
    rdmol = Chem.RWMol(rdmol)
    if iter(atom_indices) is atom_indices:
        # One-shot iterator: materialise it so it can be reused below.
        atom_indices = list(atom_indices)
    selected = set(atom_indices)  # constant-time membership tests
    to_remove = [i for i in range(rdmol.GetNumAtoms()) if i not in selected]
    if check_bonds:
        rescued = set()
        for i in to_remove:
            atom = rdmol.GetAtomWithIdx(i)
            n_bonds = sum(1 for bond in atom.GetBonds()
                          if bond.GetOtherAtomIdx(i) in selected)
            if n_bonds > 1:
                rescued.add(i)
        atom_indices = sorted([*atom_indices, *rescued])
        to_remove = [i for i in to_remove if i not in rescued]
    # Remove from the highest index down so the remaining indices stay valid.
    for i in reversed(to_remove):
        rdmol.RemoveAtom(i)
    rdmol.UpdatePropertyCache()
    if return_atom_indices:
        return rdmol, atom_indices
    return rdmol
 def merge(self, scaffold: Chem.Mol, fragmentanda: Chem.Mol,
           anchor_index: int, attachment_details: List[Dict]) -> Chem.Mol:
     """
     Graft the anchor-side piece of ``fragmentanda`` onto ``scaffold``.

     For every attachment detail, ``fragmentanda`` is split at the bond between
     ``anchor_index`` and the detail's ``idx_F``; the fragment containing the anchor
     is combined with the scaffold and bonded to the scaffold atom ``idx_S`` with
     bond type ``type``. The result of one detail is the scaffold for the next.

     :param scaffold: molecule being extended
     :param fragmentanda: molecule supplying the fragment
     :param anchor_index: index (in ``fragmentanda``) of the atom that ends up bonded to the scaffold
     :param attachment_details: dicts with keys ``idx_F``, ``idx_S`` and ``type``
     :return: the merged molecule (``scaffold`` itself if there are no details)
     :raises Exception: if the anchor atom is in none of the fragments
     """
     for detail in attachment_details:
         attachment_index = detail['idx_F']  # fragmentanda attachment_index
         scaffold_attachment_index = detail['idx_S']
         bond_type = detail['type']
         f = Chem.FragmentOnBonds(fragmentanda, [
             fragmentanda.GetBondBetweenAtoms(anchor_index,
                                              attachment_index).GetIdx()
         ],
                                  addDummies=False)
         frag_split = []
         fragmols = Chem.GetMolFrags(f,
                                     asMols=True,
                                     fragsMolAtomMapping=frag_split,
                                     sanitizeFrags=False)
         if self._debug_draw:
             print(frag_split)
         # Get the fragment of interest: the one that contains the anchor atom.
         for mol_N, indices in enumerate(frag_split):
             if anchor_index in indices:
                 break
         else:
             raise Exception(
                 f'Anchor atom {anchor_index} is not in any fragment.')
         frag = fragmols[mol_N]
         frag_anchor_index = indices.index(anchor_index)
         if self._debug_draw:
             self.draw_nicely(frag)
         combo = Chem.RWMol(rdmolops.CombineMols(scaffold, frag))
         # Index of the anchor inside combo: offset by the scaffold's atom count.
         scaffold_anchor_index = frag_anchor_index + scaffold.GetNumAtoms()
         if self._debug_draw:
             print(scaffold_anchor_index, scaffold_attachment_index,
                   anchor_index, scaffold.GetNumAtoms())
             self.draw_nicely(combo)
         combo.AddBond(scaffold_anchor_index, scaffold_attachment_index,
                       bond_type)
         # Partial sanitisation only; errors are caught rather than raised.
         Chem.SanitizeMol(
             combo,
             sanitizeOps=Chem.rdmolops.SanitizeFlags.SANITIZE_ADJUSTHS +
             Chem.rdmolops.SanitizeFlags.SANITIZE_SETAROMATICITY,
             catchErrors=True)
         if self._debug_draw:
             self.draw_nicely(combo)
         scaffold = combo
     return scaffold
Exemple #14
0
def CalculateMeanWeiner(mol: Chem.Mol) -> float:
    """Compute the mean Weiner index (AW) of a molecule.

    The Weiner index from ``CalculateWeiner`` is divided by the number of
    atom pairs, ``N * (N - 1) / 2``, where ``N`` is the atom count.

    :param mol: input molecule
    :return: mean Weiner index
    """
    atom_count = mol.GetNumAtoms()
    weiner = CalculateWeiner(mol)
    pair_factor = atom_count * (atom_count - 1)
    return 2.0 * weiner / pair_factor
Exemple #15
0
def CalculateQuadratic(mol: Chem.Mol) -> float:
    """Compute the Quadratic index (Qindex) of a molecule.

    Evaluates ``3 - 2 * N + M / 2`` where ``N`` is the atom count and ``M``
    is the value returned by ``CalculateZagreb1``.

    :param mol: input molecule
    :return: Quadratic index
    """
    zagreb1 = CalculateZagreb1(mol)
    atom_count = mol.GetNumAtoms()
    half_zagreb = zagreb1 / 2.0
    return 3 - 2 * atom_count + half_zagreb
Exemple #16
0
def CalculateArithmeticTopoIndex(mol: Chem.Mol) -> float:
    """Compute the Arithmetic topological index (Arto).

    Defined here as twice the number of bonds divided by the number of atoms.
    From Narumi H., MATCH (Comm. Math. Comp. Chem.), (1987), 22,195-207.

    :param mol: input molecule
    :return: Arithmetic topological index
    """
    atom_count = mol.GetNumAtoms()
    bond_count = mol.GetNumBonds()
    return 2. * bond_count / atom_count
 def match(self, mol: Chem.Mol) -> List[np.ndarray]:
     """
     Return one combined match per non-empty subset of the substructure matches.

     Each match from ``self.substruct_matches`` is converted with
     ``_sparse_to_dense`` using the molecule's atom count; every subset produced
     by ``_nonnull_powerset`` is then merged with ``_reduce_logical_or``.

     :param mol: molecule to match against
     :return: list of merged matches, one per subset
     """
     sparse_hits = self.substruct_matches(mol)
     n_atoms = mol.GetNumAtoms()
     dense_hits = [_sparse_to_dense(hit, n_atoms) for hit in sparse_hits]
     return [
         _reduce_logical_or(subset, n_atoms)
         for subset in _nonnull_powerset(dense_hits)
     ]
Exemple #18
0
def _CalculateAtomEState(mol: Chem.Mol, AtomicNum=6) -> float:
    """Calculate the sum of the EState indices over all atoms with specified atomic number.

    :param mol: input molecule
    :param AtomicNum: atomic number of the atoms to sum over (default 6)
    :return: sum of the EState values of the matching atoms
    """
    nAtoms = mol.GetNumAtoms()
    # The builtin ``float`` replaces the ``numpy.float`` alias, which was
    # removed from NumPy; both mean float64.
    Is = numpy.zeros(nAtoms, float)
    Estate = _CalculateEState(mol)
    for i in range(nAtoms):
        # Atoms of other elements keep a contribution of 0.
        if mol.GetAtomWithIdx(i).GetAtomicNum() == AtomicNum:
            Is[i] = Estate[i]
    return sum(Is)
 def match(self, mol: Chem.Mol) -> List[np.ndarray]:
     """
     Combine the matches of all sub-rules according to ``self.rule_type``.

     One match is picked from every sub-rule (cartesian product). For ``'AND'``
     the whole pick is merged with ``_reduce_logical_or``; for ``'OR'`` every
     subset of the pick produced by ``_nonnull_powerset`` is merged. Any other
     rule type produces no matches.

     :param mol: molecule to match against
     :return: list of merged matches
     """
     per_rule = [logic.match(mol) for logic in self.fragment_logics]
     n_atoms = mol.GetNumAtoms()
     merged = []
     for picked in itertools.product(*per_rule):
         if self.rule_type == 'OR':
             merged.extend(
                 _reduce_logical_or(subset, n_atoms)
                 for subset in _nonnull_powerset(picked))
         elif self.rule_type == 'AND':
             merged.append(_reduce_logical_or(picked, n_atoms))
     return merged
Exemple #20
0
def CalculateGutmanTopo(mol: Chem.Mol) -> float:
    """Compute the Gutman molecular topological simple vertex index (GMTI).

    Sums ``degree_i * degree_j * distance_ij`` over all unordered atom pairs
    and returns the base-10 logarithm of that sum.

    :param mol: input molecule
    :return: log10 of the degree-weighted distance sum
    """
    atom_count = mol.GetNumAtoms()
    degrees = [atom.GetDegree() for atom in mol.GetAtoms()]
    topo_dist = Chem.GetDistanceMatrix(mol)
    total = 0.0
    for first in range(atom_count):
        first_degree = degrees[first]
        for second in range(first + 1, atom_count):
            total = total + first_degree * degrees[second] * topo_dist[first, second]
    return numpy.log10(total)
    def add_names(cls, mol: Chem.Mol, names: List[str], name:Optional[str]=None) -> Chem.Mol:
        """
        Assign PDB atom names to the atoms of a molecule, in atom order.

        A fresh instance of the class is created to do the work: it receives the
        molecule, runs ``fix_mol`` and the names are then written to each atom's
        PDB residue info.

        :param mol: Chem.Mol, will actually be edited in place.
        :param names: list of unique names.
        :param name: 3letter code for the molecule.
        :return: the mol
        :raises ValueError: if more names than atoms are given
        """
        assert len(set(names)) == len(names), 'Atom Names are repeated.'
        n_atoms = mol.GetNumAtoms()
        n_names = len(names)
        if n_atoms > n_names:
            # Tolerated: the surplus atoms are simply not renamed here.
            warn('There are more atoms in mol than were provided.')
        elif n_atoms < n_names:
            raise ValueError('There are less atoms in mol than were provided.')
        self = cls()
        if name is not None:
            self.NAME = name
        self.mol = mol
        self.fix_mol()
        for atom_name, atom in zip(names, self.mol.GetAtoms()):
            atom.GetPDBResidueInfo().SetName(atom_name)
        return self.mol