Example #1
0
def get_point_weight(mol, probe_rad=1.4):
    """Compute one weight per atom of a molecule, treating each atom as a weighted point.
    Parameters:
        mol - rdkit.Chem.rdchem.Mol molecule (its conformer supplies the number of points)
        probe_rad - probe radius added to each VDW radius, default of 1.4 Angstrom
    Returns a 1-D float array holding (VDW radius + probe radius) squared for every atom.
    """
    positions = mol.GetConformer().GetPositions()
    periodic_table = GetPeriodicTable()
    # One weight per row of the coordinate array; row i belongs to atom index i.
    squared_radii = [
        (periodic_table.GetRvdw(mol.GetAtomWithIdx(atom_idx).GetAtomicNum()) +
         probe_rad)**2 for atom_idx in range(positions.shape[0])
    ]
    return np.array(squared_radii, dtype=float)
 def ReadAtomSuffix(self, tree, atom):
     """Translate an atom suffix token into query constraints.

     Parameters:
         tree - parsed suffix node; only tree[0] (the suffix string) is read
         atom - atom object whose query is expanded via ExpandQuery for the
                charged suffixes
     Returns an AtomRadical constraint for the radical suffixes
     ('.', ':', ':.', '+.', '-.'), otherwise None.
     Raises NotImplementedError for a suffix that is not recognised.
     """
     suffix = tree[0]
     # Radical suffixes -> ConstraintNumber specification for AtomRadical.
     radical_specs = {'.': '=1', '+.': '=1', '-.': '=1', ':': '=2', ':.': '=3'}
     # Charged suffixes -> formal charge required on the atom.
     formal_charges = {'+': 1, '+.': 1, '-': -1, '-.': -1}

     constraint = None
     if suffix in radical_specs or suffix in formal_charges:
         if suffix in radical_specs:
             constraint = AtomRadical(False,
                                      ConstraintNumber(radical_specs[suffix]))
         if suffix in formal_charges:
             atom.ExpandQuery(
                 rdqueries.FormalChargeEqualsQueryAtom(formal_charges[suffix]))
     elif suffix == '*':
         from rdkit.Chem import GetPeriodicTable
         #if type(atom).__name__ == 'QueryAtom':
         #    raise NotImplementedError('Onium $,&,X atoms not supported yet')
         atomicnum = atom.GetAtomicNum()
         # NOTE(review): this rebinds the local name only. The query atom built
         # here is neither returned nor stored, so the caller's atom is left
         # untouched by this branch - confirm whether that is intended.
         atom = rdqueries.AtomNumEqualsQueryAtom(atomicnum)
         valence = GetPeriodicTable().GetDefaultValence(atomicnum)
         atom.ExpandQuery(rdqueries.TotalValenceEqualsQueryAtom(valence + 1))
         atom.ExpandQuery(rdqueries.FormalChargeEqualsQueryAtom(1))
     elif suffix != '?':
         # '?' is accepted and adds no constraint; anything else is an error.
         raise NotImplementedError("Unsupported atom suffix: '" + suffix + "'")
     return constraint
 def __call__(self, comb_mol, mapped_index):
     """Apply this object's valence, radical and charge settings to one atom.

     Parameters:
         comb_mol - molecule providing GetAtomWithIdx and ReplaceAtom
         mapped_index - lookup from self.idx to the atom index in comb_mol
     Reads self.idx, self.valence, self.radical and self.charge.
     Returns None; the work is done through side effects.
     """
     # Resolve the target once so lookup and replacement hit the same atom.
     target_idx = mapped_index[self.idx]
     atom = comb_mol.GetAtomWithIdx(target_idx)
     if self.valence != 0:
         from rdkit.Chem import GetPeriodicTable
         atomicnum = atom.GetAtomicNum()
         atom = rdqueries.AtomNumEqualsQueryAtom(atomicnum)
         valence = GetPeriodicTable().GetDefaultValence(atomicnum)
         atom.ExpandQuery(
             rdqueries.TotalValenceEqualsQueryAtom(valence + self.valence))
         atom.ExpandQuery(rdqueries.FormalChargeEqualsQueryAtom(self.charge))
         # The atom was fetched at the mapped index, so it must be replaced at
         # that same index rather than at the unmapped self.idx.
         comb_mol.ReplaceAtom(target_idx, atom)

     # NOTE(review): if ReplaceAtom stores a copy of the atom, the two setters
     # below only affect the local query atom when valence != 0 - confirm.
     atom.SetNumRadicalElectrons(self.radical)
     atom.SetFormalCharge(self.charge)
Example #4
0
def toStringRep(v):
    """Generate a string representation of a molvector

      :param v: molvector, read as consecutive chunks of
          ``atom_size + bond_chunk_size`` values. Each chunk must unpack into
          exactly 11 values: an element number, two small integers (each
          asserted to be < 10) and four (bond type, integer) pairs.
          Trailing values that do not fill a whole chunk are ignored.
      :return: the concatenated fixed-format text of all chunks
    """
    p = GetPeriodicTable()
    chunksize = atom_size + bond_chunk_size
    nchunks = len(v) // chunksize
    out = []
    for start in range(0, nchunks * chunksize, chunksize):
        el, c, h, b1, o1, b2, o2, b3, o3, b4, o4 = v[start:start + chunksize]
        # Element symbol right-aligned to two characters, padded with '_'.
        out.append(("%2s" % p.GetElementSymbol(el)).replace(" ", "_"))
        assert c < 10
        out.append(str(c))
        assert h < 10
        out.append(str(h))
        for btype, o in ((b1, o1), (b2, o2), (b3, o3), (b4, o4)):
            out.append(bond_symbols[btype])
            # Zero-padded to at least four digits.
            out.append("%04d" % o)
    return "".join(out)
Example #5
0
Element objects for each chemical element (1-112) have also been declared as 
module-level variables, using each element's symbol as its variable name. The
:meth:`getElement` method can also be used to retrieve the :class:`Element`
object associated with an atomic number or symbol. Generally applications will
want to use these objects, both to conserve memory and to make for easy
comparisons.
"""

import cython
from rdkit.Chem import GetPeriodicTable
from rmgpy.exceptions import ElementError

################################################################################

# Module-level periodic table obtained from RDKit once, at import time.
_rdkit_periodic_table = GetPeriodicTable()


class Element:
    """
    A chemical element. The attributes are:

    ============= =============== ================================================
    Attribute     Type            Description
    ============= =============== ================================================
    `number`      ``int``         The atomic number of the element
    `symbol`      ``str``         The symbol used for the element
    `name`        ``str``         The IUPAC name of the element
    `mass`        ``float``       The mass of the element in kg/mol
    `covRadius`   ``float``       Covalent bond radius in Angstrom
    `isotope`     ``int``         The isotope integer of the element
Example #6
0
                            type=str,
                            default=None,
                            help='Path to molecules to use as seeds')

    # Parse the arguments
    args = arg_parser.parse_args()
    run_params = args.__dict__

    # Get the list of elements
    #  We want those where SMILES supports implicit valences
    mpnn_dir = os.path.join('notebooks', 'mpnn-training')
    with open(os.path.join(mpnn_dir, 'atom_types.json')) as fp:
        atom_types = json.load(fp)
    with open(os.path.join(mpnn_dir, 'bond_types.json')) as fp:
        bond_types = json.load(fp)
    pt = GetPeriodicTable()
    if len(args.elements) == 0:
        elements = [pt.GetElementSymbol(i) for i in atom_types]
    else:
        elements = args.elements
    elements = [e for e in elements if MolFromSmiles(e) is not None]
    logger.info(f'Using {len(elements)} elements: {elements}')

    # Prepare the one-shot model. We need the molecules to compare against and the comparison model
    with open(os.path.join('seed-molecules', 'top_100_pIC50.json')) as fp:
        comparison_mols = [convert_smiles_to_nx(s) for s in json.load(fp)]
    oneshot_dir = 'similarity'
    oneshot_model = load_model(os.path.join(oneshot_dir, 'oneshot_model.h5'),
                               custom_objects=custom_objects)
    with open(os.path.join(oneshot_dir, 'atom_types.json')) as fp:
        os_atom_types = json.load(fp)
Example #7
0
 def __init__(self):
     """Store the table returned by GetPeriodicTable() on the instance."""
     # The attribute name is kept exactly as spelled; other code may read it.
     periodic_table = GetPeriodicTable()
     self.PeriodicaTable = periodic_table
Example #8
0
def getOriginalIdentifiers(mol,
                           prop=[
                               'AtomicNumber', 'AtomicMass',
                               'TotalConnections', 'HCount',
                               'HeavyNeighborCount', 'FormalCharge',
                               'DeltaMass', 'IsTerminalAtom', 'SolidAngle',
                               'SolidAngleValue', 'SolidAngleSign'
                           ],
                           sa_dict=None,
                           includeAtoms=None,
                           radius=2,
                           hash_type='str',
                           idf_power=64):
    """Compute the original identifiers for atoms in a molecule based on atomic properties.
       Note it only includes HEAVY atoms.
    Parameters:
        mol - rdkit.Chem.rdchem.Mol molecule
        prop - atomic property list (read only, never modified)
               'AtomicNumber': the atomic number of atom
               'AtomicMass': the mass of atom
               'TotalConnections': the degree of the atom (atom.GetDegree())
               'HeavyNeighborCount': the number of heavy (non-hydrogen) neighbor atoms
               'HCount': the value of atom.GetNumExplicitHs()
               'FormalCharge': the formal charge of atom
               'DeltaMass': the difference between atomic mass and atomic weight (weighted average of atomic masses)
               'IsTerminalAtom': 1 if the atom has degree 1, otherwise 0
               'SolidAngle': the solid angle of the atom on the molecule surface (> 0: convex, < 0: concave)
               'SolidAngleValue': the absolute solid angle of the atom on the molecule surface
               'SolidAngleSign': the sign of solid angle of the atom ('-', '0', '+')
               Only ONE solid-angle entry is emitted per atom: 'SolidAngle' if requested,
               otherwise 'SolidAngleValue', otherwise the sign.
        sa_dict - a dictionary mapping atom indices to their solid angles; a value of None,
                  or sa_dict itself being None, is treated as a solid angle of 0
        includeAtoms - atom indices for getting identifiers (all atoms when None)
        radius - ECFP radius, only calculates the identifiers of atoms in the neighborhoods (of radius) of included atoms (includeAtoms)
        hash_type - type for hash the properties, can be 'str' or 'vec'
        idf_power - power for the 'str' hash type (default 64-bit integers)
    Returns a dictionary mapping (heavy-atom index, 0) tuples to an integer representing the atomic properties.
    For any other hash_type a message is printed and the dictionary built so far is returned.
    """
    tbl = GetPeriodicTable()
    as_vec = hash_type == 'vec'

    def fmt(value):
        # 'vec' keeps raw numbers; every other hash type uses '%.2f' strings.
        return value if as_vec else '%.2f' % value

    # Any of the three solid-angle properties needs the per-atom solid angle.
    uses_solid_angle = any('SolidAngle' in p for p in prop)

    idf_dict = {}
    nAtoms = mol.GetNumAtoms()
    indices = range(nAtoms) if includeAtoms is None else includeAtoms
    for i in indices:
        index = int(i)
        env = list(
            Chem.FindAtomEnvironmentOfRadiusN(mol, radius, index, useHs=True))
        # Atoms touched by the environment's bonds (both ends of every bond).
        env_aids = set(
            [mol.GetBondWithIdx(bid).GetBeginAtomIdx() for bid in env] +
            [mol.GetBondWithIdx(bid).GetEndAtomIdx() for bid in env])
        for aid in env_aids:
            if (aid, 0) in idf_dict:
                continue
            atom = mol.GetAtomWithIdx(aid)
            if atom.GetAtomicNum() <= 1:
                # Hydrogens never get an identifier.
                continue

            properties = []
            if 'AtomicNumber' in prop:
                properties.append(atom.GetAtomicNum())
            if 'AtomicMass' in prop:
                properties.append(fmt(atom.GetMass()))
            if 'TotalConnections' in prop:
                properties.append(atom.GetDegree())
            if 'HCount' in prop:
                properties.append(atom.GetNumExplicitHs())
            if 'HeavyNeighborCount' in prop:
                properties.append(
                    sum(1 for bond in atom.GetBonds()
                        if bond.GetOtherAtom(atom).GetAtomicNum() > 1))
            if 'FormalCharge' in prop:
                properties.append(fmt(atom.GetFormalCharge()))
            if 'DeltaMass' in prop:
                delta_mass = atom.GetMass() - tbl.GetAtomicWeight(
                    atom.GetAtomicNum())
                # Fix: the computed value was previously never passed to append().
                properties.append(fmt(delta_mass))
            if 'IsTerminalAtom' in prop:
                properties.append(1 if atom.GetDegree() == 1 else 0)
            if uses_solid_angle:
                # A missing sa_dict is handled like a missing per-atom value.
                sa = None if sa_dict is None else sa_dict[aid]
                solang = 0 if (sa is None) else sa
                if 'SolidAngle' in prop:
                    properties.append(fmt(solang))
                elif 'SolidAngleValue' in prop:
                    properties.append(fmt(abs(solang)))
                else:
                    solang_sign = '0' if (sa in [None, 0]) else (
                        '+' if sa > 0 else '-')
                    properties.append(solang_sign)

            if hash_type == 'str':
                idf = hash_ecfp(ecfp=','.join([str(p) for p in properties]),
                                power=idf_power)
            elif as_vec:
                idf = hash(tuple(properties))
            else:
                print('Wrong hash type!')
                return idf_dict

            idf_dict[(aid, 0)] = idf

    return idf_dict
def get_atom_proplist(mol,
                      sa_dict=None,
                      aids=[],
                      base_prop=['AtomicMass'],
                      hash_type='str'):
    """
    Aggregate atomic properties over a set of atoms in mol (indexed by aids).
    Parameters:
        mol - a rdkit.Chem.rdchem.Mol molecule
        sa_dict - a dictionary mapping atom indices to their solid angles (None values count as 0)
        aids - the indices of atoms
        base_prop - the property list for the computations
        hash_type - type for hash the properties, can be 'str' or 'vec'
    Returns the property values selected by base_prop, in a fixed internal order:
        'str' - each value divided by len(aids) and formatted as %.2f
                ('SolidAngleSign' is kept as '+', '-' or '0')
        'vec' - the accumulated values as they are, without dividing
        An empty aids gives []. Any other hash_type prints a message and
        returns the complete accumulator dictionary.
    """

    tbl = GetPeriodicTable()
    n_atoms = len(aids)
    if n_atoms == 0:
        return []

    # Key order here fixes the order of the returned values.
    totals = {
        'AtomicMass': 0,
        'TotalConnections': 0,
        'HCount': 0,
        'HeavyNeighborCount': 0,
        'FormalCharge': 0,
        'DeltaMass': 0,
        'SolidAngle': 0,
        'SolidAngleValue': 0,
        'SolidAngleSign': ''
    }
    # Any solid-angle flavour requires summing the raw solid angles first.
    needs_solid_angle = any('SolidAngle' in name for name in base_prop)

    for aid in aids:
        atom = mol.GetAtomWithIdx(aid)
        if 'AtomicMass' in base_prop:
            totals['AtomicMass'] += atom.GetMass()
        if 'TotalConnections' in base_prop:
            totals['TotalConnections'] += atom.GetDegree()
        if 'HCount' in base_prop:
            totals['HCount'] += atom.GetNumExplicitHs()
        if 'HeavyNeighborCount' in base_prop:
            totals['HeavyNeighborCount'] += sum(
                1 for bond in atom.GetBonds()
                if bond.GetOtherAtom(atom).GetAtomicNum() > 1)
        if 'FormalCharge' in base_prop:
            totals['FormalCharge'] += atom.GetFormalCharge()
        if 'DeltaMass' in base_prop:
            totals['DeltaMass'] += (
                atom.GetMass() - tbl.GetAtomicWeight(atom.GetAtomicNum()))
        if needs_solid_angle:
            angle = sa_dict[aid]
            totals['SolidAngle'] += 0 if angle is None else angle

    # Derived solid-angle entries are taken from the summed solid angle.
    if 'SolidAngleValue' in base_prop:
        totals['SolidAngleValue'] = abs(totals['SolidAngle'])
    if 'SolidAngleSign' in base_prop:
        angle_sum = totals['SolidAngle']
        if angle_sum == 0:
            totals['SolidAngleSign'] = '0'
        else:
            totals['SolidAngleSign'] = '+' if angle_sum > 0 else '-'

    # Keep only the requested entries, then format them for later hashing.
    selected = {
        name: value
        for name, value in totals.items() if name in base_prop
    }
    if hash_type == 'str':
        for name in selected:
            if name != 'SolidAngleSign':
                selected[name] = '%.2f' % (selected[name] / n_atoms)
    elif hash_type != 'vec':
        print('Wrong hash type!')
        return totals

    return list(selected.values())
Example #10
0
#!/usr/bin/env python

from __future__ import division
import sys
import numpy as np
from rdkit.Chem import GetPeriodicTable
# Module-level RDKit periodic table, created once at import time.
pt = GetPeriodicTable()


def skiplines(openfile, nlines=0):
    '''
    Advance openfile by nlines + 1 lines and hand back the last line read.
    With the default nlines=0 this simply returns the next line.

    Parameters
    ----------
    openfile: object.
        Iterator over lines (e.g. an open file) to advance.
    nlines: int.
        Number of lines to skip before the line that is returned.

    Returns
    -------
    line: string.
        The line reached after consuming nlines + 1 lines.
    '''

    steps = nlines + 1
    for _ in range(steps):
        current = next(openfile)

    return current
Example #11
0
import json
import pickle
import copy
import math

from rdkit.Chem import (AddHs, MolFromSmiles, inchi, GetPeriodicTable,
                        Conformer, MolToSmiles)
from rdkit.Chem.AllChem import (EmbedMultipleConfs, UFFGetMoleculeForceField,
                                MMFFGetMoleculeForceField,
                                MMFFGetMoleculeProperties, GetConformerRMS)
from rdkit.Chem.rdmolops import RemoveHs, GetFormalCharge

from nff.utils.misc import read_csv, tqdm_enum
from nff.data.parallel import gen_parallel

# Module-level RDKit periodic table, created once at import time.
PERIODICTABLE = GetPeriodicTable()

# NOTE(review): presumably elements that need UFF rather than MMFF - confirm against usage.
UFF_ELEMENTS = ['B', 'Al']
# NOTE(review): unit is presumably seconds - confirm against usage.
DEFAULT_GEOM_COMPARE_TIMEOUT = 300
# File-name template with two positional format fields.
XYZ_NAME = "{0}_Conf_{1}.xyz"
# NOTE(review): presumably an upper bound on generated conformers - confirm.
MAX_CONFS = 10000
# NOTE(review): presumably the Hartree -> kcal/mol conversion factor - confirm.
AU_TO_KCAL = 627.509
# NOTE(review): presumably the Boltzmann constant in kcal/(mol K) - confirm.
KB_KCAL = 0.001985875


def write_xyz(coords, filename, comment):
    '''
    Write an xyz file from coords
    '''
    with open(filename, "w") as f_p:
        f_p.write(str(len(coords)) + "\n")
Example #12
0
def mol_to_dgl(mol):
    """Build a DGL graph from an rdkit mol, with per-atom and per-bond features

    Nodes correspond to atoms and one edge is added per bond, from the
    bond's begin atom to its end atom. Atom features are stored in
    ``g.ndata["feat"]`` and bond features in ``g.edata["feat"]``.

    Parameters
    ----------
    mol : rdkit mol

    Returns
    -------
    dgl.graph
    """

    def one_hot(choices, value):
        # Raises ValueError (from list.index) when value is not in choices.
        encoding = [0] * len(choices)
        encoding[choices.index(value)] = 1
        return encoding

    g = dgl.DGLGraph()
    g.add_nodes(mol.GetNumAtoms())
    g.set_n_initializer(dgl.init.zero_initializer)
    g.set_e_initializer(dgl.init.zero_initializer)

    # Atom features

    periodic_table = GetPeriodicTable()
    # ComputeGasteigerCharges(mol)

    atom_rows = []
    for atom in mol.GetAtoms():
        # Layout: element one-hot, chirality one-hot, explicit valence,
        # charge, hybridization one-hot, then the scalar descriptors.
        row = (one_hot(ATOM_TYPES, atom.GetSymbol()) +
               one_hot(CHIRALITY, atom.GetChiralTag()) +
               [atom.GetExplicitValence(),
                atom.GetFormalCharge()] +
               one_hot(HYBRIDIZATION, atom.GetHybridization()) + [
                   atom.GetDegree(),
                   atom.GetImplicitValence(),
                   int(atom.GetIsAromatic()),
                   atom.GetNumExplicitHs(),
                   atom.GetNumImplicitHs(),
                   atom.GetNumRadicalElectrons(),
                   int(atom.IsInRing()),
                   periodic_table.GetAtomicWeight(atom.GetSymbol()),
                   periodic_table.GetRvdw(atom.GetSymbol()),
                   # float(atom.GetProp("_GasteigerCharge"))
               ])
        atom_rows.append(row)

    for bond in mol.GetBonds():
        g.add_edge(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())

    g.ndata["feat"] = torch.FloatTensor(atom_rows)

    # Bond features

    bond_rows = [
        one_hot(BOND_TYPES, bond.GetBondType()) +
        one_hot(BOND_STEREO, bond.GetStereo()) +
        [float(bond.GetIsConjugated()),
         float(bond.IsInRing())] for bond in mol.GetBonds()
    ]

    g.edata["feat"] = torch.FloatTensor(bond_rows)
    return g