Python Smarts Examples, openbabel.pybel.Smarts Python Examples

Example #1

0

Show file

File: cheminfolib.py Project: pk-organics/galaxytools

def get_properties_ext(mol):
    """Collect an extended set of descriptors for a pybel molecule.

    Returns a dict holding molecular weight, logP, SMARTS-based hydrogen
    bond donor/acceptor counts, TPSA, molar refractivity, rotatable bond
    count, canonical SMILES, InChI, InChIKey, SSSR ring count, heavy-atom
    count and the spectrophore.
    """
    donor_pattern = pybel.Smarts("[!#6;!H0]")
    acceptor_pattern = pybel.Smarts(
        "[$([$([#8,#16]);!$(*=N~O);"
        "!$(*~N=O);X1,X2]),$([#7;v3;"
        "!$([nH]);!$(*(-a)-a)])]"
    )
    descriptors = mol.calcdesc()

    # calcdesc() may expose the partition coefficient as 'logP' or 'LogP'
    if 'logP' in descriptors:
        logp = descriptors['logP']
    else:
        logp = descriptors['LogP']

    properties = {}
    properties["molwt"] = mol.molwt
    properties["logp"] = logp
    properties["donors"] = len(donor_pattern.findall(mol))
    properties["acceptors"] = len(acceptor_pattern.findall(mol))
    properties["psa"] = descriptors['TPSA']
    properties["mr"] = descriptors['MR']
    properties["rotbonds"] = mol.OBMol.NumRotors()
    # Keep only the first token of the "can" output; this works for both
    # ZINC and ChEMBL input (no ZINC code is left after the canonical string).
    properties["can"] = mol.write("can").split()[0].strip()
    properties["inchi"] = mol.write("inchi").strip()
    properties["inchi_key"] = get_inchikey(mol).strip()
    properties["rings"] = len(mol.sssr)
    properties["atoms"] = mol.OBMol.NumHvyAtoms()
    properties["spectrophore"] = OBspectrophore(mol)
    return properties

Example #2

0

Show file

def coarse(mol, bead_list):
    """
    Creates a coarse-grained (CG) compound given a starting structure and
    SMARTS strings for desired beads.

    Parameters
    ----------
    mol : pybel.Molecule
    bead_list : list of tuples of strings, desired bead name
    followed by SMARTS string of that bead

    Returns
    -------
    CG_Compound
        The coarse-grained compound; the atomistic compound built from
        ``mol`` is attached as its ``atomistic`` attribute.
    """
    matches = []
    for bead_name, smart_str in bead_list:
        # Run the SMARTS search once and reuse the result for both the
        # "not found" message and the match collection.
        found = pybel.Smarts(smart_str).findall(mol)
        if not found:
            print(f"{smart_str} not found in compound!")
        for group in found:
            # shift the matched atom indices down by one
            shifted = tuple(atom_idx - 1 for atom_idx in group)
            matches.append((shifted, smart_str, bead_name))

    seen = set()
    bead_inds = []
    for group, smarts, name in matches:
        # smart strings for rings can share atoms: those beads are added
        # regardless of whether their atoms were seen.
        # alkyl chains should be exclusive: skip them when any atom is
        # already claimed by an earlier bead.
        if not has_number(smarts) and has_common_member(seen, group):
            continue
        seen.update(group)
        bead_inds.append((group, smarts, name))

    n_atoms = mol.OBMol.NumHvyAtoms()
    if n_atoms != len(seen):
        print(
            "WARNING: Some atoms have been left out of coarse-graining!"
        )  # TODO make this more informative

    comp = CG_Compound.from_pybel(mol)
    cg_compound = cg_comp(comp, bead_inds)
    cg_compound = cg_bonds(comp, cg_compound, bead_inds)

    cg_compound.atomistic = comp

    return cg_compound

Example #3

0

Show file

    def get_label(self, smiles_string):
        """Return a 0/1 list flagging which functional groups a SMILES contains.

        The list has one entry per pattern in ``self.functional_group_smarts``
        (same order): 1 when the pattern matches the molecule at least once,
        otherwise 0.
        """
        mol = pybel.readstring('smi', smiles_string)
        return [
            int(bool(pybel.Smarts(pattern).findall(mol)))
            for pattern in self.functional_group_smarts
        ]

Example #4

0

Show file

File: tools.py Project: jonwzheng/RMG-website

        def MatchPlattsBGroups(self, smiles):
            """Sum the Platts B group contributions for a SMILES string.

            Reads the 'PlattsB' sheet of ``groups.xls`` from the current
            working directory (columns: SMARTS, name, contribution), counts
            how often each group matches the molecule and stores the weighted
            sum plus the constant 0.071 in ``self.B``.

            Processing stops at the first SMARTS pattern that fails to parse;
            groups listed after it do not contribute to the sum.
            """
            # Load functional group database
            filepath = os.path.join(os.getcwd(), 'groups.xls')
            data = xlrd.open_workbook(filepath).sheet_by_name('PlattsB')
            databaseB = [
                functionalgroup(SMART, name, B)
                for SMART, name, B in zip(data.col_values(0),
                                          data.col_values(1),
                                          data.col_values(2))
            ]

            platts_B = 0
            mol = pybel.readstring("smi", smiles)
            for x in databaseB:
                pattern = str(x.smarts)
                # Check validity on a bare OBSmartsPattern first: Init()
                # reports a bad pattern through its return value.
                validator = ob.OBSmartsPattern()
                if not validator.Init(pattern):
                    print("Invalid SMARTS pattern", pattern)
                    break
                x.num = len(pybel.Smarts(pattern).findall(mol))
                if x.num > 0:
                    print("Found group", pattern,
                          'named', x.name, 'with contribution',
                          x.value, 'to B', x.num, 'times')
                platts_B += x.num * x.value

            self.B = platts_B + 0.071

Example #5

0

Show file

    def smart_feats(self, molecule):
        """Build a per-atom 0/1 feature matrix from five SMARTS patterns.

        The result has one row per entry of ``molecule.atoms`` and one column
        per pattern, in the order hydrophobic, aromatic, acceptor, donor,
        ring. A cell is 1.0 when that atom is matched by that pattern.
        """
        labelled_smarts = (
            ('hydrophobic',
             '[#6+0!$(*~[#7,#8,F]),SH0+0v2,s+0,S^3,Cl+0,Br+0,I+0]'),
            ('aromatic',
             '[a]'),
            ('acceptor',
             '[!$([#1,#6,F,Cl,Br,I,o,s,nX3,#7v5,#15v5,#16v4,#16v6,*+1,*+2,*+3])]'),
            ('donor',
             '[!$([#6,H0,-,-2,-3]),$([!H0;#7,#8,#9])]'),
            ('ring',
             '[r]'),
        )
        compiled = [pybel.Smarts(pattern) for _label, pattern in labelled_smarts]
        features = np.zeros((len(molecule.atoms), len(compiled)))

        for column, pattern in enumerate(compiled):
            hits = pattern.findall(molecule)
            # Each pattern matches single atoms, so transposing the hits
            # gives at most one tuple holding every matched atom id.
            matched_ids = list(*zip(*hits))
            # shift the matched ids down by one to index feature rows
            rows = np.array(matched_ids, dtype=int) - 1
            features[rows, column] = 1.0
        return features

Example #6

0

Show file

File: coarsegrain.py Project: jennyfothergill/grits

    def _set_mapping(self, beads, mol, allow_overlap):
        """Set the mapping attribute.

        ``beads`` maps bead names to SMARTS strings. Every match of a SMARTS
        string in ``mol`` is a candidate bead; accepted matches are stored in
        ``self.mapping`` under the key ``"{name}...{smarts}"`` as tuples of
        atom indices shifted down by one.

        A candidate that shares an atom with an already accepted bead is
        dropped, except when ``allow_overlap`` is true and ``has_number``
        accepts its SMARTS string, in which case it is always kept.
        """
        matches = []
        for bead_name, smart_str in beads.items():
            # Search once and reuse the result for the warning and the loop.
            found = pybel.Smarts(smart_str).findall(mol)
            if not found:
                warn(f"{smart_str} not found in compound!")
            for group in found:
                shifted = tuple(atom_idx - 1 for atom_idx in group)
                matches.append((shifted, smart_str, bead_name))

        seen = set()
        mapping = defaultdict(list)
        for group, smarts, name in matches:
            # smart strings for rings can share atoms: with overlap allowed
            # they are added regardless of whether their atoms were seen.
            # Everything else (e.g. alkyl chains) must be exclusive.
            may_share_atoms = allow_overlap and has_number(smarts)
            if not may_share_atoms and has_common_member(seen, group):
                continue
            seen.update(group)
            mapping[f"{name}...{smarts}"].append(group)

        n_atoms = mol.OBMol.NumHvyAtoms()
        if n_atoms != len(seen):
            warn("Some atoms have been left out of coarse-graining!")
            # TODO make this more informative
        self.mapping = mapping

Example #7

0

Show file

def find_ligands_lipophilic(mols, verbose):
    """Finds lipophilic fragments in all ligands

    :param mols: list of Pybel-parsed ligands' objects
    :type mols: list
    :param verbose: when true, print a status line and show a progress bar
    :type verbose: bool
    :return: dictionary indexed by ligand name, with the coords of all ligand's lipophilic fragments
    :rtype: dict
    """

    # SMARTS pattern:
    # [CH0,CH1,CH2,#9,#17,#35,#53] - aliphatic C with 0,1 or 2 H (ie not CH3) or halogens
    # ;+0   and only neutral (charge zero)
    # ;!$(C~O);!$(C~N)  and not C=O, C=N with any bonds
    # ;!$(*~[+1]);!$(*~[-1]) and not connected to a cation or anion
    # ICM: [C&!$(C=O)&!$(C#N),S&^3,#17,#15,#35,#53]

    # modified ICM: also aromatic C and must be neutral.

    smarts = pybel.Smarts("[c,C&!$(C=O)&!$(C#N),S&^3,s,#17,#15,#35,#53;+0]")
    dictionary = {}

    if verbose:
        print("Looking for lipophilic fragments...")

    for mol in tqdm(mols, disable=(not verbose)):  # for molecule in ligand file

        name = get_ligand_name_pose(dictionary, mol.title)

        # list of atom tuples fulfilling the pattern; the first (only) entry
        # of each tuple is the matched atom's index, counted from 1
        atom_sets = smarts.findall(mol)
        atoms = mol.atoms  # look the atom list up once per ligand

        # {'prefix^pose': [coords of every lipophilic atom]}
        dictionary[name] = [atoms[match[0] - 1].coords for match in atom_sets]

    return dictionary

Example #8

0

Show file

File: testbindings.py Project: Debayan-Saha/openbabel

 def testSmartsSupportsHashZero(self):
     """Check that the SMARTS atom [#0] matches an asterisk atom from SMILES"""
     dummy_atom_mol = pybel.readstring("smi", "*O")
     # Building this pattern used to raise an OSError (SMARTS parse failure)
     hash_zero_pattern = pybel.Smarts("[#0]O")
     found = hash_zero_pattern.findall(dummy_atom_mol)
     self.assertEqual(found, [(1, 2)])

Example #9

0

Show file

import sys
import csv
from openbabel import openbabel as ob
from openbabel import pybel
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem
###############################

# Module description, assigned explicitly because it follows the imports.
__doc__ = """Performs calculation of physiochemical properties of potential antibiotics. SMILES strings are parsed,
conformers are generated, and properties calculated. Properties include: chemical formula, molecular weight, rotatable
bonds, globularity, and PBF.
"""

# Compiled SMARTS pattern for every functional group, keyed by group name.
FUNCTIONAL_GROUP_TO_SMARTS = dict(
    primary_amine=pybel.Smarts('[$([N;H2;X3][CX4]),$([N;H3;X4+][CX4])]'),
)
# Functional group names in sorted order.
FUNCTIONAL_GROUPS = sorted(FUNCTIONAL_GROUP_TO_SMARTS)


def main():
    args = parse_args(sys.argv[1:])
    if (args.smiles):
        mol = smiles_to_ob(args.smiles)
        properties = average_properties(mol)
        properties['smiles'] = args.smiles
        # A file will be written if command line option provide, otherwise write to stdout
        if (args.output):
            mols_to_write = [properties]
            write_csv(mols_to_write, args.output)
        else:

Example #10

0

Show file

 def compile_smarts(self):
     """Compile every string in ``self.SMARTS`` into a ``pybel.Smarts`` object.

     The compiled patterns are stored, in order, in the private
     ``__PATTERNS`` list, which is reset at the start of each call.
     """
     compiled = self.__PATTERNS = []
     compiled.extend(map(pybel.Smarts, self.SMARTS))

Example #11

0

Show file

def get_chromo_ids_smiles(snap, smarts_str, conversion_dict=None):
    """Find the atom indices in a snapshot that match a SMARTS string.

    Useful for locating the atoms of each chromophore. SMARTS matching only
    works when the structure is chemically sensible (e.g., aromatic rings are
    planar). Snapshots taken from classical simulations (e.g., MC, MD) can be
    distorted enough that some chromophores are missed; in that case run this
    function on the initial frame of the trajectory and reuse the indices for
    later frames.

    Parameters
    ----------
    snap : gsd.hoomd.Snapshot
        Atomistic simulation snapshot from a GSD file. Lengths are expected
        to have been converted to Angstroms.
    smarts_str : str
        SMARTS string used to find the atom indices.
    conversion_dict : dictionary, default None
        Maps each atom type to its element, e.g., `{'c3': C}`. A mapping from
        AMBER types to elements is available in `amber_dict`. When None, the
        particle type names are taken to be element symbols already.

    Returns
    -------
    list of numpy.ndarray of int
        atom indices of each SMARTS match

    Note
    ----
    When nothing matches, a warning is issued and the pybel.Molecule object
    is returned instead, so it can be inspected.
    """
    particles = snap.particles
    cell_lengths = snap.configuration.box[:3]
    unwrapped_positions = particles.position + particles.image * cell_lengths

    mol = openbabel.OBMol()
    for index, typeid in enumerate(particles.typeid):
        atom = mol.NewAtom()
        type_name = particles.types[typeid]
        if conversion_dict is None:
            element = ele.element_from_symbol(type_name)
        else:
            element = conversion_dict[type_name]
        atom.SetAtomicNum(element.atomic_number)
        atom.SetVector(*map(float, unwrapped_positions[index]))

    for first, second in snap.bonds.group:
        # openbabel counts atoms from 1, the snapshot from 0;
        # arguments are (first index, second index, bond order)
        mol.AddBond(int(first + 1), int(second + 1), 1)

    # Every bond was added with order 1; have the real bond orders perceived
    # (necessary for smarts matching)
    mol.PerceiveBondOrders()
    mol.SetAromaticPerceived()

    pybelmol = pybel.Molecule(mol)

    matches = pybel.Smarts(smarts_str).findall(pybelmol)
    # convert the matched indices back to 0-based
    atom_ids = [np.array(match) - 1 for match in matches]
    if atom_ids:
        print(f"Found {len(atom_ids)} chromophores.")
        return atom_ids

    warn(f"No matches found for smarts string {smarts_str}. "
         "Please check the returned pybel.Molecule for errors.\n")
    return pybelmol

Example #12

0

Show file

def sort_atoms(inpf, ftype=None, reorder_frag=False, from_string=False):
    '''
    Map the fragments of a molecule onto their canonical SMILES atom order.

    inpf: input chemical file name, or the molecule text itself when from_string is True
    ftype: format handed to pybel; when None it is looked up from inpf with OBConversion.FormatFromExt
           (NOTE(review): presumably ftype must be given explicitly together with from_string - confirm)
    reorder_frag: whether to sort the fragments (by their SMILES string) or not;
                  when False they are ordered by the first atom index of each fragment
    from_string: parse inpf as molecule text instead of opening it as a file

    return: [List(str), List(List(int))] the canonical SMILES string(s) and atom indices corresponding to the canonical orders of fragments;
            two empty lists when no molecule could be read

    Note: Only the first molecule in the file is used
    '''
    # only report messages at error level
    openbabel.obErrorLog.SetOutputLevel(openbabel.obError)
    if ftype is None:
        ftype = openbabel.OBConversion.FormatFromExt(inpf)

    if from_string:
        mymols = [pybel.readstring(ftype, inpf)]
    else:
        mymols = list(pybel.readfile(ftype, inpf))
    if not mymols:
        return [], []

    mymol = mymols[0]
    smi = mymol.write('can')
    # unique '.'-separated fragment SMILES, in sorted order
    sms = sorted(set(smi.split()[0].split('.')))
    idx_out = []  # atom indices of every fragment match
    sm_list = []  # fragment SMILES of every match, parallel to idx_out

    natoms = mymol.OBMol.NumAtoms()
    # conn[i]: indices of the hydrogen atoms bonded to atom i (slot 0 unused)
    conn = [[] for _k in range(natoms + 1)]
    for atom in mymol:
        for atom2 in pybel.ob.OBAtomAtomIter(atom.OBAtom):
            if atom2.GetAtomicNum() == 1:
                conn[atom.idx].append(atom2.GetIdx())

    for sm in sms:
        # each fragment's canonical SMILES doubles as the SMARTS query
        for idxs in pybel.Smarts(sm).findall(mymol):
            if not idxs:
                continue
            # every matched atom is followed by its attached hydrogens
            members = []
            for idx in idxs:
                members.extend([idx] + sorted(conn[idx]))
            sm_list.append(sm)
            idx_out.append(tuple(members))

    rank_size = np.argsort([len(_k) for _k in idx_out], axis=0)
    atom_added = set()
    idx_sel = set()
    for i in reversed(rank_size):
        # largest matches first: filter out sub-fragments that are same as
        # (overlap with) fragments already accepted
        idx = idx_out[i]
        if atom_added.isdisjoint(idx):
            atom_added.update(idx)
            idx_sel.add(i)

    if reorder_frag:
        idx_sorted = np.argsort(sm_list, axis=0)
    else:
        idx_sorted = np.argsort([_k[0] for _k in idx_out], axis=0)

    sm_list = [sm_list[_i] for _i in idx_sorted if _i in idx_sel]
    idx_out = [idx_out[_i] for _i in idx_sorted if _i in idx_sel]

    return sm_list, idx_out