def get_properties_ext(mol):
    """Collect a dictionary of descriptors for a pybel molecule.

    Parameters
    ----------
    mol : pybel.Molecule
        Molecule to describe.

    Returns
    -------
    dict
        Keys: ``molwt``, ``logp``, ``donors``, ``acceptors``, ``psa``,
        ``mr``, ``rotbonds``, ``can``, ``inchi``, ``inchi_key``, ``rings``,
        ``atoms`` and ``spectrophore``.
    """
    # Donor: any non-carbon atom carrying at least one hydrogen.
    donor_pattern = pybel.Smarts("[!#6;!H0]")
    acceptor_pattern = pybel.Smarts(
        "[$([$([#8,#16]);!$(*=N~O);"
        "!$(*~N=O);X1,X2]),$([#7;v3;"
        "!$([nH]);!$(*(-a)-a)])]"
    )

    descriptors = mol.calcdesc()
    # The descriptor is looked up as 'logP' first, falling back to 'LogP'.
    logp_key = 'logP' if 'logP' in descriptors else 'LogP'
    logp = descriptors[logp_key]

    mol_weight = mol.molwt
    n_donors = len(donor_pattern.findall(mol))
    n_acceptors = len(acceptor_pattern.findall(mol))
    polar_surface_area = descriptors['TPSA']
    molar_refractivity = descriptors['MR']
    n_rotors = mol.OBMol.NumRotors()
    # Only the first whitespace-separated token of the "can" output is kept;
    # per the original author this works for both ZINC and ChEMBL input
    # (no ZINC code is left after the canonical string).
    canonical = mol.write("can").split()[0].strip()
    inchi = mol.write("inchi").strip()
    inchi_key = get_inchikey(mol).strip()
    n_rings = len(mol.sssr)
    n_heavy_atoms = mol.OBMol.NumHvyAtoms()
    spectrophore = OBspectrophore(mol)

    return {
        "molwt": mol_weight,
        "logp": logp,
        "donors": n_donors,
        "acceptors": n_acceptors,
        "psa": polar_surface_area,
        "mr": molar_refractivity,
        "rotbonds": n_rotors,
        "can": canonical,
        "inchi": inchi,
        "inchi_key": inchi_key,
        "rings": n_rings,
        "atoms": n_heavy_atoms,
        "spectrophore": spectrophore,
    }
def coarse(mol, bead_list):
    """Create a coarse-grained (CG) compound from an atomistic structure.

    Parameters
    ----------
    mol : pybel.Molecule
        Atomistic starting structure.
    bead_list : list of tuple of str
        ``(bead_name, smarts_string)`` pairs, one per desired bead type.

    Returns
    -------
    CG_Compound
        The coarse-grained compound; its ``atomistic`` attribute is set to
        the compound built from ``mol``.

    Notes
    -----
    A message is printed for every SMARTS string with no match, and a warning
    is printed when the number of atoms assigned to beads differs from the
    number of heavy atoms in ``mol``.
    """
    matches = []
    for bead_name, smart_str in bead_list:
        # Run the SMARTS search once and reuse the result for both the
        # "not found" message and the match list.
        hits = pybel.Smarts(smart_str).findall(mol)
        if not hits:
            print(f"{smart_str} not found in compound!")
        for hit in hits:
            # pybel reports 1-based atom indices; store them 0-based.
            zero_based = tuple(atom_idx - 1 for atom_idx in hit)
            matches.append((zero_based, smart_str, bead_name))

    seen = set()
    bead_inds = []
    for group, smarts, name in matches:
        # SMARTS strings for rings can share atoms, so such beads are added
        # regardless of overlap (has_number is defined elsewhere; presumably
        # it detects ring-closure digits). Alkyl chains must be exclusive:
        # skip them when any atom is already taken.
        if not has_number(smarts) and has_common_member(seen, group):
            continue
        seen.update(group)
        bead_inds.append((group, smarts, name))

    n_atoms = mol.OBMol.NumHvyAtoms()
    if n_atoms != len(seen):
        # TODO make this more informative
        print("WARNING: Some atoms have been left out of coarse-graining!")

    comp = CG_Compound.from_pybel(mol)
    cg_compound = cg_comp(comp, bead_inds)
    cg_compound = cg_bonds(comp, cg_compound, bead_inds)
    cg_compound.atomistic = comp
    return cg_compound
def get_label(self, smiles_string):
    """Return a 0/1 flag per functional-group SMARTS for a SMILES string.

    The molecule is parsed from ``smiles_string`` and tested against every
    entry of ``self.functional_group_smarts`` in order; the i-th element of
    the result is 1 when the i-th pattern matches at least once, else 0.
    """
    mol = pybel.readstring('smi', smiles_string)
    return [
        1 if pybel.Smarts(pattern).findall(mol) else 0
        for pattern in self.functional_group_smarts
    ]
def MatchPlattsBGroups(self, smiles):
    """Compute the Platts B value of a molecule and store it in ``self.B``.

    The functional-group table is read from the ``PlattsB`` sheet of
    ``groups.xls`` in the current working directory (columns: SMARTS, name,
    contribution). Each group's match count in the molecule is multiplied by
    its contribution; the sum plus 0.071 is stored in ``self.B``.

    Parameters
    ----------
    smiles : str
        SMILES string of the molecule to analyse.
    """
    # Load functional group database
    filepath = os.path.join(os.getcwd(), 'groups.xls')
    sheet = xlrd.open_workbook(filepath).sheet_by_name('PlattsB')
    databaseB = [
        functionalgroup(pattern, name, contribution)
        for pattern, name, contribution in zip(
            sheet.col_values(0), sheet.col_values(1), sheet.col_values(2)
        )
    ]

    platts_B = 0
    mol = pybel.readstring("smi", smiles)
    for group in databaseB:
        pattern_text = str(group.smarts)
        # pybel.Smarts raises IOError when the pattern cannot be parsed, so
        # no separate validation pass is needed.
        try:
            smarts = pybel.Smarts(pattern_text)
        except IOError:
            print("Invalid SMARTS pattern", pattern_text)
            # NOTE(review): processing stops at the first invalid pattern, so
            # later groups are not evaluated and self.B is set from the
            # partial sum - confirm this is intended rather than skipping.
            break
        group.num = len(smarts.findall(mol))
        if group.num > 0:
            print("Found group", pattern_text, 'named', group.name,
                  'with contribution', group.value, 'to B', group.num,
                  'times')
        platts_B += group.num * group.value

    self.B = platts_B + 0.071
def smart_feats(self, molecule):
    """Build a per-atom binary feature matrix from five SMARTS patterns.

    Parameters
    ----------
    molecule : pybel.Molecule
        Molecule whose atoms are featurised.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(molecule.atoms), 5)``; entry ``[a, p]`` is 1.0
        when atom ``a`` (0-based) matches pattern ``p``, otherwise 0.0.
    """
    # (label, SMARTS) pairs; column order of the result follows this list.
    labelled_smarts = [
        ('hydrophobic',
         '[#6+0!$(*~[#7,#8,F]),SH0+0v2,s+0,S^3,Cl+0,Br+0,I+0]'),
        ('aromatic', '[a]'),
        ('acceptor',
         '[!$([#1,#6,F,Cl,Br,I,o,s,nX3,#7v5,#15v5,#16v4,#16v6,*+1,*+2,*+3])]'),
        ('donor', '[!$([#6,H0,-,-2,-3]),$([!H0;#7,#8,#9])]'),
        ('ring', '[r]'),
    ]
    compiled = [pybel.Smarts(text) for _label, text in labelled_smarts]

    features = np.zeros((len(molecule.atoms), len(compiled)))
    for column, pattern in enumerate(compiled):
        # Every pattern is a single-atom SMARTS, so each match is a 1-tuple
        # holding a 1-based atom index.
        hits = [match[0] for match in pattern.findall(molecule)]
        rows = np.array(hits, dtype=int) - 1
        features[rows, column] = 1.0
    return features
def _set_mapping(self, beads, mol, allow_overlap):
    """Set the mapping attribute.

    Parameters
    ----------
    beads : dict
        Maps bead name to the SMARTS string describing that bead.
    mol : pybel.Molecule
        Atomistic molecule the SMARTS strings are matched against.
    allow_overlap : bool
        When true, beads whose SMARTS satisfies ``has_number`` may share
        atoms with beads already accepted; all other beads are always
        exclusive.

    Notes
    -----
    ``self.mapping`` becomes a ``defaultdict(list)`` keyed by
    ``"{name}...{smarts}"`` whose values are lists of 0-based atom-index
    tuples. A warning is issued for every SMARTS with no match and when the
    number of mapped atoms differs from the heavy-atom count of ``mol``.
    """
    matches = []
    for bead_name, smart_str in beads.items():
        # Search once and reuse the result for the warning and the matches.
        hits = pybel.Smarts(smart_str).findall(mol)
        if not hits:
            warn(f"{smart_str} not found in compound!")
        for hit in hits:
            # pybel reports 1-based atom indices; store them 0-based.
            zero_based = tuple(atom_idx - 1 for atom_idx in hit)
            matches.append((zero_based, smart_str, bead_name))

    seen = set()
    mapping = defaultdict(list)
    for group, smarts, name in matches:
        # SMARTS strings for rings can share atoms: with allow_overlap such
        # beads are added regardless of whether their atoms were seen.
        # Everything else (e.g. alkyl chains) must be exclusive.
        may_share_atoms = allow_overlap and has_number(smarts)
        if not may_share_atoms and has_common_member(seen, group):
            continue
        seen.update(group)
        mapping[f"{name}...{smarts}"].append(group)

    n_atoms = mol.OBMol.NumHvyAtoms()
    if n_atoms != len(seen):
        # TODO make this more informative
        warn("Some atoms have been left out of coarse-graining!")
    self.mapping = mapping
def find_ligands_lipophilic(mols, verbose):
    """Find lipophilic atoms in all ligands.

    :param mols: list of Pybel-parsed ligand objects
    :type mols: list
    :param verbose: when true, print a status line and show a progress bar
    :type verbose: bool
    :return: dictionary indexed by ligand name (as produced by
        ``get_ligand_name_pose``), with the coords of all of the ligand's
        lipophilic atoms
    :rtype: dict
    """
    # SMARTS pattern (modified ICM definition):
    #   c, s                    aromatic carbon / aromatic sulfur
    #   C&!$(C=O)&!$(C#N)       aliphatic C that is neither C=O nor C#N
    #   S&^3                    sp3 sulfur
    #   #17, #15, #35, #53      Cl, P, Br, I
    #   ;+0                     and only neutral atoms (charge zero)
    # Original ICM: [C&!$(C=O)&!$(C#N),S&^3,#17,#15,#35,#53]
    smarts = pybel.Smarts("[c,C&!$(C=O)&!$(C#N),S&^3,s,#17,#15,#35,#53;+0]")
    dictionary = {}
    if verbose:
        print("Looking for lipophilic fragments...")
    for mol in tqdm(mols, disable=(not verbose)):
        name = get_ligand_name_pose(dictionary, mol.title)
        # pybel rebuilds the atom list on every ``.atoms`` access, so fetch
        # it once per molecule instead of once per matched atom.
        atoms = mol.atoms
        # Each match is a 1-tuple holding a 1-based atom index.
        dictionary[name] = [
            atoms[match[0] - 1].coords for match in smarts.findall(mol)
        ]
    return dictionary
def testSmartsSupportsHashZero(self):
    """Ensure that we can match asterisks in SMILES with SMARTS"""
    wildcard_mol = pybel.readstring("smi", "*O")
    # Building this pattern used to raise an OSError (SMARTS parse failure)
    pattern = pybel.Smarts("[#0]O")
    self.assertEqual(pattern.findall(wildcard_mol), [(1, 2)])
import sys import csv from openbabel import openbabel as ob from openbabel import pybel import numpy as np from rdkit import Chem from rdkit.Chem import AllChem ############################### __doc__ = """Performs calculation of physiochemical properties of potential antibiotics. SMILES strings are parsed, conformers are generated, and properties calculated. Properties include: chemical formula, molecular weight, rotatable bonds, globularity, and PBF. """ FUNCTIONAL_GROUP_TO_SMARTS = { 'primary_amine': pybel.Smarts('[$([N;H2;X3][CX4]),$([N;H3;X4+][CX4])]') } FUNCTIONAL_GROUPS = sorted(FUNCTIONAL_GROUP_TO_SMARTS.keys()) def main(): args = parse_args(sys.argv[1:]) if (args.smiles): mol = smiles_to_ob(args.smiles) properties = average_properties(mol) properties['smiles'] = args.smiles # A file will be written if command line option provide, otherwise write to stdout if (args.output): mols_to_write = [properties] write_csv(mols_to_write, args.output) else:
def compile_smarts(self):
    """Compile each SMARTS string in ``self.SMARTS`` into a ``pybel.Smarts``.

    The compiled patterns are stored, in order, in a fresh list on the
    private ``__PATTERNS`` attribute, replacing any previous value.
    """
    # Bind the new list to the attribute first and fill it in place, so the
    # attribute already refers to the list while patterns are being compiled.
    compiled = self.__PATTERNS = []
    compiled.extend(pybel.Smarts(text) for text in self.SMARTS)
def get_chromo_ids_smiles(snap, smarts_str, conversion_dict=None):
    """Find the atom indices in a snapshot that match a SMARTS string.

    Useful for determining the atom indices of each chromophore. SMARTS
    matching relies on the structure making chemical sense (e.g., aromatic
    structures are planar). Snapshots from classical simulations (e.g., MC,
    MD) may contain chemically unphysical distortions, in which case not
    every chromophore may be found. A workaround is to run this function on
    the initial frame of the trajectory and reuse the indices on a later
    frame.

    Parameters
    ----------
    snap : gsd.hoomd.Snapshot
        Atomistic simulation snapshot from a GSD file. Lengths are expected
        to have been converted to Angstroms.
    smarts_str : str
        SMARTS string used to find the atom indices.
    conversion_dict : dictionary, default None
        Maps each particle type name to its element (an object exposing
        ``atomic_number``), e.g., `{'c3': C}`. An instance mapping AMBER
        types to elements can be found in `amber_dict`. If None, the
        particle type names are treated as element symbols.

    Returns
    -------
    list of numpy.ndarray of int
        0-based atom indices of each SMARTS match.

    Note
    ----
    If no matches are found, a warning is issued and the pybel.Molecule
    object is returned instead, for debugging.
    """
    box_lengths = snap.configuration.box[:3]
    unwrapped_positions = (
        snap.particles.position + snap.particles.image * box_lengths
    )

    mol = openbabel.OBMol()
    for index, typeid in enumerate(snap.particles.typeid):
        atom = mol.NewAtom()
        type_name = snap.particles.types[typeid]
        if conversion_dict is None:
            element = ele.element_from_symbol(type_name)
        else:
            element = conversion_dict[type_name]
        atom.SetAtomicNum(element.atomic_number)
        atom.SetVector(*map(float, unwrapped_positions[index]))

    for first, second in snap.bonds.group:
        # openbabel indexes atoms from 1
        # AddBond(i_index, j_index, bond_order)
        mol.AddBond(int(first + 1), int(second + 1), 1)

    # Every bond was added with order 1; let Open Babel assign bond orders
    # (necessary for SMARTS matching).
    mol.PerceiveBondOrders()
    mol.SetAromaticPerceived()

    pybelmol = pybel.Molecule(mol)
    pattern = pybel.Smarts(smarts_str)
    # shift indices by 1 to make them 0-based
    atom_ids = [np.array(match) - 1 for match in pattern.findall(pybelmol)]

    if atom_ids:
        print(f"Found {len(atom_ids)} chromophores.")
        return atom_ids

    warn(
        f"No matches found for smarts string {smarts_str}. "
        "Please check the returned pybel.Molecule for errors.\n"
    )
    return pybelmol
def sort_atoms(inpf, ftype=None, reorder_frag=False, from_string=False):
    """Return canonical fragment SMILES and matching atom indices.

    inpf: input chemical file name, or the molecule text itself when
        from_string is true
    ftype: input format; when None it is derived from inpf with
        OBConversion.FormatFromExt
    reorder_frag: whether to sort the fragments (by SMILES string) or not;
        when false they are ordered by the first atom index of each fragment
    from_string: read the molecule from the string inpf instead of a file
    return: [List(str), List(Tuple(int))]
        the canonical SMILES string(s) and atom indices corresponding to the
        canonical orders of fragments; each matched atom is followed by the
        sorted indices of the hydrogens bonded to it. Both lists are empty
        when no molecule could be read.
    Note: Only the first molecule in the file is used
    """
    # Restrict Open Babel logging to the error level.
    openbabel.obErrorLog.SetOutputLevel(openbabel.obError)

    if ftype is None:
        ftype = openbabel.OBConversion.FormatFromExt(inpf)
    if from_string:
        molecules = [pybel.readstring(ftype, inpf)]
    else:
        molecules = list(pybel.readfile(ftype, inpf))
    if not molecules:
        return [], []
    first_mol = molecules[0]

    # Unique, sorted fragment SMILES taken from the canonical SMILES output.
    canonical = first_mol.write('can')
    fragments = sorted(set(canonical.split()[0].split('.')))

    # hydrogens[k] holds the indices of hydrogens bonded to atom k; atom
    # indices are 1-based, so slot 0 stays unused.
    hydrogens = [[] for _ in range(first_mol.OBMol.NumAtoms() + 1)]
    for atom in first_mol:
        for neighbor in pybel.ob.OBAtomAtomIter(atom.OBAtom):
            if neighbor.GetAtomicNum() == 1:
                hydrogens[atom.idx].append(neighbor.GetIdx())

    # Match every fragment SMILES (used as SMARTS) against the molecule and
    # expand each match with the hydrogens attached to its atoms.
    frag_smiles = []
    frag_atoms = []
    for fragment in fragments:
        for match in pybel.Smarts(fragment).findall(first_mol):
            if not match:
                continue
            expanded = []
            for heavy in match:
                expanded.append(heavy)
                expanded.extend(sorted(hydrogens[heavy]))
            frag_smiles.append(fragment)
            frag_atoms.append(tuple(expanded))

    # Visit matches from largest to smallest and keep only those that share
    # no atom with a match already kept; this filters out sub-fragments that
    # are the same as other fragments.
    by_size = np.argsort([len(atoms) for atoms in frag_atoms], axis=0)
    claimed = set()
    keep = set()
    for pos in reversed(by_size):
        members = set(frag_atoms[pos])
        if members.isdisjoint(claimed):
            claimed.update(members)
            keep.add(pos)

    if reorder_frag:
        order = np.argsort(frag_smiles, axis=0)
    else:
        order = np.argsort([atoms[0] for atoms in frag_atoms], axis=0)

    kept_order = [pos for pos in order if pos in keep]
    selected_smiles = [frag_smiles[pos] for pos in kept_order]
    selected_atoms = [frag_atoms[pos] for pos in kept_order]
    return selected_smiles, selected_atoms