Ejemplo n.º 1
0
def match(smiles):
    """Return the ids of the functional groups detected in a SMILES string.

    An empty list is returned for multi-component SMILES (containing
    ``'.'``), for molecules with elements outside C/H/O/N/F/Cl/Br, and for
    molecules that lack carbon or lack all of H/F/Cl/Br.

    Ids are looked up in the module-level ``smarts_id`` mapping; the SMARTS
    patterns that are tested come from the module-level ``smarts_dict``.
    """
    matched_ids = []
    # Multi-component input (the original comment calls it "complex") is
    # not classified.
    if '.' in smiles:
        return matched_ids

    py_mol = pybel.readstring('smi', smiles)
    py_mol.removeh()
    elements = set(Formula.read(py_mol.formula).atomdict.keys())

    if not elements <= {'C', 'H', 'O', 'N', 'F', 'Cl', 'Br'}:
        return matched_ids
    # Carbon is required together with at least one of H, F, Cl, Br.
    if 'C' not in elements or not ({'H', 'F', 'Cl', 'Br'} & elements):
        return matched_ids

    if elements == {'C', 'H'}:
        matched_ids.append(smarts_id['hydrocarbon'])
        # A hydrocarbon matching none of these carbon patterns is reported
        # as an alkane as well.
        excluded = ('[CX2]', '[CX3]', 'c', '[#6;v0,v1,v2,v3]')
        if not any(pybel.Smarts(patt).findall(py_mol) for patt in excluded):
            matched_ids.append(smarts_id['alkane'])

    # More than one [OH] match is reported as a diol.
    if len(pybel.Smarts('[OH]').findall(py_mol)) > 1:
        matched_ids.append(smarts_id['diol'])

    # NOTE(review): smarts_id is keyed by group *name* above but by the
    # SMARTS *pattern* here -- confirm that smarts_id really holds both.
    for patt in smarts_dict.values():
        if pybel.Smarts(patt).findall(py_mol):
            matched_ids.append(smarts_id[patt])

    return matched_ids
Ejemplo n.º 2
0
    def index(self, smiles):
        """Build a numeric index vector for the molecule given as SMILES.

        The vector holds, in order: heavy-atom count, molecular weight
        rounded to 0.1 and multiplied by 10, the first element returned by
        ``self.get_shortest_wiener``, the rotatable-bond count computed by
        RDKit on the hydrogen-added molecule, the match counts of the four
        ring-topology SMARTS below, and finally every value yielded by
        ``self.get_ring_info``.

        Returns:
            numpy array with the values described above.
        """
        rd_mol: Mol = Chem.MolFromSmiles(smiles)
        py_mol = pybel.readstring('smi', smiles)

        # Order matters: it fixes the position of each count in the result.
        topology_patterns = (
            '[x3]',  # bridged atoms
            '[x4]',  # spiro atoms
            '[R]!@[R]',  # linked rings
            '[R]!@*!@[R]',  # separated rings
        )

        values = [
            py_mol.OBMol.NumHvyAtoms(),
            int(round(py_mol.molwt, 1) * 10),
            self.get_shortest_wiener(rd_mol)[0],
            Chem.CalcNumRotatableBonds(Chem.AddHs(rd_mol)),
        ]
        values.extend(
            len(pybel.Smarts(patt).findall(py_mol))
            for patt in topology_patterns)
        values.extend(self.get_ring_info(py_mol))

        return np.array(values)
Ejemplo n.º 3
0
def run():
    """Predict a value for the molecule file named by ``sys.argv[1]``.

    The file extension is used as the Open Babel format name. For every
    molecule in the file a feature row is built from 14 ``calcdesc()``
    descriptors followed by the number of atoms matching ``[+]`` and
    ``[-]``. The rows are passed to the model stored in
    ``volume_model/volume.pkl``.

    Returns:
        The first prediction rounded to two decimals, or ``None`` when the
        input file contains no molecule.
    """
    # Feature order must stay exactly as the model was trained on.
    descriptor_names = (
        'TPSA', 'HBD', 'logP', 'MW', 'tbonds', 'nF', 'bonds', 'atoms',
        'HBA1', 'HBA2', 'sbonds', 'dbonds', 'MR', 'abonds',
    )
    path = sys.argv[1]
    molecules = pybel.readfile(path.split(".")[-1], path)

    # Compile the charge patterns once instead of once per molecule.
    positive = pybel.Smarts("[+]")
    negative = pybel.Smarts("[-]")

    # One row per molecule: the model expects (n_samples, n_features), not
    # a single flat vector with every molecule's features concatenated.
    rows = []
    for mol in molecules:
        descvalues = mol.calcdesc()
        row = [descvalues.get(name) for name in descriptor_names]
        row.append(len(positive.findall(mol)))
        row.append(len(negative.findall(mol)))
        rows.append(row)

    if not rows:
        return None

    model = joblib.load('volume_model/volume.pkl')
    predictions = model.predict(rows)
    # Only the first molecule's prediction is reported, as before.
    return round(predictions[0], 2)
Ejemplo n.º 4
0
def get_properties_ext(mol):
    """Collect a dictionary of properties for a pybel molecule.

    Args:
        mol: pybel molecule; it must provide ``calcdesc()``, ``molwt``,
            ``write()``, ``sssr`` and ``OBMol``.

    Returns:
        dict with the keys ``molwt``, ``logp``, ``donors``, ``acceptors``,
        ``psa``, ``mr``, ``rotbonds``, ``can``, ``inchi``, ``inchi_key``,
        ``rings``, ``atoms`` and ``spectrophore``.

    Raises:
        KeyError: if the descriptor dictionary has neither ``'logP'`` nor
            ``'LogP'``, or lacks ``'TPSA'`` / ``'MR'``.
    """
    # Donors: any non-carbon atom carrying at least one hydrogen.
    donor_query = pybel.Smarts("[!#6;!H0]")
    acceptor_query = pybel.Smarts("[$([$([#8,#16]);!$(*=N~O);" +
                                  "!$(*~N=O);X1,X2]),$([#7;v3;" +
                                  "!$([nH]);!$(*(-a)-a)])]")
    descriptors = mol.calcdesc()

    # The key is looked up under both spellings; presumably the name
    # differs between Open Babel builds -- TODO confirm.
    try:
        logp = descriptors['logP']
    except KeyError:
        logp = descriptors['LogP']

    return {
        "molwt": mol.molwt,
        "logp": logp,
        "donors": len(donor_query.findall(mol)),
        "acceptors": len(acceptor_query.findall(mol)),
        "psa": descriptors['TPSA'],
        "mr": descriptors['MR'],
        "rotbonds": mol.OBMol.NumRotors(),
        # Keep only the first whitespace-separated token of the canonical
        # SMILES output; per the original note this works for both ZINC
        # and ChEMBL input (no ZINC code is left after the SMILES).
        "can": mol.write("can").split()[0].strip(),
        "inchi": mol.write("inchi").strip(),
        "inchi_key": get_inchikey(mol).strip(),
        "rings": len(mol.sssr),
        "atoms": mol.OBMol.NumHvyAtoms(),
        "spectrophore": OBspectrophore(mol),
    }
Ejemplo n.º 5
0
def main(substrate, group, sub_id, group_id, position, ref):
    """Build a substituted molecule, set up its folder and run zstruct.

    Args:
        substrate: substrate SMILES; also used as a SMARTS query and as the
            ``-s`` pattern for the obabel alignment.
        group: substituent SMILES inserted as a branch; when falsy the
            substrate is used unchanged.
        sub_id, group_id: passed through to ``ip.iupac_name``.
        position: 1 (ortho), 2 (meta) or 3 (para); the branch is inserted
            after the first ``position + 1`` characters of ``substrate``.
        ref: reference file for ``obabel --align``; ``None`` skips the
            alignment step.

    Raises:
        ValueError: if ``group`` is given and ``position`` is not 1, 2 or 3.

    Side effects: creates ``<folder>`` and ``<folder>/scratch``, changes
    the working directory into ``<folder>`` and may run ``obabel``.
    """
    import subprocess

    # ========================> Generate Smiles < ========================= #
    if not group:
        smiles = substrate
    else:
        # Number of leading substrate characters kept before the branch.
        cut = {1: 2, 2: 3, 3: 4}.get(position)
        if cut is None:
            raise ValueError(
                "position must be 1 (ortho), 2 (meta) or 3 (para), got %r"
                % (position,))
        smiles = substrate[:cut] + "(" + group + ")" + substrate[cut:]
    print(smiles)

    # ============== Generate Folder Name ========== #
    folder = ip.iupac_name(smiles, substrate, group, sub_id, group_id,
                           position)
    print(folder)
    # Create directories without a shell: generated names may contain
    # characters such as parentheses that a shell would interpret.
    if not os.path.isdir(folder):
        os.mkdir(folder)
    os.chdir(folder)
    if not os.path.isdir('scratch'):
        os.mkdir('scratch')

    # ============== Copy data to folder =========== #
    #TODO

    # ============== Generate Molecule ========== #
    mol = confab.gen3d(smiles)

    # ============ Driving Coordinate Idx Generation ==============#
    substrate_hits = pybel.Smarts(substrate).findall(mol)
    ring_hits = pybel.Smarts("cccccc").findall(mol)

    #============ confab and align to reference  ==================#
    # Last two atoms of the first substrate match, and the first
    # six-membered aromatic match.
    r1 = [substrate_hits[0][-2], substrate_hits[0][-1]]
    r2 = list(ring_hits[0])
    mol = confab.confab(mol, r1, r2)
    # => align to substructure <=#
    path = os.getcwd() + "/tmp.xyz"
    if ref is not None:
        cmd_str = "obabel %s %s -O %s -s %s --align" % (ref, path, path,
                                                        substrate)
        print(cmd_str)
        # Argument list instead of a shell string, so SMILES characters
        # such as '(' or '[' reach obabel unmodified.
        subprocess.call(["obabel", str(ref), path, "-O", path, "-s",
                         substrate, "--align"])
        #read aligned geom
        mol = confab.read_molecules(path, single=False)
        mol = mol[1]

    #============ do zstruct ===================================== #
    zstruct2.zstruct1(mol, r1, r2, None, doOne=False, doTwo=True)
0
 def __init__(self, ec, substrate, product):
     """Store a reaction rule and compile both sides as SMARTS.

     ``smarts_subs`` and ``smarts_prod`` are both set to ``None`` (and a
     warning is logged) when either pattern is rejected with ``IOError``.
     """
     self.ec = ec
     self.substrate = substrate
     self.product = product
     try:
         compiled = (pybel.Smarts(substrate), pybel.Smarts(product))
     except IOError:
         logging.warning("failed parsing: %s >> %s" % (substrate, product))
         compiled = (None, None)
     self.smarts_subs, self.smarts_prod = compiled
Ejemplo n.º 7
0
def teste():
    """Search the first molecule of ``resources/base.mol2`` for phenol.

    Returns the result of ``draw(True)`` for that first molecule, or
    ``None`` when the file yields no molecule.
    """
    phenol_query = pybel.Smarts("[OH]c1ccccc1")  # phenol
    ethyl_query = pybel.Smarts("[#6][#6]")  # ethyl group (never searched)
    hit_total = 0
    hits_per_mol = []

    first_mol = next(iter(pybel.readfile("mol2", "resources/base.mol2")),
                     None)
    if first_mol is None:
        return None

    phenol_hits = phenol_query.findall(first_mol)
    hit_total += len(phenol_hits)
    hits_per_mol.append(phenol_hits)
    # NOTE(review): only the first molecule is ever processed and the
    # collected hit counts are discarded -- confirm this is intended.
    return first_mol.draw(True)
Ejemplo n.º 8
0
 def count(self, smilesstr):
     """Count the occurrences of every group pattern in a molecule.

     The molecule is read from ``smilesstr`` and its hydrogens are added
     before matching. Groups are handled in four passes: plain SMARTS
     patterns, patterns carrying the eval keyword, quoted expressions and
     bracketed expressions that refer to other groups' counts.

     Returns the counts of the groups selected by ``include`` (all groups
     when ``include`` is ``None``), cast to ``int``.

     NOTE: group definitions are passed to ``eval`` and therefore run as
     Python code in this function's scope -- only use trusted definitions,
     and do not rename the locals they may refer to.
     """
     ## fetch the group definitions and classify each pattern
     groups, include, evalkw, brackets, quotes, unquote = self.commonattr()
     haskw, hasbracket, hasquote = self.matchedpatt(groups)
     if include is None:
         include = [True] * len(groups)
     ## read the molecule and make hydrogens explicit
     mol = pybel.readstring('smi', smilesstr)
     mol.addh()
     molecule = mol  # copy reference; keyword for userdef.py 29.09.2015
     # Start with NaN for every group; each pass below fills in its share.
     abundances = pd.Series([np.nan] * len(groups), index=groups.index)
     ## SMARTS search: groups that are none of keyword/bracket/quote
     for key in groups.index[~haskw & ~hasbracket & ~hasquote]:
         abundances[key] = len(pybel.Smarts(groups[key]).findall(mol))
     ## evaluate eval keyword: the captured text is evaluated as Python
     for key in groups.index[haskw]:  # untested
         abundances[key] = round(eval(evalkw.search(groups[key]).group(1)))
     ## evaluate quoted expressions (placeholders filled from ``groups``)
     for key in groups.index[hasquote]:  # untested
         abundances[key] = round(eval(
             unquote(groups[key]).format(**groups)))
     ## evaluate expressions (placeholders filled from counts found so far)
     # presumably __orderexpr orders keys so dependencies come first -- TODO confirm
     orderedexpr = self.__orderexpr(groups, hasbracket, brackets)
     for key in orderedexpr:  #groups.index[hasbracket]:
         abundances[key] = round(eval(groups[key].format(**abundances)))
     ## keep only the requested groups
     return abundances[include].astype(int)
Ejemplo n.º 9
0
def pattern():
    """Compile every entry of the module-level ``SMARTS`` sequence.

    Returns a new list of ``pybel.Smarts`` objects in the same order.
    """
    return [pybel.Smarts(smarts) for smarts in SMARTS]
Ejemplo n.º 10
0
def integer_fp(smile, smartsqfile='SMARTSFileFull.json'):
    """Create an integer-valued fingerprint for a SMILES string.

    Probably takes longer since it is a VERY hacked version, but it
    generates data for SVMs.

    Args:
        smile: SMILES of the compound.
        smartsqfile: name of a JSON file inside ``CHEMPATH``. Keys are
            1-based bit positions; each value is split on ``': '`` and its
            second part is used as the SMARTS query.

    Returns:
        list with one match count per SMARTS query, followed by the two
        values returned by ``chem.flag_chiral``; ``None`` if RDKit cannot
        parse ``smile``.
    """
    rdmol = Chem.MolFromSmiles(smile)
    if not rdmol:
        return None

    # 'with' closes the file even when json.load raises.
    with open(os.path.join(CHEMPATH, smartsqfile)) as smartsData:
        encodedSmartsDict = json.load(smartsData)
    smartsDict = {k.encode('utf-8'): v.encode('utf-8').split(': ')[1].strip(' \n') for k, v in encodedSmartsDict.iteritems()}

    fpt = [0] * len(smartsDict)

    cpd = pybel.readstring('smi', smile)

    for k, v in smartsDict.iteritems():
        # Keys are 1-based positions in the fingerprint.
        fpt[int(k) - 1] = len(pybel.Smarts(v).findall(cpd))

    # SMARTS querying directly for chirality does not give RS isomerism, so
    # adding it at the end.  May get flexed to _check_reverse for consistency.
    rect, sini = chem.flag_chiral(rdmol)
    fpt.append(rect)
    fpt.append(sini)

    return fpt
Ejemplo n.º 11
0
 def get_abundances(self, smilesstr=None):
     """Count pattern matches for a molecule given as SMILES.

     Every entry of ``self.smartspatt`` is counted, except that patterns
     containing the text ``'nomatch'`` are recorded as 0. The entry
     ``'H'`` is computed from the auxiliary patterns in ``self.smartsaux``
     as ``h1 + 2*h2 + 3*h3 + 4*h4``.

     Returns:
         pandas Series of counts, in ``self.smartspatt`` order followed
         by ``'H'``.
     """
     mol = pybel.readstring('smi', smilesstr)

     ## main patterns, kept in definition order
     # Iterate the items directly: the former ``.items()[0:]`` was a no-op
     # copy and fails on Python 3, where items() is not subscriptable.
     abundances = OrderedDict()
     for key, patt in self.smartspatt.items():
         if 'nomatch' in patt:
             abundances[key] = 0
         else:
             abundances[key] = len(pybel.Smarts(patt).findall(mol))

     ## auxiliary patterns
     aux = OrderedDict()
     for key, patt in self.smartsaux.items():
         aux[key] = len(pybel.Smarts(patt).findall(mol))

     ## combine and return pandas series
     abundances['H'] = (aux['h1'] + 2 * aux['h2'] + 3 * aux['h3'] +
                        4 * aux['h4'])
     return pd.Series(abundances)
Ejemplo n.º 12
0
 def matchatoms(self, smilesstr):
     """Find which atoms of a molecule are matched by each group pattern.

     The molecule is read from ``smilesstr`` and its hydrogens are added
     before matching.

     Returns a tuple ``(matched, atomicmass)``: ``matched`` is whatever
     ``self.__atomtable`` builds from the atom-type table and the included
     groups' matches; ``atomicmass`` is the set of ``(type, atomicmass)``
     pairs of all atoms that appear in any included match.

     NOTE: group definitions are passed to ``eval`` and therefore run as
     Python code in this function's scope -- only use trusted definitions,
     and do not rename the locals they may refer to.
     """
     ## fetch the group definitions and classify each pattern
     groups, include, evalkw, brackets, quotes, unquote = self.commonattr()
     haskw, hasbracket, hasquote = self.matchedpatt(groups)
     ## read the molecule and make hydrogens explicit
     mol = pybel.readstring('smi', smilesstr)
     mol.addh()
     molecule = mol  # copy reference; keyword for userdef.py 29.09.2015
     # One slot per group, in group order; filled in by the passes below.
     tups = OrderedDict(zip(groups.index, [None] * len(groups)))
     ## SMARTS search: groups that are none of keyword/bracket/quote
     for key in groups.index[~haskw & ~hasbracket & ~hasquote]:
         tups[key] = set(pybel.Smarts(groups[key]).findall(mol))
     ## evaluate eval keyword: the captured text is evaluated as Python
     for key in groups.index[haskw]:
         tups[key] = eval(evalkw.search(groups[key]).group(1))
     ## evaluate quoted expressions
     for key in groups.index[hasquote]:  # untested
         tups[key] = self.__substitute(mol, groups[key], quotes, groups)
     ## evaluate expressions
     # presumably __orderexpr orders keys so dependencies come first -- TODO confirm
     orderedexpr = self.__orderexpr(groups, hasbracket, brackets)
     for key in orderedexpr:  #groups.index[hasbracket]
         tups[key] = self.__substitute(mol, groups[key], brackets, tups)
     # NOTE(review): unlike count(), ``include`` is not defaulted when it
     # is None, and ``.ix`` is a deprecated pandas indexer -- confirm.
     usetups = OrderedDict([(k, v) for (k, v) in tups.items()
                            if include.ix[k]])
     # Union of all included match tuples, then flattened to atom indices.
     # NOTE(review): reduce() has no initial value here, so it raises
     # TypeError when no group is included.
     alltups = reduce(set.union, usetups.values())
     allatoms = reduce(add, map(list, alltups), [])
     atomicmass = set([(atom.type, atom.atomicmass) for atom in mol.atoms
                       if atom.idx in allatoms])
     ## table of atom index -> atom type for every atom of the molecule
     idxlabel = 'atom'
     atomtype = pd.DataFrame([(atom.idx, atom.type) for atom in mol.atoms],
                             columns=[idxlabel, 'type']).set_index(idxlabel)
     matched = self.__atomtable(atomtype, usetups)
     ##
     return (matched, atomicmass)
Ejemplo n.º 13
0
def detect_dihedrals(mol: pybel.Molecule) -> List[DihedralInfo]:
    """Detect the bonds to be treated as rotors.

    We use the more generous definition from RDKit:
    https://github.com/rdkit/rdkit/blob/1bf6ef3d65f5c7b06b56862b3fb9116a3839b229/rdkit/Chem/Lipinski.py#L47

    It matches pairs of atoms that are connected by a single bond,
    where both atoms have at least one other bond, neither atom is part
    of a triple bond, and the connecting bond is not in a ring.

    Args:
        mol: Molecule to assess
    Returns:
        One dihedral description per matched bond, as built by
        ``get_dihedral_info``.
    """
    # Bonding graph of the molecule
    graph = get_bonding_graph(mol)

    # Backbone atoms: nodes whose 'z' attribute exceeds 1
    # (presumably the atomic number, i.e. everything but hydrogen)
    heavy_nodes = {node for node, attrs in graph.nodes(data=True)
                   if attrs['z'] > 1}

    rotor_query = pybel.Smarts('[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]')
    # SMARTS matches are 1-based, hence the shift by one for each atom
    return [
        get_dihedral_info(graph, (begin - 1, end - 1), heavy_nodes)
        for begin, end in rotor_query.findall(mol)
    ]
Ejemplo n.º 14
0
    def get_multiring_atoms_bonds(self, rdk_mol: Mol, smiles):
        '''
        Not used

        Count the atoms and the bonds that belong to more than one ring.

        The counts come from RDKit's ring info. When RDKit and Open Babel
        disagree on the number of rings, a warning is printed, the atom
        count is taken from Open Babel's ``[R2]`` matches and the bond
        count is set to that atom count minus one.

        Returns a tuple ``(n_atoms_multiring, n_bonds_multiring)``.
        '''
        atom_hits = [0] * rdk_mol.GetNumAtoms()
        bond_hits = [0] * rdk_mol.GetNumBonds()

        # TODO GetRingInfo gives SymmetricSSSR, not TRUE SSSR
        ring_info = rdk_mol.GetRingInfo()
        for rings, hits in ((ring_info.AtomRings(), atom_hits),
                            (ring_info.BondRings(), bond_hits)):
            for ring in rings:
                for member in ring:
                    hits[member] += 1

        n_atoms_multiring = sum(1 for times in atom_hits if times > 1)
        n_bonds_multiring = sum(1 for times in bond_hits if times > 1)

        py_mol = pybel.readstring('smi', smiles)
        if ring_info.NumRings() == len(py_mol.sssr):
            return n_atoms_multiring, n_bonds_multiring

        print(
            'WARNING: SymmetricSSSR not equal to TRUE SSSR in rdkit. Use Openbabel instead:',
            smiles)
        n_atoms_multiring = len(pybel.Smarts('[R2]').findall(py_mol))
        return n_atoms_multiring, n_atoms_multiring - 1
Ejemplo n.º 15
0
def smarts(s):
    """Compile ``s`` into a ``pybel.Smarts`` object.

    Input that is not already ``bytes`` is ASCII-encoded first.

    Raises:
        ValueError: if pybel rejects the pattern (pybel itself signals
            this with an IOError, which is converted here).
    """
    encoded = s if isinstance(s, bytes) else s.encode('ascii')
    try:
        compiled = pybel.Smarts(encoded)
    except IOError as err:
        # Convert pybel's IOError (?!) into a ValueError
        raise ValueError(str(err))
    return compiled
Ejemplo n.º 16
0
 def _update(self):
     """Schedule an update of all items and widgets.

     A SMARTS pattern built from ``selectedFragment`` is passed along only
     when fragments are not being shown (``showFragments`` with
     ``fragmentSmiles``) and fragment colouring is active with a selected
     fragment.
     """
     extra_args = ()
     showing = self.showFragments and self.fragmentSmiles
     if not showing and self.colorFragments and self.selectedFragment:
         extra_args = (pybel.Smarts(self.selectedFragment),)
     self.__schedule(
         self.__update_items(self._items, self._widgets, *extra_args))
Ejemplo n.º 17
0
def db_select_molecules(cls=None, subcls=None, type=None, subtype=None, tags=[], substructure="") -> pd.DataFrame:
    """Get a summary frame of molecules in the database

    :param cls: exact value required for ``metadata.class`` (ignored when
        ``None`` or empty)
    :param subcls: exact value required for ``metadata.subclass`` (ignored
        when ``None`` or empty)
    :param type: exact value required for ``metadata.type`` (ignored when
        ``None`` or empty)
    :param subtype: exact value required for ``metadata.subtype`` (ignored
        when ``None`` or empty)
    :param tags: a list of tags of the db records (if multiple an 'OR' is
        taken); an empty list selects every tag record. Only read, never
        modified.
    :type tags: list
    :param substructure: substructure SMARTS string
    :type substructure: str
    :return: pandas.core.frame.DataFrame with one row per molecule, its
        tags collected into a list, the ids of its ``qchem_descriptors``
        records in ``_ids`` and their number in ``num_conformers``
    """

    db = db_connect()
    tags_coll = db['tags']
    mols_coll = db['molecules']
    feats_coll = db['qchem_descriptors']

    tags_cur = tags_coll.find({'tag': {'$in': tags}} if tags else {})
    tags_df = pd.DataFrame(tags_cur)

    # Named ``query`` so that the builtin ``filter`` is not shadowed.
    query = {}
    metadata_filters = (
        ('metadata.class', cls),
        ('metadata.subclass', subcls),
        ('metadata.type', type),
        ('metadata.subtype', subtype),
    )
    for field, wanted in metadata_filters:
        if wanted != "" and wanted is not None:
            query[field] = wanted

    # Only molecules referenced by one of the selected tag records.
    query['_id'] = {'$in': tags_df.molecule_id.tolist()}

    mols_cur = mols_coll.find(query)
    mols_df = pd.DataFrame(mols_cur)
    if 'name' not in mols_df.columns:
        mols_df['name'] = None

    if substructure:
        pattern = pybel.Smarts(substructure)
        mols_df['pybel_mol'] = mols_df['can'].map(lambda can: pybel.readstring("smi", can))
        mols_df = mols_df[mols_df['pybel_mol'].map(lambda mol: bool(pattern.findall(mol)))]
        mols_df = mols_df.drop('pybel_mol', axis=1)

    # merge tags in an outer way
    df = pd.merge(mols_df, tags_df, how='outer', left_on='_id', right_on='molecule_id', suffixes=('', '_tag'))

    # make tags into a list of tags; metadata is grouped through its repr
    # because the raw metadata values are not used as group keys directly
    df['metadata_str'] = df['metadata'].map(repr)
    grouped = df.groupby(['can', 'metadata_str'])
    # groupby tags
    # Columns are selected with a list: selecting several columns with a
    # bare tuple is deprecated and no longer accepted by recent pandas.
    df = pd.concat([grouped[['metadata', 'molecule_id', 'name']].first(),
                    grouped['tag'].apply(list)], axis=1).reset_index().drop('metadata_str', axis=1)

    # fetch ids
    df['_ids'] = df['molecule_id'].map(lambda mid: [item['_id'] for item in feats_coll.find(
        {'molecule_id': ObjectId(mid)}, {'_id': 1})
                                                    ])
    df['num_conformers'] = df['_ids'].map(len)
    return df
Ejemplo n.º 18
0
    def index(self, smiles):
        """Count five unsaturation-related SMARTS motifs in a molecule.

        Returns a numpy array with the number of matches of each pattern
        listed below, in that order.
        """
        py_mol = pybel.readstring('smi', smiles)

        motif_patterns = (
            '*=**=*',  # two double bonds one bond apart
            '*=**#*',  # double and triple bond one bond apart
            '*=*[CX4;H0]',  # double bond next to a hydrogen-free CX4
            '*#*[CX4;H0]',  # triple bond next to a hydrogen-free CX4
            'C1=CC=CC=CC1',  # seven-membered ring with three C=C bonds
        )
        counts = [
            len(pybel.Smarts(patt).findall(py_mol))
            for patt in motif_patterns
        ]

        return np.array(counts)
Ejemplo n.º 19
0
def rearrange_smiles(aa_smiles):
    '''Rewrite an amino-acid smiles to start with the N-term and end with
    the C-term.

    The first atom of the first match of each terminal pattern is used;
    an IndexError is raised when a pattern does not match.'''
    mol = pybel.readstring('smi', aa_smiles)
    n_term_query = pybel.Smarts('[$(NCC(O)=O)]')
    c_term_query = pybel.Smarts('[$(OC(=O)CN)]')
    # Atom indices the output should start and end with
    start_atom = n_term_query.findall(mol)[0][0]
    end_atom = c_term_query.findall(mol)[0][0]

    # Re-read and re-write the SMILES with 'f' (first atom) and 'l'
    # (last atom) given as output options
    conv = openbabel.OBConversion()
    conv.SetInAndOutFormats('smi', 'smi')
    for option, atom_idx in (('f', start_atom), ('l', end_atom)):
        conv.AddOption(option, openbabel.OBConversion.OUTOPTIONS,
                       str(atom_idx))
    reordered = openbabel.OBMol()
    conv.ReadString(reordered, aa_smiles)
    return conv.WriteString(reordered).strip()
Ejemplo n.º 20
0
def find_smarts_hb(dict_smarts, smarts_hb, lig_mol):
    """Collect the coordinates of all atoms matched by a set of SMARTS.

    ``dict_smarts[smarts_hb]`` supplies the patterns (used for atoms that
    can form a hydrogen bond). Returns a set of atom coordinates, so an
    atom matched more than once appears a single time.
    """
    matched_coords = set()
    for patt in dict_smarts[smarts_hb]:
        for hit in pybel.Smarts(patt).findall(lig_mol):
            # Match indices are 1-based; lig_mol.atoms is 0-based.
            matched_coords.update(
                lig_mol.atoms[atom_idx - 1].coords for atom_idx in hit)
    return matched_coords
Ejemplo n.º 21
0
def smarts_search(mollist, smarts):
    """Return the records of ``mollist`` whose SMILES match ``smarts``.

    Args:
        mollist: object whose ``all()`` yields records with a ``SMILES``
            attribute.
        smarts: SMARTS pattern string.

    Returns:
        list of the matching records, in iteration order. Records whose
        SMILES cannot be read or searched are skipped.
    """
    query = pybel.Smarts(smarts)
    print(query)

    hits = []
    for mol in mollist.all():
        try:
            parsed = pybel.readstring("smi", str(mol.SMILES))
            is_match = bool(query.findall(parsed))
        except Exception:
            # Best effort: one bad record must not abort the search.
            # Unlike a bare ``except`` this lets KeyboardInterrupt and
            # SystemExit propagate.
            continue
        if is_match:
            hits.append(mol)
    return hits
Ejemplo n.º 22
0
def compile_smarts():
    """Compile the five fixed SMARTS patterns below.

    Returns a list of ``pybel.Smarts`` objects in the listed order.
    """
    pattern_strings = (
        '[#6+0!$(*~[#7,#8,F]),SH0+0v2,s+0,S^3,Cl+0,Br+0,I+0]',
        '[a]',
        '[!$([#1,#6,F,Cl,Br,I,o,s,nX3,#7v5,#15v5,#16v4,#16v6,*+1,*+2,*+3])]',
        '[!$([#6,H0,-,-2,-3]),$([!H0;#7,#8,#9])]',
        '[r]',
    )
    return [pybel.Smarts(text) for text in pattern_strings]
Ejemplo n.º 23
0
def pybel_neutralise(pybmol):
    """Neutralise singly charged atoms of ``pybmol`` in place.

    Matched are +1 atoms that carry a hydrogen and have no negatively
    charged neighbour, and -1 atoms without a positively charged
    neighbour. Each matched atom gets formal charge 0 and its implicit
    hydrogen count reduced by the former charge.

    Returns the same (modified) ``pybmol`` object.
    """
    charged_query = pyb.Smarts(
        "[+1!h0!$([*]~[-1,-2,-3,-4]),-1!$([*]~[+1,+2,+3,+4])]")

    for hit in charged_query.findall(pybmol):
        # NOTE(review): GetAtom is an OBMol method -- confirm that the
        # object passed in really exposes it.
        atom = pybmol.GetAtom(hit[0])
        old_charge = atom.GetFormalCharge()
        implicit_h = atom.GetImplicitHCount()
        atom.SetFormalCharge(0)
        # +1 loses one hydrogen, -1 gains one.
        atom.SetImplicitHCount(implicit_h - old_charge)
    return pybmol
Ejemplo n.º 24
0
 def dict_toxi(self):
     """Score ``self.smiles`` against the module-level pattern dict ``d``.

     For every key pattern of ``d`` that matches the molecule, each of its
     value patterns is counted separately; a 1 is recorded whenever the
     key pattern has more matches than that value pattern.

     Returns a one-element list holding the sum of the recorded flags.
     """
     mol = pb.readstring("smi", str(self.smiles))
     flags = []
     for key_patt, value_patts in d.iteritems():
         n_key = len(pb.Smarts(key_patt).findall(mol))
         if n_key == 0:
             flags.append(0)
             continue
         # NOTE(review): the key count is compared with each value pattern
         # on its own, never with their combined count -- confirm intent.
         for value_patt in value_patts:
             n_value = len(pb.Smarts(value_patt).findall(mol))
             if n_key > n_value:
                 flags.append(1)
             elif n_key == n_value:
                 flags.append(0)
     return [sum(flags)]
Ejemplo n.º 25
0
def substructure_embedding(mol, pattern):
    """Find all embeddings of a SMARTS pattern in an OASA molecule.

    Args:
        mol: OASA molecule; converted to a pybel molecule for the search.
        pattern: ``pybel.Smarts`` object or a SMARTS string.

    Returns:
        list with one list per match; each inner list holds the keys of
        the conversion's atom map that correspond to the matched atoms
        (``None`` where a matched atom has no entry in the map).
    """
    obmol, atom_map = (oasa.pybel_bridge.PybelConverter.
                       oasa_to_pybel_molecule_with_atom_map(mol))

    if not isinstance(pattern, pybel.Smarts):
        pattern = pybel.Smarts(pattern)

    matches = pattern.findall(obmol)

    # Invert the atom map so matched values can be translated back.
    # ``items()`` works on Python 2 and 3 (``iteritems`` is Python 2 only).
    rev_map = {value: key for key, value in atom_map.items()}

    # Real lists instead of ``map``: on Python 3 ``map`` is lazy and the
    # caller would receive iterators.
    return [[rev_map.get(idx) for idx in match] for match in matches]
Ejemplo n.º 26
0
def is_alkane(py_mol) -> bool:
    """Tell whether a pybel molecule is an alkane.

    True only when the formula contains exactly the elements C and H and
    none of the carbon patterns listed below matches.
    """
    import pybel
    from .formula import Formula

    elements = set(Formula(py_mol.formula).atomdict.keys())
    if elements != {'C', 'H'}:
        return False

    # Any match of these patterns rules out an alkane.
    disqualifying = ('[CX2]', '[CX3]', 'c', '[#6;v0,v1,v2,v3]')
    return not any(
        pybel.Smarts(patt).findall(py_mol) for patt in disqualifying)
Ejemplo n.º 27
0
def find_smarts(dict_smarts, smarts, lig_mol):
    """Collect atom coordinates for every match of a set of SMARTS.

    ``dict_smarts[smarts]`` supplies the patterns. Returns a list with one
    entry per match; each entry is the list of coordinates of the matched
    atoms, in match order.
    """
    groups = []
    for patt in dict_smarts[smarts]:
        for hit in pybel.Smarts(patt).findall(lig_mol):
            # Match indices are 1-based; lig_mol.atoms is 0-based.
            groups.append(
                [lig_mol.atoms[atom_idx - 1].coords for atom_idx in hit])
    return groups
Ejemplo n.º 28
0
 def unique_toxi(self):
     """Count how many patterns of ``all_toxi`` occur in ``self.smiles``.

     Every pattern contributes at most 1, however often it matches.
     Returns a one-element list holding that count.
     """
     mol = pb.readstring("smi", str(self.smiles))
     flags = [
         1 if len(pb.Smarts(toxi).findall(mol)) > 0 else 0
         for toxi in all_toxi
     ]
     return [sum(flags)]
Ejemplo n.º 29
0
def atoms_nitrophenols(mol, phenol, nitro):
    """Return the phenol matches that share an aromatic ring with a nitro.

    Args:
        mol: pybel molecule.
        phenol: SMARTS string for the phenol group.
        nitro: SMARTS string for the nitro group.

    Returns:
        list of phenol match tuples. For every aromatic ring that has at
        least one atom in a phenol match and at least one atom in a nitro
        match, all phenol matches touching that ring are included.
    """
    phenol_groups = pybel.Smarts(phenol).findall(mol)
    nitro_groups = pybel.Smarts(nitro).findall(mol)
    aromatic_rings = [ring for ring in mol.sssr if ring.IsAromatic()]
    atoms = mol.atoms

    atomlist_ring = []  # list of rings
    atomlist_nitrophenol = []  # list of (nitro)phenol groups
    for ring in aromatic_rings:
        # Computed once per ring and from the ring itself; the old helper
        # ignored its ``ring`` argument and read the enclosing loop
        # variable, rebuilding this list for every group it tested.
        ring_atoms = {a.idx for a in atoms if ring.IsInRing(a.idx)}

        ring_phenols = [g for g in phenol_groups
                        if ring_atoms.intersection(g)]
        if not ring_phenols:
            continue
        ring_nitros = [g for g in nitro_groups
                       if ring_atoms.intersection(g)]
        if not ring_nitros:
            continue

        atomlist_ring.append(
            ring_atoms.union(chain.from_iterable(ring_phenols),
                             chain.from_iterable(ring_nitros)))
        atomlist_nitrophenol += ring_phenols
    # returning of atomlist_ring is optional
    # but the phenol groups are what are really counted so this is what is returned
    return atomlist_nitrophenol
Ejemplo n.º 30
0
        def MatchPlattsBGroups(self, smiles):
            """Compute ``self.B`` from functional-group matches in a SMILES.

            The groups are read from the sheet ``PlattsB`` of ``groups.xls``
            in the current working directory (columns: SMARTS, name,
            contribution). Each group's contribution is multiplied by its
            number of matches; the sum plus 0.071 is stored in ``self.B``.

            Processing stops at the first invalid SMARTS pattern; groups
            after it do not contribute.
            """
            # Load functional group database
            filepath = os.path.join(os.getcwd(), 'groups.xls')
            sheet = xlrd.open_workbook(filepath).sheet_by_name(u'PlattsB')
            databaseB = [
                functionalgroup(SMART, name, B)
                for (SMART, name, B) in zip(sheet.col_values(0),
                                            sheet.col_values(1),
                                            sheet.col_values(2))
            ]

            platts_B = 0
            mol = pybel.readstring("smi", smiles)
            for x in databaseB:
                pattern = str(x.smarts)
                # Validate with a bare OBSmartsPattern first so that an
                # invalid entry is reported instead of raising in pybel.
                if not ob.OBSmartsPattern().Init(pattern):
                    print("Invalid SMARTS pattern %s" % pattern)
                    # NOTE(review): this aborts the whole loop rather than
                    # skipping the one bad entry -- confirm intent.
                    break
                x.num = len(pybel.Smarts(pattern).findall(mol))
                if x.num > 0:
                    print("Found group %s named %s with contribution %s "
                          "to B %s times" % (pattern, x.name, x.value,
                                             x.num))
                platts_B += x.num * x.value

            # Constant term added on top of the summed contributions.
            self.B = platts_B + 0.071