Exemple #1
0
def _mcs(data,
         asSmiles,
         atomCompare,
         bondCompare,
         threshold,
         ringMatchesRingOnly,
         completeRingsOnly,
         sanitize=True,
         removeHs=True,
         strictParsing=True,
         isomericSmiles=False,
         canonical=True,
         kekuleSmiles=False):
    """Return the maximum common substructure (MCS) of the molecules in ``data``.

    :param data: raw molecule data, handed to ``_parseMolData``.
    :param asSmiles: truthy to get the result as SMILES, falsy for SMARTS.
    :param atomCompare: forwarded to ``MCS.FindMCS``.
    :param bondCompare: forwarded to ``MCS.FindMCS``.
    :param threshold: falsy to leave unset; otherwise converted with
        ``float()`` and forwarded to ``MCS.FindMCS``.
    :param ringMatchesRingOnly: forwarded to ``MCS.FindMCS``.
    :param completeRingsOnly: forwarded to ``MCS.FindMCS``.
    :param sanitize: forwarded to ``_parseMolData``.
    :param removeHs: forwarded to ``_parseMolData``.
    :param strictParsing: forwarded to ``_parseMolData``.
    :param isomericSmiles: forwarded to ``Chem.MolFragmentToSmiles``.
    :param canonical: forwarded to ``Chem.MolFragmentToSmiles``.
    :param kekuleSmiles: forwarded to ``Chem.MolFragmentToSmiles``.
    :return: ``None`` when nothing could be parsed or no MCS was found;
        otherwise the MCS as a SMILES or SMARTS string.  If ``asSmiles`` is
        set but no input molecule matches the MCS pattern, the SMARTS string
        is returned unchanged.
    """
    ms = _parseMolData(data,
                       sanitize=sanitize,
                       removeHs=removeHs,
                       strictParsing=strictParsing)
    if not ms:
        return
    if len(ms) == 1:
        # A single molecule is returned as-is in the requested notation.
        # The debug label now names the notation that is actually returned
        # (the two labels used to be swapped).
        if asSmiles:
            print('SMILES')
            return Chem.MolToSmiles(ms[0])
        print('SMARTS')
        return Chem.MolToSmarts(ms[0])

    if threshold:
        threshold = float(threshold)
    options = dict(atomCompare=atomCompare,
                   bondCompare=bondCompare,
                   ringMatchesRingOnly=ringMatchesRingOnly,
                   completeRingsOnly=completeRingsOnly)
    try:
        mcs = MCS.FindMCS(ms, threshold=threshold, **options)
    except TypeError:
        # Presumably a FindMCS build without the ``threshold`` keyword;
        # retry without it -- TODO confirm.
        mcs = MCS.FindMCS(ms, **options)

    res = mcs.smarts
    if not asSmiles or res is None:
        # ``smarts`` is None when FindMCS found no common substructure;
        # there is nothing to convert in that case.
        return res

    p = Chem.MolFromSmarts(res)
    for m in ms:
        # One search per molecule: an empty tuple means "no match".
        match = m.GetSubstructMatch(p)
        if match:
            return Chem.MolFragmentToSmiles(m,
                                            atomsToUse=match,
                                            isomericSmiles=isomericSmiles,
                                            canonical=canonical,
                                            kekuleSmiles=kekuleSmiles)
    return res


#-----------------------------------------------------------------------------------------------------------------------
Exemple #2
0
    def mcs(self, fragments):
        """Find the maximum common substructure from a list of fragments.

        N.B.: The many options offered by rdkit are not exposed yet, see
        http://www.rdkit.org/Python_Docs/rdkit.Chem.MCS-module.html

        Note that a SMARTS match naturally covers heavy atoms only.

        :param fragments: two or more fragments containing common substructure
        :type fragments : list
        :return: maximum common substructure result
        :rtype : MCSResult
        """
        # Both names live at module level so the lazy import below is done
        # at most once per process.
        global MCS, rdk
        try:
            MCS
        except NameError:
            from rdkit.Chem import MCS
            from cinfony import rdk

        rdkit_mols = [rdk.Molecule(frag.molecule).Mol for frag in fragments]
        return MCS.FindMCS(rdkit_mols)
Exemple #3
0
 def test_timeout_negative(self):
     """A negative timeout must be rejected with ValueError."""
     rejected = False
     try:
         MCS.FindMCS(lengthy_mols, timeout=-1)
     except ValueError:
         rejected = True
     if not rejected:
         raise AssertionError("bad range check for timeout")
Exemple #4
0
 def test_min_atoms_1(self):
     """FindMCS must raise ValueError when called with minNumAtoms=1."""
     rejected = False
     try:
         MCS.FindMCS(simple_mols, minNumAtoms=1)
     except ValueError:
         rejected = True
     if not rejected:
         raise AssertionError("should have raised an exception")
Exemple #5
0
def _mcs(data, params):
    """Return the maximum common substructure (MCS) of the molecules in ``data``.

    :param data: raw molecule data, handed to ``_parseMolData``.
    :param params: mapping of string options.  Keys read here: ``asSmiles``,
        ``ringMatchesRingOnly``, ``completeRingsOnly`` (each parsed with
        ``bool(int(...))``, default ``'0'``), ``atomCompare`` (default
        ``'elements'``), ``bondCompare`` (default ``'bondtypes'``) and
        ``threshold`` (optional, converted with ``float()`` when truthy).
    :return: ``None`` when nothing could be parsed or no MCS was found;
        otherwise the MCS as a SMILES string (``asSmiles`` set) or SMARTS
        string.  If ``asSmiles`` is set but no input molecule matches the MCS
        pattern, the SMARTS string is returned unchanged.
    """
    ms = _parseMolData(data)
    if not ms:
        return

    # Parse the flag once; it is needed on both code paths below.
    as_smiles = bool(int(params.get('asSmiles', '0')))
    if len(ms) == 1:
        # A single molecule is returned as-is in the requested notation.
        # The debug label now names the notation that is actually returned
        # (the two labels used to be swapped).
        if as_smiles:
            print('SMILES')
            return Chem.MolToSmiles(ms[0])
        print('SMARTS')
        return Chem.MolToSmarts(ms[0])

    options = dict(
        atomCompare=params.get('atomCompare', 'elements'),
        bondCompare=params.get('bondCompare', 'bondtypes'),
        ringMatchesRingOnly=bool(int(params.get('ringMatchesRingOnly', '0'))),
        completeRingsOnly=bool(int(params.get('completeRingsOnly', '0'))))
    threshold = params.get('threshold', None)
    if threshold:
        threshold = float(threshold)
    try:
        mcs = MCS.FindMCS(ms, threshold=threshold, **options)
    except TypeError:
        # Presumably a FindMCS build without the ``threshold`` keyword;
        # retry without it -- TODO confirm.
        mcs = MCS.FindMCS(ms, **options)

    res = mcs.smarts
    if not as_smiles or res is None:
        # ``smarts`` is None when FindMCS found no common substructure;
        # there is nothing to convert in that case.
        return res

    p = Chem.MolFromSmarts(res)
    for m in ms:
        # One search per molecule: an empty tuple means "no match".
        match = m.GetSubstructMatch(p)
        if match:
            return Chem.MolFragmentToSmiles(m,
                                            atomsToUse=match,
                                            isomericSmiles=True,
                                            canonical=False)
    return res


#-----------------------------------------------------------------------------------------------------------------------
Exemple #6
0
 def test_timeout(self):
     """A 0.1 s timeout yields an incomplete but usable result within 0.5 s."""
     started = time.time()
     result = MCS.FindMCS(lengthy_mols, timeout=0.1)
     self.assert_result(result, completed=0)
     self.assertTrue(result.numAtoms > 1)
     # Lower bound on the bond count relative to the atom count.
     sizes = (result.numAtoms, result.numBonds)
     self.assertTrue(result.numBonds >= result.numAtoms-1, sizes)
     elapsed = time.time() - started
     self.assertTrue(elapsed < 0.5, elapsed)
Exemple #7
0
def make2dcanv(mmpcomps):
    """Make a 2D canvas (PNG) from MMPComps.

    Each comparison contributes its two molecules, depicted in 2D so that
    both are aligned on their maximum common substructure (MCS).  Pairs are
    ordered by the SMARTS of that MCS before being drawn with ``draw_acts``.

    :param mmpcomps: iterable of comparison objects exposing ``xtal_mol``,
        ``chembl_mol``, ``xtal_act`` and ``chembl_act``.
    :return: the contents of the PNG image saved by ``image.save``.
    """
    rdmols = []
    acts = []
    subs = []
    smsubs = []
    seen_pairs = set()
    for m in mmpcomps:
        # Don't do the same comparison twice.  The lookup and the stored
        # entry both use the compound pks; the lookup used to mix compound
        # and molecule pks and therefore only matched by coincidence.
        pair = (m.xtal_mol.cmpd_id.pk, m.chembl_mol.cmpd_id.pk)
        if pair in seen_pairs or pair[::-1] in seen_pairs:
            continue
        seen_pairs.add(pair)
        # Parse only after the duplicate check so skipped pairs cost nothing.
        mol1 = Chem.MolFromMolBlock(str(m.xtal_mol.sdf_info))
        mol2 = Chem.MolFromMolBlock(str(m.chembl_mol.sdf_info))
        # Set the activity labels for the display
        acts.append(render_act(m.xtal_act))
        acts.append(render_act(m.chembl_act))
        # Generate the two-d depictions after canonicalising the smiles
        mol1 = Chem.MolFromSmiles(Chem.MolToSmiles(mol1, isomericSmiles=True))
        mol2 = Chem.MolFromSmiles(Chem.MolToSmiles(mol2, isomericSmiles=True))
        smp = MCS.FindMCS([mol1, mol2],
                          completeRingsOnly=True,
                          ringMatchesRingOnly=True,
                          timeout=0.5).smarts
        p = Chem.MolFromSmarts(smp)
        subs.append(p)
        smsubs.append(smp)
        AllChem.Compute2DCoords(p)
        for mol in (mol1, mol2):
            AllChem.GenerateDepictionMatching2DStructure(mol,
                                                         p,
                                                         acceptFailure=True)
        rdmols.extend([mol1, mol2])
    # So now we have the mols in a list with activity information in a list.
    # Order the pairs by scaffold SMARTS; rdmols/acts hold two entries per
    # pair, subs/smsubs hold one.
    myinds = sorted(range(len(smsubs)), key=lambda x: smsubs[x])
    nmols = []
    nacts = []
    nsubs = []
    # Now rearrange everything to suit
    for ind_m in myinds:
        first = ind_m * 2
        nmols.extend(rdmols[first:first + 2])
        nacts.extend(acts[first:first + 2])
        nsubs.append(subs[ind_m])
    image = draw_acts(nmols, nacts, nsubs)
    output = StringIO.StringIO()
    image.save(output, format="PNG")
    return output.getvalue()
Exemple #8
0
 def assert_search(self,
                   smiles,
                   numAtoms,
                   numBonds,
                   smarts=_ignore,
                   **kwargs):
     """Run FindMCS on ``smiles`` and check the completed result.

     Extra keyword arguments are forwarded to ``MCS.FindMCS``; the result is
     checked with ``assert_result`` against ``completed=1`` and the given
     ``numAtoms``, ``numBonds`` and ``smarts``.
     """
     expected = dict(completed=1,
                     numAtoms=numAtoms,
                     numBonds=numBonds,
                     smarts=smarts)
     self.assert_result(MCS.FindMCS(smiles, **kwargs), **expected)
Exemple #9
0
def get_decoys(pdb_file, mol_file, num_atoms, init='get_decoys_init'):
    """For each binding ligand, get a list of decoy ligands.

    Candidates are filtered by number of atoms and by maximum common
    substructure (MCS).  Conformers are then generated for each decoy and
    written to the decoy_ligands folder.

    :param pdb_file: path of the binding ligand PDB file; the decoy file name
        is derived from it.
    :param mol_file: path of the binding ligand file read with
        ``SDMolSupplier``.
    :param num_atoms: atom count of the binding ligand.
    :param init: name of the object holding the candidate pool and limits
        (``all_mols``, ``all_mol_files``, ``all_num_atoms``,
        ``all_pdb_files``, ``max_atom_dif``, ``max_substruct``,
        ``num_conformers``, ``max_num_decoys``).
    :return: list of ``[candidate_pdb_file, decoy_file]`` pairs.
    """
    # NOTE(security): eval() executes arbitrary code; ``init`` must only ever
    # be a trusted identifier, never external input.
    init = eval(init)

    reader = SDMolSupplier(mol_file)
    mol = reader[0]
    output = []

    # list() keeps shuffle() working where range() is not a list.
    iterator = list(range(len(init.all_mols)))
    random.shuffle(iterator)
    for i in iterator:
        if (init.all_mol_files[i] == mol_file
                or abs(init.all_num_atoms[i] - num_atoms) > init.max_atom_dif):
            continue
        mcs = MCS.FindMCS([init.all_mols[i], mol],
                          minNumAtoms=init.max_substruct,
                          ringMatchesRingOnly=True,
                          completeRingsOnly=True,
                          timeout=1)
        # numAtoms == -1: no common substructure of at least
        # ``max_substruct`` atoms was found, so the candidate is a decoy.
        if mcs.numAtoms == -1:
            # save the mol object as a PDB file in the decoys folder
            decoy_file = pdb_file.replace('/binding_ligands/',
                                          '/decoy_ligands/').replace(
                                              '.pdb',
                                              str(len(output)) + '.pdb')
            # generate the decoy and its conformers
            decoy2 = Chem.AddHs(init.all_mols[i])
            conf_ids = AllChem.EmbedMultipleConfs(decoy2, init.num_conformers)
            pdb_writer = PDBWriter(decoy_file)
            try:
                for cid in conf_ids:
                    AllChem.MMFFOptimizeMolecule(decoy2, confId=cid)
                    decoy = Chem.RemoveHs(decoy2)
                    # Write the conformer just optimised; without confId the
                    # writer emits the default conformer every time.
                    pdb_writer.write(decoy, confId=cid)
            finally:
                # Release the file even if embedding/optimisation fails.
                pdb_writer.close()
            output.append([init.all_pdb_files[i], decoy_file])

        if len(output) >= init.max_num_decoys:
            break

    print('Got the decoys for one ligand')
    return output
Exemple #10
0
def get_decoys(pdb_file, mol_file, num_atoms, init='get_decoys_init'):
    """
    For each binding ligand, get a list of decoy ligands. We filter by number of atoms and maximum common
    substructure (MCS). Returns filepaths to all binding ligand - decoy pair.

    :param pdb_file: pdb format ligand
    :param mol_file: mol format ligand
    :param num_atoms: ligand's atom number
    :param init: name of the object holding the candidate pool and limits
        (``all_mols``, ``all_mol_files``, ``all_num_atoms``, ``all_pdb_files``,
        ``max_atom_dif``, ``max_substruct``, ``max_num_decoys``)
    :return:
    nested list [[pdb_file, decoy_files]] where decoy_files is a comma-separated string
    :raises Exception: when fewer than ``max_num_decoys`` decoys were found
    """
    # NOTE(security): eval() executes arbitrary code; ``init`` must only ever
    # be a trusted identifier, never external input.
    init = eval(init)
    reader = SDMolSupplier(mol_file)
    mol = reader[0]
    decoys = []

    # Shuffle which ligands we sample to avoid biases in decoy ligands.
    # list() keeps shuffle() working where range() is not a list.
    iterator = list(range(len(init.all_mols)))
    random.shuffle(iterator)
    for i in iterator:
        if len(decoys) >= init.max_num_decoys:
            break
        if (init.all_mol_files[i] == mol_file
                or abs(init.all_num_atoms[i] - num_atoms) >
                init.max_atom_dif):
            continue
        mcs = MCS.FindMCS([init.all_mols[i], mol],
                          minNumAtoms=init.max_substruct,
                          ringMatchesRingOnly=True,
                          completeRingsOnly=True,
                          timeout=1)
        # Only a candidate WITHOUT a sufficiently large common substructure
        # (numAtoms == -1) is a decoy.  The bookkeeping used to sit outside
        # this test, so every size-matched ligand was counted.
        if mcs.numAtoms == -1:
            decoys.append(init.all_pdb_files[i])
    # Check to make sure there are enough decoys
    if len(decoys) < init.max_num_decoys:
        raise Exception("Not enough decoys for ligand " + pdb_file)
    print('Got the decoys for one ligand')
    return [[pdb_file, ','.join(decoys)]]
Exemple #11
0
def create_lexicon(molecule1, molecule2):
    """Map atom indices of ``molecule1`` to those of ``molecule2`` via their MCS.

    The maximum common substructure of the two molecules (found with
    ``matchValences=True``) is matched against each molecule; atoms matched
    to the same pattern position are paired.

    :param molecule1: first molecule.
    :param molecule2: second molecule.
    :return: sorted list of ``(molecule1_atom_idx, molecule2_atom_idx)``
        tuples, one per atom of the common substructure.
    """
    patt1 = Chem.MolFromSmarts(
        MCS.FindMCS([molecule2, molecule1], matchValences=True).smarts)
    # Both matches are ordered like the pattern's atoms, so zipping them
    # pairs up corresponding atoms directly.
    matching1 = molecule2.GetSubstructMatch(patt1)
    matching2 = molecule1.GetSubstructMatch(patt1)
    return sorted(zip([int(idx) for idx in matching2],
                      [int(idx) for idx in matching1]))
Exemple #12
0
def pattern_findersub(steroid1, steroid2, exceptions):
    """List the atoms of ``steroid1`` that are not in its MCS with ``steroid2``.

    :param steroid1: SMILES of the substrate.
    :param steroid2: SMILES of the product.
    :param exceptions: atom indices of the substrate that are treated as
        part of the common substructure even if they did not match.
    :return: ``pandas.DataFrame`` with one row per substrate-only atom and
        the columns ``'Substrate Unique Atoms'``, ``'Connections to be
        deleted'`` (neighbouring substrate-only atoms) and ``'Bonds to be
        deleted'`` (bond type names for those connections).
    """
    # Parse each SMILES once and reuse the molecules below.
    m1 = Chem.MolFromSmiles(steroid1)
    m2 = Chem.MolFromSmiles(steroid2)
    patt1 = Chem.MolFromSmarts(MCS.FindMCS([m1, m2]).smarts)

    matching1 = list(m1.GetSubstructMatch(patt1))
    # important exception line: forced members of the common part
    matching1.extend(exceptions)

    # these are the atoms in the substrate that are NOT in the product
    substrate_specific_atoms = list(set(range(m1.GetNumAtoms())) - set(matching1))
    specific_set = set(substrate_specific_atoms)

    del_bonds = []
    del_connections = []
    for i in substrate_specific_atoms:
        atom = m1.GetAtomWithIdx(i)

        # neighbours of this atom that are substrate-specific themselves
        neighbors = [x.GetIdx() for x in atom.GetNeighbors()]
        extra_bonds = list(set(neighbors) & specific_set)

        # bond type names of the bonds that need to be deleted; the replace()
        # strips a fully qualified enum prefix if str() produced one
        bondtype = [
            str(m1.GetBondBetweenAtoms(i, other).GetBondType()).replace(
                'rdkit.Chem.rdchem.BondType.', '')
            for other in extra_bonds
        ]

        del_connections.append(extra_bonds)
        del_bonds.append(bondtype)

    substrate_modifications = pd.DataFrame({'Substrate Unique Atoms': substrate_specific_atoms, 'Connections to be deleted': del_connections, 'Bonds to be deleted': del_bonds})
    return substrate_modifications
Exemple #13
0
def moonshot():
    """Build a paired-graph dataset from the covid submissions CSV.

    Rows without ``f_avg_pIC50`` are dropped.  Every unordered pair of
    remaining molecules whose maximum common substructure has more than 15
    atoms yields one sample ``(graph_of_second, graph_of_first,
    pIC50_of_second, pIC50_of_first)``.

    :return: ``(ds_tr, ds_te)`` -- the first 500 samples and the rest.
    """
    import itertools
    import os

    import pandas as pd
    from dgllife.utils import mol_to_bigraph, CanonicalAtomFeaturizer
    from rdkit import Chem
    from rdkit.Chem import MCS

    df = pd.read_csv(
        os.path.dirname(graca.data.collections.__file__) +
        "/covid_submissions_all_info.csv")
    df = df.dropna(subset=["f_avg_pIC50"])

    # Parse every molecule once.  Pairing by position in this list avoids
    # using index labels as positions: after dropna() the labels have gaps,
    # so slicing ``iloc`` by label skipped or repeated pairs.
    records = [(Chem.MolFromSmiles(smiles), activity)
               for smiles, activity in zip(df["SMILES"], df["f_avg_pIC50"])]

    featurizer = CanonicalAtomFeaturizer(atom_data_field='feat')
    ds = []
    for (mol0, act0), (mol1, act1) in itertools.combinations(records, 2):
        res = MCS.FindMCS([mol0, mol1])
        if res.numAtoms > 15:
            ds.append((
                mol_to_bigraph(mol1, node_featurizer=featurizer),
                mol_to_bigraph(mol0, node_featurizer=featurizer),
                act1,
                act0,
            ))

    ds_tr = ds[:500]
    ds_te = ds[500:]

    return ds_tr, ds_te
Exemple #14
0
def modify_metastructure(product_modifications, metastructure, steroid2):
    """Return a copy of ``metastructure`` extended with product-specific atoms.

    ``product_modifications`` is read column-wise; the columns used here are
    'Connections to be added', 'Atomic Number', 'Bonds to be added' and
    'Neighbors'.  Atoms of the product (``steroid2``, a SMILES string) are
    translated to ``metastructure`` indices through ``create_lexicon``; the
    matching ("anchor") atoms are replaced by newly added atoms that carry
    the new bonds.  Before returning, radical electrons on all atoms are
    reset to 0 and the molecule is sanitized.

    :param product_modifications: table of modifications (accessed like a
        pandas DataFrame).
    :param metastructure: molecule to edit (not modified in place; an
        ``EditableMol`` copy is used).
    :param steroid2: SMILES of the product.
    :return: the edited, sanitized molecule.
    """
    m2 = Chem.MolFromSmiles(steroid2)
    #msubstrate = Chem.MolFromSmiles(steroid1)
    # NOTE(review): patt1 is not read again in this function.
    patt1 = Chem.MolFromSmarts(MCS.FindMCS([metastructure, m2], matchValences=True).smarts)
    
    #convert number of product specifc atom to our metastructure
    anchors = [] #anchors are in the MCS, they will ultimately be deleted but are important for figuring out where to add bonds
    anchortype = []
    # presumably (metastructure index, product index) pairs -- see create_lexicon
    lexicon = create_lexicon(metastructure, m2)
    for i in product_modifications['Connections to be added'].tolist():
        for k in i: #connections in list if there are multiple
            for j in lexicon:
                if k == j[1]:
                    anchors.append( j[0] ) 
                    atom = metastructure.GetAtomWithIdx(int(j[0]))
                    anchortype.append( atom.GetAtomicNum()) 

    neighbors = [] #for every index gives the neighbors of the same index in the pandas DF earlier             
    for i in anchors:
        adjacent_atoms = []
        atom = metastructure.GetAtomWithIdx(i)
        adjacent_atoms = [x.GetIdx() for x in atom.GetNeighbors()] 
        neighbors.append( adjacent_atoms )
    
    #add the product-specific atoms
    em = Chem.EditableMol(metastructure)
    newindexes = []
    newanchors = []
    
    ###KEEP THESE DELETED####
    '''for i in product_modifications['Atomic Number'].tolist():
        newidx = em.AddAtom(Chem.Atom( int(i) ))
        newindexes.append( newidx )
    for i in range(len(anchors)):
        newanchor = em.AddAtom(Chem.Atom( anchortype[i] ))
        newanchors.append( newanchor )'''
        
    
    #####logic gate for if a carboxyl like addition is going on
    # Every row index i is recorded once for each row j (including j == i)
    # that has the same 'Connections to be added' value.
    # NOTE(review): .irow is a legacy pandas accessor -- confirm the pandas
    # version in use still provides it.
    similar_indices = []
    for i in range(len(product_modifications)):
        for j in range(len(product_modifications)):
            if product_modifications['Connections to be added'].irow(i) == product_modifications['Connections to be added'].irow(j):
                similar_indices.append( i ) 
            
    if len(similar_indices) > 2:
        #translate neighbor number to what it corresponds to in m1
        m1 = em.GetMol()
        # NOTE(review): neighboranchor / neighbor_of_neighbor are only bound
        # if some lexicon entry matches; otherwise the loops below raise
        # NameError.
        for i in lexicon:
            if product_modifications['Neighbors'].irow( similar_indices[0] )[0] == i[1]:
                neighbor = i[0]
                atom = m1.GetAtomWithIdx(neighbor)
                neighbortype = atom.GetAtomicNum()
                neighbor_of_neighbor = [x.GetIdx() for x in atom.GetNeighbors()]
                neighboranchor = em.AddAtom(Chem.Atom( int(neighbortype) ))
        # one new atom per modification row
        new_atoms = []
        for i in range(len(product_modifications)):
            new_atom = em.AddAtom( Chem.Atom( int(product_modifications['Atomic Number'].irow(i)) ))
            new_atoms.append( new_atom )
            
        # bond each new atom to the replacement neighbor; rows whose first
        # bond type is neither 'DOUBLE' nor 'SINGLE' get no bond
        for i in range(len(product_modifications)):
                if str(product_modifications['Bonds to be added'].irow(i)[0]) == 'DOUBLE':
                    em.AddBond( int(neighboranchor),int(new_atoms[i]), Chem.BondType.DOUBLE)
                elif str(product_modifications['Bonds to be added'].irow(i)[0]) == 'SINGLE':
                    em.AddBond(int(neighboranchor),int(new_atoms[i]), Chem.BondType.SINGLE)
        for i in neighbor_of_neighbor:
            em.AddBond(int(i), int(neighboranchor), Chem.BondType.SINGLE)
            
        #get rid of old anchor
        # NOTE(review): removing several atoms by index one after another
        # presumably shifts the remaining indices -- confirm this is intended.
        for i in list(set(anchors)):
            em.RemoveAtom(i)
      
    else:
        #add the product-specific atoms
        em = Chem.EditableMol(metastructure)
        newindexes = []
        newanchors = []
        
        for i in product_modifications['Atomic Number'].tolist():
            newidx = em.AddAtom(Chem.Atom( int(i) ))
            newindexes.append( newidx )
        for i in range(len(anchors)):
            newanchor = em.AddAtom(Chem.Atom( anchortype[i] ))
            newanchors.append( newanchor )
            
        mref = em.GetMol()
        
        #combine the new atom with it's new anchor
        for i in range(len(newindexes)):
            if str(product_modifications['Bonds to be added'][i][0]) == 'DOUBLE':
                em.AddBond(int(newindexes[i]), int(newanchors[i]), Chem.BondType.DOUBLE)
            elif str(product_modifications['Bonds to be added'][i][0]) == 'SINGLE':
                em.AddBond(int(newindexes[i]), int(newanchors[i]), Chem.BondType.SINGLE)
                
        #combine new structure (newanchor + new atom) to the neighbors of the old anchor
        for i in range(len(anchors)):
            for j in range(len(neighbors[i])):
                # NOTE(review): ``atom`` is looked up but not used here.
                atom = mref.GetAtomWithIdx( int(neighbors[i][j]) )
                em.AddBond(int(newanchors[i]), int(neighbors[i][j]), Chem.BondType.SINGLE)
                    
        #get rid of old anchor
        for i in anchors:
            em.RemoveAtom(i)
            
    # finalise: clear radical electrons on every atom, then sanitize
    m1 = em.GetMol()
    for atom in m1.GetAtoms():
        atom.SetNumRadicalElectrons(0)
    Chem.SanitizeMol(m1)
    return m1
Exemple #15
0
def modify_substrate(substrate_modifications, steroid1, steroid2, steroids):
    """Edit the molecule parsed from ``steroid1`` towards the product ``steroid2``.

    Three SMILES strings are parsed: ``steroid1`` (m1, the molecule that is
    edited), ``steroid2`` (m2, the product) and ``steroids`` (ms, the
    reference substrate).  Atom correspondences come from
    ``create_lexicon``; bond tables come from ``BondIndex``.  The steps are:
    remove atoms that ms loses on the way to m2 (skipped when an aromatic
    bond is found among the product-only atoms), swap single/double bonds
    that differ between m1 and m2, then a best-effort removal of
    substrate-only atoms.

    :param substrate_modifications: table with a 'Substrate Unique Atoms'
        column (accessed like a pandas DataFrame).
    :param steroid1: SMILES of the molecule to modify.
    :param steroid2: SMILES of the product.
    :param steroids: SMILES of the reference substrate.
    :return: tuple ``(m1, bondindices, bondindicessub, AROMATIC_FLAG)``;
        the two bond tables are ``''`` when not computed or when the final
        step raised, and ``AROMATIC_FLAG`` is ``None`` or ``'GO'``.
    """
    m1 = Chem.MolFromSmiles(steroid1)
    m2 = Chem.MolFromSmiles(steroid2)
    ms = Chem.MolFromSmiles(steroids)
    ############################################################
    #removes atoms that are removed via lyase activity, first must find atoms that are removed from the native substrate to the product
    #then we have to compare those atoms to our non-native substrate then systematically remove them the tricky thing here will be indexing (as always)
    native_lexicon = create_lexicon(ms, m2)
    substrates_lexicon = create_lexicon(m1, ms)
    
    ms_atoms = []
    ms_m2_atoms = []
    m2matchingatoms = []
    for atom in ms.GetAtoms():
        ms_atoms.append( atom.GetIdx() )
    for i in native_lexicon:
        ms_m2_atoms.append( i[0] )
        m2matchingatoms.append( i[1] )
    #see which atoms don't have overlap i.e. things that need to be deleted
    ms_cleaved = []
    for i in ms_atoms:
        if i not in ms_m2_atoms:
            ms_cleaved.append( i )
            
    mp_atoms = []
    for atom in m2.GetAtoms():
        mp_atoms.append( atom )
     
    # product atoms that have no counterpart in the native lexicon
    mp_unique_atoms = []
    for i in mp_atoms:
        if i.GetIdx() not in m2matchingatoms:
            mp_unique_atoms.append( i.GetIdx() )
    
    #aromatic atoms will screw this code up, we need to make sure the atoms we're going to delete are due to aromaticitiy         
    aromatic_check = []
    aromatic_idxi = []
    aromatic_idxj = []
    for i in mp_unique_atoms:
        for j in mp_unique_atoms:
            # best effort: any error for a pair (presumably "no bond between
            # i and j") is swallowed and the pair is skipped
            try:
                aromatic_check.append( str( m2.GetBondBetweenAtoms(i, j).GetIsAromatic() ) )
                aromatic_idxi.append( i )
                aromatic_idxj.append( j )
            except:
                pass
            
    #translate ms_cleaved to our target
    # NOTE(review): target_specifics is not read again in this function.
    target_specifics = []
    for i in ms_cleaved:
        for j in substrates_lexicon:
            if i == j[1]:
                target_specifics.append( j[0] )
           
    if 'True' not in aromatic_check:   
        AROMATIC_FLAG = None
        temp_lex = create_lexicon( ms, m1 )
        em = Chem.EditableMol(m1)
        for i in range(len(ms_cleaved)):
            # the lexicon is rebuilt on every pass because m1 is replaced
            # after each removal below
            temp_lex = create_lexicon( ms, m1 )
            for j in temp_lex:
                if j[0] == ms_cleaved[i]:
                    deletion_atom = j[1]
                    em.RemoveAtom(deletion_atom)
                    m1 = em.GetMol()
        # round-trip through SMILES (cleaned by clean_smiles) to rebuild m1
        m1smiles = Chem.MolToSmiles( m1 )
        m1smiles = clean_smiles( m1smiles )
        try:
            m1 = Chem.MolFromSmiles( m1smiles )
        except:
            pass
    else:
        AROMATIC_FLAG = 'GO'
 
    #create lexicon to compare atom indices
    lexicon = create_lexicon(m1, m2)
    ############################################################
    #This will be the double bonds specific to the substrate 
    #I have to do this because double bonds seem to be more specific than single bonds in RDkit, 
    #By knowing the exact position of the double bonds I need to remove and add, I can more accurately transform the molecule
    # NOTE(review): the lists built in this section are not read again later
    # in this function, and lexicon values (presumably atom indices) are
    # passed to GetBondWithIdx -- confirm that is intended.
    
    m2bondtypessub = []
    m2bondidxsub = []
    m2bondstartsub = []
    m2bondendsub = []
    
    m1bondtypessub = []
    m1bondidxsub = []
    m1bondstartsub = []
    m1bondendsub = []
      
    for i in lexicon:
        idx1 = i[0]
        m1bondidxsub.append( idx1 )
        m1bondtypessub.append( m1.GetBondWithIdx(idx1).GetBondType() )
    for i in m1bondidxsub:
        m1bondstartsub.append( m1.GetBondWithIdx(int(i)).GetBeginAtomIdx() )
        m1bondendsub.append( m1.GetBondWithIdx(int(i)).GetEndAtomIdx() )
    
    for i in m1bondstartsub:
        for j in lexicon:
            if i == j[0]:
                m2bondstartsub.append( j[1] )
    for i in m1bondendsub:
        for j in lexicon:
            if i == j[0]:
                m2bondendsub.append( j[1] )
    for i in m1bondidxsub:
        for j in lexicon:
            if i == j[0]:
                m2bondidxsub.append( j[1] ) 
    for i in m2bondidxsub:
        m2bondtypessub.append( m2.GetBondWithIdx(int(i)).GetBondType() )
        
    bondindices = ''
    bondindicessub = ''
    
    em = Chem.EditableMol(m1)    
    if len(BondIndex( ms, m2 )) != 0 and AROMATIC_FLAG != 'GO': #compares bonds between both the substrate, product, and target to determine if anything needs to be added
        bondindicessub = BondIndex( m1, m2 )
        # keep only the rows whose bond type differs between m1 and m2
        bondindicessub = bondindicessub[bondindicessub['M1 Bond Type'] != bondindicessub['M2 Bond Type']]
        #bondindices = bondindices[bondindices['M1 Bond Index'] != bondindices['M1 Bond Start']]
        em = Chem.EditableMol(m1)
        if len(BondIndex(m1, m2)) != 0: ###This logic gate passes the bond formation if the substrate > product chemistry has no bond changes. I may actually destroy this step entirely...
            for i in range(len(bondindicessub)):
                if str(bondindicessub['M1 Bond Type'].irow(i)) == str('SINGLE') and str(bondindicessub['M2 Bond Type'].irow(i)) == str('DOUBLE'):
                    em.RemoveBond(int(bondindicessub['M1 Bond Start'].irow(i)), int(bondindicessub['M1 Bond End'].irow(i)))
                    em.AddBond(int(bondindicessub['M1 Bond Start'].irow(i)), int(bondindicessub['M1 Bond End'].irow(i)), Chem.BondType.DOUBLE)
                else:
                    # NOTE(review): the comparison on the next line is
                    # evaluated and discarded; every row that is not
                    # SINGLE->DOUBLE is rewritten as a SINGLE bond.
                    str(bondindicessub['M1 Bond Type'].irow(i)) == str('DOUBLE')
                    em.RemoveBond(int(bondindicessub['M1 Bond Start'].irow(i)), int(bondindicessub['M1 Bond End'].irow(i)))
                    em.AddBond(int(bondindicessub['M1 Bond Start'].irow(i)), int(bondindicessub['M1 Bond End'].irow(i)), Chem.BondType.SINGLE)
    else:
        pass
        
    m1 = em.GetMol()
        
    ############################################################iterate through bonds in both molecules to see if we need to delete any
    #This will be the double bonds specific to the product
    m2bondtypes = []
    m2bondidx = []
    m2bondstart = []
    m2bondend = []
    
    m1bondtypes = []
    m1bondidx = []
    m1bondstart = []
    m1bondend = []
    
    for i in lexicon:
        idx2 = i[1]
        m2bondidx.append( idx2 )
        m2bondtypes.append( m2.GetBondWithIdx(idx2).GetBondType() )
    for i in m2bondidx:
        m2bondstart.append( m2.GetBondWithIdx(int(i)).GetBeginAtomIdx() )
        m2bondend.append( m2.GetBondWithIdx(int(i)).GetEndAtomIdx() )
        
    m1bondtypes = []
    m1bondidx = []
    m1bondstart = []
    m1bondend = []
    
    for i in m2bondstart:
        for j in lexicon:
            if i == j[1]:
                m1bondstart.append( j[0] )
    for i in m2bondend:
        for j in lexicon:
            if i == j[1]:
                m1bondend.append( j[0] )
    for i in m2bondidx:
        for j in lexicon:
            if i == j[1]:
                m1bondidx.append( j[0] ) 
    for i in m1bondidx:
        m1bondtypes.append( m1.GetBondWithIdx(int(i)).GetBondType() )
                
    #exceptions are atoms that extend from the start bond that actually are irrelevant because where they should go don't exist in the starting molecule
    #this is problematic because they could add bonds to places they shouldn't be
    # NOTE(review): ``exceptions`` is not read again in this function.
    exceptions = []    
    for i in range(len(m2bondend)):
        if m2bondend[i] not in [int(j[1]) for j in lexicon]:
            exceptions.append( m2bondend[i] )
    
    try: #this try skips the deconstruction phase if it doesn't need to happen
        bondindices = BondIndex( m2, m1 )
        patt1 = Chem.MolFromSmarts(MCS.FindMCS([Chem.MolFromSmiles(steroid1), Chem.MolFromSmiles(steroid2)]).smarts)
        #remove specififed atoms from the substrate
        to_delete_atoms = substrate_modifications['Substrate Unique Atoms'].tolist()
    
        #deletes atoms that are in the substrate but not the product, could be from lyases or whatnot
        # NOTE(review): m1 is not reassigned inside this loop and ``em`` is
        # rebuilt from m1 on every pass, so only the edits of the last pass
        # reach the final em.GetMol() -- confirm that is intended.
        for i in range(len(substrate_modifications['Substrate Unique Atoms'])):
            try:
                patt1 = Chem.MolFromSmarts(MCS.FindMCS([m1, Chem.MolFromSmiles(steroid2)]).smarts)
                matching1 = m1.GetSubstructMatch(patt1)
                matching1 = list(matching1)
                #below is indices in m, ordered as patt's atoms
                index1 = range(m1.GetNumAtoms())
                #these are the atoms in the substrate that are NOT in the product
                substrate_specific_atoms = list(set(index1) - set(matching1))
                
                #delete just the first substrate_specific_atom because the whole molecule will reindex 
                em = Chem.EditableMol(m1)
                em.RemoveAtom(to_delete_atoms[0])
            
                #need to fix the valences of the atoms we deleted earlier, for whatever reason a radical or a hydrogen is thrown on and the valence is incorrect.
                #Using bondindicessub because it's coordinates are 100% reliable
                for j in range(len(bondindicessub)):
                    if int(bondindicessub['M1 Bond End'].irow(j)) == int(substrate_specific_atoms[0]):
                        deletion_target_num = int(bondindicessub['M1 Bond Start'].irow(j))
                        deletion_target = m1.GetAtomWithIdx( int(bondindicessub['M1 Bond Start'].irow(j)) )
                        deletion_neighbors = [x.GetIdx() for x in deletion_target.GetNeighbors()] 
                        #get the bonds that are connected to indexed atom but not the ones that are in the 'meta-structure'
                        # replace the bond-start atom with a fresh carbon
                        # (atomic number 6) single-bonded to up to three of
                        # its former neighbours
                        em = Chem.EditableMol(m1)
                        newidx = em.AddAtom(Chem.Atom(6))
                        if len(deletion_neighbors) == 1:
                            em.AddBond(newidx, deletion_neighbors[0], Chem.BondType.SINGLE)
                        elif len(deletion_neighbors) == 2:
                            em.AddBond(newidx, deletion_neighbors[0], Chem.BondType.SINGLE)
                            em.AddBond(newidx, deletion_neighbors[1], Chem.BondType.SINGLE)
                        elif len(deletion_neighbors) == 3:
                            em.AddBond(newidx, deletion_neighbors[0], Chem.BondType.SINGLE)
                            em.AddBond(newidx, deletion_neighbors[1], Chem.BondType.SINGLE)
                            em.AddBond(newidx, deletion_neighbors[2], Chem.BondType.SINGLE)
                        em.RemoveAtom(deletion_target_num)
            # best effort: any failure in a pass is ignored
            except:
                pass
    except:
        # any failure above resets both bond tables to the empty string
        bondindices = ''
        bondindicessub = ''  
                
    m1 = em.GetMol()
    #Chem.SanitizeMol(m1)
    return m1, bondindices, bondindicessub, AROMATIC_FLAG
Exemple #16
0
def do_lloommppaa_proc(target_id,
                       pdb_protein,
                       smiles,
                       mol2_protein=None,
                       reactants=None,
                       products=None,
                       context=None):
    """Function to do the processing for LLOOMMPPAA"""
    from LLOOMMPPAA.models import PossReact
    from LLOOMMPPAA.reactions import run_react_proc, define_reacts, load_in_reagents, load_in_follow_ups, find_follow_ups, define_reaction_process
    # Load the data
    load_wonka_prot(target_id, pdb_protein, smiles)
    if reactants:
        # Find the potential synthesis vectors
        define_reacts()
        find_follow_ups(target_id=target_id)
        # Show the sides -> USER MUST SELECT A SIDE
        poss_reacts = PossReact.objects.filter(
            mol_id__prot_id__target_id=target_id)
        # Loop through all the possible reactions
        for ps_r in poss_reacts:
            print ps_r.react_id.name
            print "1):", ps_r.replaced_frag
            print "2):", ps_r.retained_frag
            print "3):", "SKIP"
            choice = int(raw_input("Select a fragment to replace..."))
            if choice == 1:
                context = ps_r.retained_frag_context
                react_frag = ps_r.retained_frag
                this_react = ps_r.react_id
                break
            elif choice == 2:
                context = ps_r.replaced_frag_context
                react_frag = ps_r.replaced_frag
                this_react = ps_r.react_id
                break
            else:
                continue
    if not context:
        print "YOU MUST SPECIFY A CONTEXT"
        if products:
            print "AUTO GENERATING FROM PRODUCTS"
            print products
            context = Chem.CanonSmiles(
                Chem.MolToSmiles(Chem.MolFromSmarts(
                    MCS.FindMCS(Chem.SDMolSupplier(products)).smarts),
                                 isomericSmiles=True))
            print context
        else:
            return
    if reactants:
        mol_id = ps_r.mol_id
        prot_id = ps_r.mol_id.prot_id
    else:
        mol_id = Molecule.objects.filter(prot_id__target_id=target_id,
                                         smiles=smiles)[0]
        prot_id = mol_id.prot_id
    my_prots = [prot_id]
    # Set the mol2 protein for this target -> throw a warning if this doesn't happen
    if mol2_protein:
        from PLIFS.models import PlifProtein
        pp = PlifProtein()
        pp.prot_id = prot_id
        pp.mol2_data = open(mol2_protein).read()
        pp.save()
    # Define the reactants and products
    if reactants:
        react_id = load_in_reagents("RUN_DEF", reactants, ps_r.react_id)
    else:
        react_id = None
    if products:
        my_react = Reaction.objects.get_or_create(name="DUMMY",
                                                  react_smarts="DUMMY",
                                                  retro_smarts="DUMMY",
                                                  cont_smarts="DUMMY")[0]
        this_react = my_react
        react_frag = context
        prod_id = load_in_follow_ups("RUN_PROD", products, my_react, mol_id)
    else:
        prod_id = None
    # Now set up the reaction itself
    react_proc = define_reaction_process(mol_id,
                                         context,
                                         my_prots,
                                         this_react,
                                         context,
                                         react_frag,
                                         products_id=prod_id,
                                         reactants_id=react_id)
    if react_id:
        react_proc.proc_stage = "RUN_REACTION"
        react_proc.save()
    if products:
        react_proc.proc_stage = "GENERATE CONFS"
        react_proc.save()
    # Now run the reaction and analysis itself
    run_react_proc(react_proc)
Exemple #17
0
  
    




# Convert xyz to Mol
## call babel
## babel -ixyz

query_mol = readfile(file1)
template_mol = readfile(file2)

# Maximum common substructure (MCS) of the two molecules, turned into a
# query molecule built from its SMARTS pattern.
skeleton_mcs = MCS.FindMCS([query_mol, template_mol])
skeleton_mol = Chem.MolFromSmarts(skeleton_mcs.smarts)


min_overlap = 6  # Require the overlap to be at least min_overlap atoms

if len(skeleton_mol.GetAtoms()) < min_overlap:
    print("These molecules share less than min_overlap atoms. Quitting.")
    # Actually stop here; previously the script printed the message and
    # carried on with the too-small overlap.
    raise SystemExit(1)


# Report how many times the shared fragment occurs in each molecule.
# len() returns an int, so it must be converted before concatenation.
query_mcs_matches = query_mol.GetSubstructMatches(skeleton_mol)
print("query_mol contains overlapping fragment " + str(len(query_mcs_matches)) + " times.")
template_mcs_matches = template_mol.GetSubstructMatches(skeleton_mol)
print("template_mol contains overlapping fragment " + str(len(template_mcs_matches)) + " times.")

for query_mcs_match_index in range(len(query_mcs_matches)):
Exemple #18
0
    # Fragment of a function body (the enclosing signature is not visible
    # here). Picks a molecule supplier from the extension of arg[2], builds a
    # core from the common substructure of `lead` and the first supplied
    # molecule, then embeds every supplied molecule onto that core and writes
    # the successful ones to 'alignLeadsOut.sdf'.
    if (arg[2].endswith('.smi')):
        optSuppl = Chem.SmilesMolSupplier(arg[2])
    # NOTE(review): this checks 'sdf' without the leading dot, unlike the
    # '.smi' check above -- any name ending in "sdf" matches; confirm intent.
    elif (arg[2].endswith('sdf')):
        optSuppl = Chem.SDMolSupplier(arg[2])
    else:
        print('File type not supported')
        sys.exit()

    # NOTE(review): the writer is never closed/flushed in the visible code.
    w = Chem.SDWriter('alignLeadsOut.sdf')

    # Planning notes from the original author:
    #   - Use align mol
    #   - Get common substructure for lead and .smi
    #   - Align copy to lead
    #   - Use AlignMolConformers with argument to specific atom IDs of substructure
    # (the code below actually uses AllChem.ConstrainedEmbed, not
    # AlignMolConformers)
    mols = [lead, optSuppl[0]]
    res = MCS.FindMCS(
        mols, threshold=0.9, completeRingsOnly=True
    )  # Finds the maximum common substructure; res.smarts is its SMARTS pattern
    p = Chem.MolFromSmarts(
        res.smarts)  # Query mol object built from the common-substructure SMARTS
    # Core = lead (hydrogens removed) with its sidechains replaced relative to
    # p, and the resulting '*' dummy atoms deleted.
    core = AllChem.DeleteSubstructs(
        AllChem.ReplaceSidechains(Chem.RemoveHs(lead), p),
        Chem.MolFromSmiles('*'))
    core.UpdatePropertyCache()
    for mol in optSuppl:
        try:
            AllChem.ConstrainedEmbed(mol, core, useTethers=False)
            w.write(mol)
        # NOTE(review): bare except silently drops every molecule that fails
        # to embed or write (and also swallows KeyboardInterrupt).
        except:
            pass
def fmcstimeout(p, q):
    """Return the SMARTS of the MCS of ``p`` and ``q``.

    Calls ``MCS.FindMCS`` on the pair with ``timeout=0.01`` and returns the
    ``smarts`` attribute of the result.
    """
    pair = [p, q]
    mcs_result = MCS.FindMCS(pair, timeout=0.01)
    return mcs_result.smarts
Exemple #20
0
 def fmcstimeout(p, q):
     """Return the SMARTS of the MCS of ``p`` and ``q``.

     Calls ``MCS.FindMCS`` on the pair (no timeout argument) and returns the
     ``smarts`` attribute of the result.
     """
     pair = [p, q]
     mcs_result = MCS.FindMCS(pair)
     return mcs_result.smarts
Exemple #21
0
def view_2dmol(option,
               maps=None,
               out_put=None,
               target_id=None,
               legends=None,
               extra=None):
    """Function to render a mol image from a smiles.
    The input (option) could be 1) a list of smiles 2) a smiles 3) pdb_code
    Returns a molecule image as data"""
    option = str(option)
    print option
    try:
        option = ast.literal_eval(option)
    except:
        pass
    if type(option) is list:
        mols = [Chem.MolFromSmiles(str(x)) for x in option]
        p = Chem.MolFromSmarts(MCS.FindMCS(mols).smarts)
        AllChem.Compute2DCoords(p)
        [AllChem.GenerateDepictionMatching2DStructure(x, p) for x in mols]
        image = Draw.MolsToGridImage(mols, 2, legends=legends)
    # If it's a PDB code
    elif Chem.MolFromSmiles(str(option)) is None and type(option) is str:
        mol = Chem.MolFromSmiles(
            str(
                Molecule.objects.filter(
                    prot_id__code=option)[0].cmpd_id.smiles))
        AllChem.GenerateDepictionMatching3DStructure(
            mol,
            Chem.MolFromMolBlock(
                str(Molecule.objects.filter(
                    prot_id__code=option)[0].sdf_info)))
        image = Draw.MolToImage(mol)
    # If it's a
    elif type(option) is str:
        mol = Chem.MolFromSmiles(str(option))
        if extra == "SIMPLE":
            image = Draw.MolToImage(mol)
        elif extra is None:
            h_map = None
            image = Draw.MolToImage(mol,
                                    size=(100, 100),
                                    fitImage=True,
                                    highlightMap=h_map)
        else:
            sub = Chem.MolFromSmiles(str(extra))
            h_map = get_h_map(mol, sub)
            image = Draw.MolToImage(mol, highlightMap=h_map)
        if maps is None:
            pass
        else:
            maps = float(maps)
            draw = ImageDraw.Draw(image)
            dim = (20, 0) + (20, image.size[1])
            draw.line(dim,
                      fill=(255 - int(255 * maps), int(255 * maps), 0),
                      width=10)
    else:
        print "NOT VALID TYPE"
        return "NOT VALID TYPE"
    output = StringIO.StringIO()
    image.save(output, format="PNG")
    contents = output.getvalue()
    return contents