def SearchChemicalSubstruct (pr_data, pr_result, control = 0): # substructure search p_filout = pr_result + "findSruct" filout = open (p_filout, "w") l_ligand = listdir(pr_data) print l_ligand for ligand in l_ligand : print ligand l_file = listdir(pr_data + ligand + "/") for f in l_file : if ligand == "ZM241385" or ligand == "source": group = f else : group = f.split ("_")[-2] p_file_PDB = pr_data + ligand + "/" + f print p_file_PDB if control == 1 : ControlPDBFormat(p_file_PDB) l_atom = parsing.loadCoordSectionPDB(p_file_PDB, remove_H = 1) if ligand == "ZM241385" : ll_atom_lig = parsing.retrieveLigand(l_atom, "ZMA") else : ll_atom_lig = parsing.retrieveLigand(l_atom, "RES") for l_atom_lig in ll_atom_lig : l_subs = searchPDB.interestStructure(l_atom_lig, more_flex = 1) filout.write (str (ligand) + "\t" + str (f) + "\t" + str (group) + "\t" + " ".join(l_subs) + "\n") filout.close () # best group ergomine -> 2128 (2013) && 6882 (2013) # best group taladegid -> 7527 (2013) Vanderblit # best group eticlopride -> 1285 (2010) # ZM241385 => mod7msp
def CheckComplexQuality(l_in, name_lig, limit_RX, limit_RFree, one_PDB_out, debug = 1): """Check if the PDB file is similar and preserve file with the best Quality remove file contain only DNA, RNA structure in : list of PDB files out : void -> change directly PDB list""" l_PDB = deepcopy(l_in) nb_PDB = len(l_PDB) l_RX = [] i = 0 while i < nb_PDB: # load PDB d_PDB_load = loadFile.ExtractInfoPDBID(l_PDB[i]) # case PDB not found => often difference between list and database if d_PDB_load == {} : del l_PDB[i] nb_PDB = nb_PDB - 1 continue # check Header header_PDB = d_PDB_load["Header"] # Check DNA or RNA if search ("dna", header_PDB) or search ("rna", header_PDB): if debug == 1 : print "Exit => out dna", l_PDB[i] del l_PDB[i] nb_PDB = nb_PDB - 1 continue # check quality RX = d_PDB_load["RX"] RFree = d_PDB_load["RFree"] if float (RX) > limit_RX or float (RFree) > limit_RFree : if debug == 1 : print "Exit => Quality structure", l_PDB[i], RX, RFree del l_PDB[i] nb_PDB = nb_PDB - 1 continue # check if the structure contain the substructure # -> because when we improve the resolution the ligand change and the search PDB -> ERROR l_interest_sub = searchPDB.interestStructure(d_PDB_load[name_lig][0]) if l_interest_sub == [] : if debug == 1 : print "Exit => No Sub found", l_PDB[i], RX, RFree del l_PDB[i] nb_PDB = nb_PDB - 1 continue # Check false ligand, hooked to the protein l_atom_protein = d_PDB_load["protein"] l_atom_lig = d_PDB_load[name_lig][0] if parsing.checkLigandHooked (l_atom_protein, l_atom_lig) == 1: if debug == 1 : print "Exit => ligand = Modificated Amino acid", l_PDB[i] nb_PDB = nb_PDB - 1 del l_PDB[i] continue l_RX.append (RX) i = i + 1 if len (l_PDB) == 1 or len (l_PDB) == 0 : return l_PDB # case where we want only one PDB -> retrieve best quality if one_PDB_out == 1 : return [l_PDB[l_PDB.index (min (l_PDB))]] return l_PDB
def Builder(name_database, RX = 3.00, RFree = 0.25, one_PDB_by_lig = 0, debug = 1): """ Dataset Builder in : - open file result of filter ligand PDB out : - log file - dataset file -> ligand with associated PDB """ if one_PDB_by_lig == 0 : name_dataset = name_database + "/" + str (RX) + "_" + str (RFree) + "_multiPDB" else : name_dataset = name_database + "/" + str (RX) + "_" + str (RFree) + "_uniquePDB" pr_database = pathManage.result(name_database) pr_result = pathManage.result(name_dataset) if debug : print "== Path result " + pr_result + "==\n" # check dataSet exist !!!!!! # short cut l_file_dataset = pathManage.retriveDataSetFile (pr_result) if len(l_file_dataset) != 0 : return l_file_dataset # load structure d_lig_PDB = loadFile.LigandInPDB(pr_database + "resultLigandInPDB") nb_lig = len(d_lig_PDB.keys()) print "NB ligand included database:", nb_lig # print d_lig_PDB.keys().index("HSO") -> search index ligand i = 0 while i < nb_lig: name_lig = d_lig_PDB.keys()[i] ####################################### # step 1 search chemical substructure # ####################################### PDB_ref = d_lig_PDB[name_lig][0] if debug : print PDB_ref, name_lig, i, nb_lig # if not possible to load the ligand -> remove lig try : l_atom_lig_ref = loadFile.ligandInPDBConnectMatrixLigand(PDB_ref, name_lig) except : if debug == 1 : print "Exit => load ligand-l59" del d_lig_PDB[name_lig] nb_lig = nb_lig - 1 continue # search substructure interest l_interest_sub = searchPDB.interestStructure(l_atom_lig_ref) # search interest structure if debug : print "Interest substructures in " + str(name_lig) + "-" + str (PDB_ref) + " " + "-".join(l_interest_sub) if l_interest_sub == []: if debug == 1 : print "Exit => Not substructure-l68" del d_lig_PDB[name_lig] nb_lig = nb_lig - 1 continue ####################################################### # Step 2 Control quality of PDB + ligand hooked + option one # ####################################################### else : # control dataset quality if debug : print "List PDB checked -> ", d_lig_PDB[name_lig] l_PDB = checkPDBfile.CheckComplexQuality(d_lig_PDB[name_lig], name_lig, RX, RFree, one_PDB_by_lig) # remove the entrance key with the ligand if l_PDB == []: if debug == 1 : print "Exit => Not No PDB selected-l82" del d_lig_PDB[name_lig] nb_lig = nb_lig - 1 continue else : d_lig_PDB[name_lig] = l_PDB i = i + 1 if debug == 1 : print "Number of ligand selected =>", nb_lig # structure and file dataset and control RX + length bond WriteDataset (d_lig_PDB, pr_result) return Builder(name_database, RX , RFree , one_PDB_by_lig , debug = 1)