def selectBestFilePDBFasta(d_PDB, result_align, substruct):
    """Select the PDB / fasta file pair to use and store it in d_PDB["best"].

    d_PDB["best"] is reset to an empty dictionary, then filled in place with
    the keys "PDB" and "fasta":

    - result_align == []: the complete files d_PDB["p_pdb"] and
      d_PDB["p_fasta"] are selected.
    - at least one value of the two-level dictionary result_align differs
      from "100.0%": the complete files are selected and
      tool.fusionchainfasta is called on d_PDB["p_fasta"].
    - every value is "100.0%": the first entry of d_PDB["p_pdb_chain"] whose
      ligand list contains substruct is selected, together with the entry of
      d_PDB["p_fasta_chain"] at the same index. If no chain matches,
      d_PDB["best"] is left empty.

    Args:
        d_PDB: dictionary holding "p_pdb", "p_fasta", "p_pdb_chain" and
            "p_fasta_chain"; modified in place.
        result_align: [] or a dictionary of dictionaries of identity strings.
        substruct: ligand name searched in each chain file.

    Returns:
        None.
    """
    d_PDB["best"] = {}

    # case with only one PDB
    if result_align == []:
        d_PDB["best"]["PDB"] = d_PDB["p_pdb"]
        d_PDB["best"]["fasta"] = d_PDB["p_fasta"]
        return

    for primary_key in result_align.keys():
        for secondary_key in result_align[primary_key].keys():
            if result_align[primary_key][secondary_key] != "100.0%":
                d_PDB["best"]["PDB"] = d_PDB["p_pdb"]
                d_PDB["best"]["fasta"] = d_PDB["p_fasta"]
                # fusion fasta for blast remove header
                tool.fusionchainfasta(d_PDB["p_fasta"])
                return

    # control substructure presented // case identity 100%
    # NOTE(review): the chain search is assumed to apply only when an
    # alignment result exists (not to the result_align == [] case) - confirm.
    for i, p_pdb_chain in enumerate(d_PDB["p_pdb_chain"]):
        l_ligand = parsePDB.retrieveListLigand(p_pdb_chain)
        if substruct in l_ligand:
            d_PDB["best"]["PDB"] = p_pdb_chain
            d_PDB["best"]["fasta"] = d_PDB["p_fasta_chain"][i]
            return
def selectBestFilePDBFasta(d_PDB, result_align, substruct):
    """Select the PDB / fasta file pair to use and store it in d_PDB["best"].

    d_PDB["best"] is reset to an empty dictionary, then filled in place with
    the keys "PDB" and "fasta":

    - result_align == []: the complete files d_PDB["p_pdb"] and
      d_PDB["p_fasta"] are selected.
    - at least one value of the two-level dictionary result_align differs
      from "100.0%": the complete files are selected and
      tool.fusionchainfasta is called on d_PDB["p_fasta"].
    - every value is "100.0%": the first entry of d_PDB["p_pdb_chain"] whose
      ligand list contains substruct is selected, together with the entry of
      d_PDB["p_fasta_chain"] at the same index. If no chain matches,
      d_PDB["best"] is left empty.

    Args:
        d_PDB: dictionary holding "p_pdb", "p_fasta", "p_pdb_chain" and
            "p_fasta_chain"; modified in place.
        result_align: [] or a dictionary of dictionaries of identity strings.
        substruct: ligand name searched in each chain file.

    Returns:
        None.
    """
    d_PDB["best"] = {}

    # case with only one PDB
    if result_align == []:
        d_PDB["best"]["PDB"] = d_PDB["p_pdb"]
        d_PDB["best"]["fasta"] = d_PDB["p_fasta"]
        return

    for primary_key in result_align.keys():
        for secondary_key in result_align[primary_key].keys():
            if result_align[primary_key][secondary_key] != "100.0%":
                d_PDB["best"]["PDB"] = d_PDB["p_pdb"]
                d_PDB["best"]["fasta"] = d_PDB["p_fasta"]
                # fusion fasta for blast remove header
                tool.fusionchainfasta(d_PDB["p_fasta"])
                return

    # control substructure presented // case identity 100%
    # NOTE(review): the chain search is assumed to apply only when an
    # alignment result exists (not to the result_align == [] case) - confirm.
    for i, p_pdb_chain in enumerate(d_PDB["p_pdb_chain"]):
        l_ligand = parsePDB.retrieveListLigand(p_pdb_chain)
        if substruct in l_ligand:
            d_PDB["best"]["PDB"] = p_pdb_chain
            d_PDB["best"]["fasta"] = d_PDB["p_fasta_chain"][i]
            return
def datasetPreparation(ligand_ID, clean=1):
    """Prepare every reference folder of the dataset directory of ligand_ID.

    The dataset directory is given by pathManage.dataset(ligand_ID). Each of
    its entries whose name is exactly 4 characters long is processed:

    1. When clean == 1, every file whose name does not contain ".pdb",
       contains "subref", or whose part before the first "_" is 3 characters
       long is deleted.
    2. For each remaining file, every ligand returned by
       parsePDB.retrieveListLigand is written to its own file
       "<ligand>_<original file name>" (first parsed occurrence only).
    3. The reference ligand file located by pathManage.findligandRef is
       split by substructTools.retrieveSubstruct and each substructure is
       written to "subref_<substructure>_<folder>.pdb".

    Args:
        ligand_ID: ligand identifier forwarded to the pathManage and
            substructTools helpers.
        clean: 1 to delete files of a previous run before extraction.

    Returns:
        1, always.
    """
    p_dir_dataset = pathManage.dataset(ligand_ID)

    indent = 0
    for ref_folder in listdir(p_dir_dataset):
        # file include in dataset folder
        if len(ref_folder) != 4:
            continue

        p_dir_ref = p_dir_dataset + ref_folder + "/"
        indent = indent + 1
        # single formatted string: same output with the Python 2 print
        # statement and the Python 3 print function
        print("%s %s" % (ref_folder, indent))

        # clean repertory -> only PDB ref and PDB
        if clean == 1:
            for pdbfile in listdir(p_dir_ref):
                # the dot is escaped so that only a literal ".pdb" matches
                # (assumes `search` is re.search - TODO confirm)
                if (not search(r"\.pdb", pdbfile)
                        or search("subref", pdbfile)
                        or len(pdbfile.split("_")[0]) == 3):
                    remove(p_dir_ref + pdbfile)

        # the folder is listed again because the cleaning step may have
        # deleted files
        for pdbfile in listdir(p_dir_ref):
            p_file_pdb = p_dir_ref + pdbfile
            # extract ligand in PDB
            l_ligand = parsePDB.retrieveListLigand(p_file_pdb)
            if l_ligand == []:
                continue

            l_atom_pdb_parsed = parsePDB.loadCoordSectionPDB(p_file_pdb)
            for name_ligand in l_ligand:
                l_lig_parsed = parsePDB.retrieveLigand(l_atom_pdb_parsed,
                                                       name_ligand)
                if l_lig_parsed == []:
                    continue
                p_filout_ligand = p_dir_ref + name_ligand + "_" + pdbfile
                writePDBfile.coordinateSection(p_filout_ligand,
                                               l_lig_parsed[0], "HETATM",
                                               header=0, connect_matrix=1)

        # ligand_ID write for shaep
        p_lig_ref = pathManage.findligandRef(p_dir_ref, ligand_ID)
        if p_lig_ref == 0:
            continue
        lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref)
        d_l_atom_substruct = substructTools.retrieveSubstruct(lig_ref_parsed,
                                                              ligand_ID)
        # case with AMP without phosphate
        if d_l_atom_substruct == {}:
            continue

        # write ligand_ID
        for subs in d_l_atom_substruct.keys():
            p_filout_substruct = (p_dir_ref + "subref_" + subs + "_"
                                  + ref_folder + ".pdb")
            writePDBfile.coordinateSection(p_filout_substruct,
                                           d_l_atom_substruct[subs], "HETATM",
                                           header=0, connect_matrix=1)

    return 1
def filterBlastResult(d_dataset, p_dir_dataset, substruct, thresold_RX=2.5,
                      thresold_blast=1e-4, debug=1,
                      dbPDB="/home/borrel/PDB/"):
    """Filter the blast hits of every conserved reference PDB.

    Filter resolution PDB
    Filter evalue

    For each reference of d_dataset whose "conserve" value is not 0, each
    hit of its "align" dictionary is kept when:

    - its value is <= thresold_blast and the first 4 characters of the hit,
      upper-cased, differ from the reference key;
    - downloadFile.importPDB does not return 0 for it;
    - its ligand list is not empty and does not contain substruct
      (otherwise the imported file and its per-chain files are deleted);
    - its resolution converts to a float that is <= thresold_RX.

    Kept hits are appended, upper-cased, to d_dataset[pdb_ref]["blast"],
    with at most one hit per PDB identifier (part before the first "_").

    Args:
        d_dataset: dictionary of references; modified in place.
        p_dir_dataset: dataset directory; files are imported in
            p_dir_dataset + pdb_ref + "/".
        substruct: ligand name that disqualifies a hit when present.
        thresold_RX: maximal accepted resolution.
        thresold_blast: maximal accepted alignment value.
        debug: 1 to print control messages.
        dbPDB: value forwarded as the dbPDB argument of
            downloadFile.importPDB.

    Returns:
        None.
    """
    for pdb_ref in d_dataset.keys():
        if d_dataset[pdb_ref]["conserve"] == 0:
            continue

        d_align = d_dataset[pdb_ref]["align"]
        for pdb_blast_chain in d_align.keys():
            evalue = d_align[pdb_blast_chain]

            # filter e.value
            # prints use one formatted string: same output with the
            # Python 2 print statement and the Python 3 print function
            if debug == 1:
                print("%s %s %s" % (evalue, pdb_ref, thresold_blast))

            # remove thresold and reference cleanner, remove if alignement
            # on same protein
            if not (evalue <= thresold_blast
                    and pdb_blast_chain[0:4].upper() != pdb_ref):
                continue
            if debug == 1:
                print("%s %s %s" % ("CONTROL", pdb_blast_chain, evalue))

            # dowload PDB files
            pdb_blast = pdb_blast_chain[0:4]
            p_pdb_blast = downloadFile.importPDB(
                pdb_blast, p_dir_dataset + pdb_ref + "/", dir_by_PDB=0,
                dbPDB=dbPDB)
            if p_pdb_blast == 0:
                continue

            # divise chain in reference folder
            l_queries_by_chain = separeByChain(p_pdb_blast)

            # a resolution that cannot be read is replaced by a value
            # larger than any sensible threshold
            try:
                RX = parsePDB.resolution(p_pdb_blast)
            except Exception:
                RX = 100.0

            l_ligand = parsePDB.retrieveListLigand(p_pdb_blast)
            if debug == 1:
                print("%s %s %s %s" % (
                    p_pdb_blast, l_ligand, RX,
                    "control blast\n" + " ".join(l_queries_by_chain) + "\n"))

            # remove apo forms and remove not substiuant + list ligand do
            # not considered
            if l_ligand == [] or substruct in l_ligand:
                remove(p_pdb_blast)
                for queries_by_chain in l_queries_by_chain:
                    remove(queries_by_chain)
                continue

            # case RMN structure
            try:
                RX = float(RX)
            except (TypeError, ValueError):
                continue

            if debug == 1:
                print("%s %s %s %s" % ("----", RX, thresold_RX, "----"))

            if RX <= thresold_RX:
                chain_kept = pdb_blast_chain.upper()
                pdb_kept = chain_kept.split("_")[0]
                l_blast = d_dataset[pdb_ref].setdefault("blast", [])
                # control if other chain in list
                if not any(PDB_in.split("_")[0] == pdb_kept
                           for PDB_in in l_blast):
                    l_blast.append(chain_kept)
                # NOTE(review): printed only where the "blast" key is
                # guaranteed to exist - confirm the intended position.
                if debug == 1:
                    print("%s %s" % (d_dataset[pdb_ref]["blast"], pdb_ref))
def searchLigands(prPDB, prresult):
    """Search ligands in PDB database.

    Writes the file prresult + "resultLigandInPDB" with one line per PDB
    that has at least one ligand: the PDB identifier, a tabulation, then the
    ligand names separated by spaces. When that file already exists and is
    not empty, nothing is computed and its path is returned directly.

    Args:
        prPDB: directory of the PDB files; each file is read from
            prPDB + <lower-cased identifier> + ".pdb".
        prresult: prefix (directory path) of the result file.

    Returns:
        Path of the result file.
    """
    print("Start Search Ligand In PDB file")

    pfilout = prresult + "resultLigandInPDB"
    # control file exist
    if path.exists(pfilout) and path.getsize(pfilout) != 0:
        return pfilout

    l_PDB = retriveListPDB(prPDB)

    # the context manager closes the file even when ligand parsing raises
    with open(pfilout, "w") as filout:
        for PDBid in l_PDB:
            llig = parsePDB.retrieveListLigand(prPDB + PDBid.lower() + ".pdb")
            if llig != []:
                filout.write(PDBid + "\t" + " ".join(llig) + "\n")

    return pfilout
def datasetPreparation(ligand_ID, clean=1):
    """Prepare every reference folder of the dataset directory of ligand_ID.

    The dataset directory is given by pathManage.dataset(ligand_ID). Each of
    its entries whose name is exactly 4 characters long is processed:

    1. When clean == 1, every file whose name does not contain ".pdb",
       contains "subref", or whose part before the first "_" is 3 characters
       long is deleted.
    2. For each remaining file, every ligand returned by
       parsePDB.retrieveListLigand is written to its own file
       "<ligand>_<original file name>" (first parsed occurrence only).
    3. The reference ligand file located by pathManage.findligandRef is
       split by substructTools.retrieveSubstruct and each substructure is
       written to "subref_<substructure>_<folder>.pdb".

    Args:
        ligand_ID: ligand identifier forwarded to the pathManage and
            substructTools helpers.
        clean: 1 to delete files of a previous run before extraction.

    Returns:
        1, always.
    """
    p_dir_dataset = pathManage.dataset(ligand_ID)

    indent = 0
    for ref_folder in listdir(p_dir_dataset):
        # file include in dataset folder
        if len(ref_folder) != 4:
            continue

        p_dir_ref = p_dir_dataset + ref_folder + "/"
        indent = indent + 1
        # single formatted string: same output with the Python 2 print
        # statement and the Python 3 print function
        print("%s %s" % (ref_folder, indent))

        # clean repertory -> only PDB ref and PDB
        if clean == 1:
            for pdbfile in listdir(p_dir_ref):
                # the dot is escaped so that only a literal ".pdb" matches
                # (assumes `search` is re.search - TODO confirm)
                if (not search(r"\.pdb", pdbfile)
                        or search("subref", pdbfile)
                        or len(pdbfile.split("_")[0]) == 3):
                    remove(p_dir_ref + pdbfile)

        # the folder is listed again because the cleaning step may have
        # deleted files
        for pdbfile in listdir(p_dir_ref):
            p_file_pdb = p_dir_ref + pdbfile
            # extract ligand in PDB
            l_ligand = parsePDB.retrieveListLigand(p_file_pdb)
            if l_ligand == []:
                continue

            l_atom_pdb_parsed = parsePDB.loadCoordSectionPDB(p_file_pdb)
            for name_ligand in l_ligand:
                l_lig_parsed = parsePDB.retrieveLigand(l_atom_pdb_parsed,
                                                       name_ligand)
                if l_lig_parsed == []:
                    continue
                p_filout_ligand = p_dir_ref + name_ligand + "_" + pdbfile
                writePDBfile.coordinateSection(p_filout_ligand,
                                               l_lig_parsed[0], "HETATM",
                                               header=0, connect_matrix=1)

        # ligand_ID write for shaep
        p_lig_ref = pathManage.findligandRef(p_dir_ref, ligand_ID)
        if p_lig_ref == 0:
            continue
        lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref)
        d_l_atom_substruct = substructTools.retrieveSubstruct(lig_ref_parsed,
                                                              ligand_ID)
        # case with AMP without phosphate
        if d_l_atom_substruct == {}:
            continue

        # write ligand_ID
        for subs in d_l_atom_substruct.keys():
            p_filout_substruct = (p_dir_ref + "subref_" + subs + "_"
                                  + ref_folder + ".pdb")
            writePDBfile.coordinateSection(p_filout_substruct,
                                           d_l_atom_substruct[subs], "HETATM",
                                           header=0, connect_matrix=1)

    return 1
def filterBlastResult(d_dataset, p_dir_dataset, substruct, thresold_RX=2.5,
                      thresold_blast=1e-4, debug=1,
                      dbPDB="/home/borrel/PDB/"):
    """Filter the blast hits of every conserved reference PDB.

    Filter resolution PDB
    Filter evalue

    For each reference of d_dataset whose "conserve" value is not 0, each
    hit of its "align" dictionary is kept when:

    - its value is <= thresold_blast and the first 4 characters of the hit,
      upper-cased, differ from the reference key;
    - downloadFile.importPDB does not return 0 for it;
    - its ligand list is not empty and does not contain substruct
      (otherwise the imported file and its per-chain files are deleted);
    - its resolution converts to a float that is <= thresold_RX.

    Kept hits are appended, upper-cased, to d_dataset[pdb_ref]["blast"],
    with at most one hit per PDB identifier (part before the first "_").

    Args:
        d_dataset: dictionary of references; modified in place.
        p_dir_dataset: dataset directory; files are imported in
            p_dir_dataset + pdb_ref + "/".
        substruct: ligand name that disqualifies a hit when present.
        thresold_RX: maximal accepted resolution.
        thresold_blast: maximal accepted alignment value.
        debug: 1 to print control messages.
        dbPDB: value forwarded as the dbPDB argument of
            downloadFile.importPDB.

    Returns:
        None.
    """
    for pdb_ref in d_dataset.keys():
        if d_dataset[pdb_ref]["conserve"] == 0:
            continue

        d_align = d_dataset[pdb_ref]["align"]
        for pdb_blast_chain in d_align.keys():
            evalue = d_align[pdb_blast_chain]

            # filter e.value
            # prints use one formatted string: same output with the
            # Python 2 print statement and the Python 3 print function
            if debug == 1:
                print("%s %s %s" % (evalue, pdb_ref, thresold_blast))

            # remove thresold and reference cleanner, remove if alignement
            # on same protein
            if not (evalue <= thresold_blast
                    and pdb_blast_chain[0:4].upper() != pdb_ref):
                continue
            if debug == 1:
                print("%s %s %s" % ("CONTROL", pdb_blast_chain, evalue))

            # dowload PDB files
            pdb_blast = pdb_blast_chain[0:4]
            p_pdb_blast = downloadFile.importPDB(
                pdb_blast, p_dir_dataset + pdb_ref + "/", dir_by_PDB=0,
                dbPDB=dbPDB)
            if p_pdb_blast == 0:
                continue

            # divise chain in reference folder
            l_queries_by_chain = separeByChain(p_pdb_blast)

            # a resolution that cannot be read is replaced by a value
            # larger than any sensible threshold
            try:
                RX = parsePDB.resolution(p_pdb_blast)
            except Exception:
                RX = 100.0

            l_ligand = parsePDB.retrieveListLigand(p_pdb_blast)
            if debug == 1:
                print("%s %s %s %s" % (
                    p_pdb_blast, l_ligand, RX,
                    "control blast\n" + " ".join(l_queries_by_chain) + "\n"))

            # remove apo forms and remove not substiuant + list ligand do
            # not considered
            if l_ligand == [] or substruct in l_ligand:
                remove(p_pdb_blast)
                for queries_by_chain in l_queries_by_chain:
                    remove(queries_by_chain)
                continue

            # case RMN structure
            try:
                RX = float(RX)
            except (TypeError, ValueError):
                continue

            if debug == 1:
                print("%s %s %s %s" % ("----", RX, thresold_RX, "----"))

            if RX <= thresold_RX:
                chain_kept = pdb_blast_chain.upper()
                pdb_kept = chain_kept.split("_")[0]
                l_blast = d_dataset[pdb_ref].setdefault("blast", [])
                # control if other chain in list
                if not any(PDB_in.split("_")[0] == pdb_kept
                           for PDB_in in l_blast):
                    l_blast.append(chain_kept)
                # NOTE(review): printed only where the "blast" key is
                # guaranteed to exist - confirm the intended position.
                if debug == 1:
                    print("%s %s" % (d_dataset[pdb_ref]["blast"], pdb_ref))