Example #1
0
File: main.py Project: ABorrel/LSRs
def datasetPreparation (ligand_ID, clean = 1):
    """
    Prepare every reference folder of the dataset of one ligand.

    For each 4-character sub-folder of pathManage.dataset(ligand_ID):
      1. when clean == 1, delete the files that are not plain PDB files
         (see the removal rule below);
      2. for every file left, write one file per ligand reported by
         parsePDB.retrieveListLigand, named <ligand name>_<source file name>;
      3. locate the reference ligand with pathManage.findligandRef and write
         one "subref_<substructure>_<folder>.pdb" file per entry returned by
         substructTools.retrieveSubstruct.

    args: -> ligand_ID, identifier forwarded to pathManage / substructTools
          -> clean, 1 (default) to clean each folder before the extraction
    return: 1
    """

    p_dir_dataset = pathManage.dataset(ligand_ID)
    indent = 0  # number of reference folders visited, only used for the trace

    for ref_folder in listdir(p_dir_dataset):
        # reference folders are recognised by their 4-character name
        if len(ref_folder) != 4:
            continue
        p_dir_ref = p_dir_dataset + ref_folder + "/"
        # a plain file with a 4-character name would make listdir fail below
        if not path.isdir(p_dir_ref):
            continue
        indent = indent + 1
        # single pre-formatted string: same output with the Python 2 print
        # statement and the Python 3 print function
        print("%s %s" % (ref_folder, indent))

        # clean repertory -> only PDB ref and PDB
        if clean == 1:
            for pdbfile in listdir(p_dir_ref):
                # removed: no ".pdb" match, "subref" files, and files whose
                # prefix before the first "_" has 3 characters (presumably the
                # ligand files written by a previous run -- they are generated
                # below as <ligand name>_<source file name>)
                # NOTE(review): if search is re.search, "." matches any
                # character, so ".pdb" is not a strict extension test
                if not search(".pdb", pdbfile) or search("subref", pdbfile) or len(pdbfile.split("_")[0]) == 3:
                    remove(p_dir_ref + pdbfile)

        # extract the ligands of every file still present in the folder
        for pdbfile in listdir(p_dir_ref):
            p_file_pdb = p_dir_ref + pdbfile
            l_ligand = parsePDB.retrieveListLigand(p_file_pdb)
            if l_ligand == []:
                continue
            l_atom_pdb_parsed = parsePDB.loadCoordSectionPDB(p_file_pdb)
            for name_ligand in l_ligand:
                l_lig_parsed = parsePDB.retrieveLigand(l_atom_pdb_parsed, name_ligand)
                if l_lig_parsed == []:
                    continue
                # only the first element returned for this ligand name is written
                p_filout_ligand = p_dir_ref + name_ligand + "_" + pdbfile
                writePDBfile.coordinateSection(p_filout_ligand, l_lig_parsed[0], "HETATM", header=0, connect_matrix = 1)

        # ligand_ID write for shaep
        p_lig_ref = pathManage.findligandRef(p_dir_ref, ligand_ID)
        if p_lig_ref == 0:
            # 0 is the "no reference ligand" value tested by the original code
            continue
        lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref)
        d_l_atom_substruct = substructTools.retrieveSubstruct(lig_ref_parsed, ligand_ID)
        # case with AMP without phosphate
        if d_l_atom_substruct == {}:
            continue
        # write one file per substructure
        for subs in d_l_atom_substruct.keys():
            p_filout_substruct = p_dir_ref + "subref_" + subs + "_" + ref_folder + ".pdb"
            writePDBfile.coordinateSection(p_filout_substruct, d_l_atom_substruct[subs], "HETATM", header=0, connect_matrix = 1)

    return 1
Example #2
0
def searchReplacement (smile, PDB_query, PDB_ref, name_ligand, in_cycle = 0) : 
    """
    Classify a SMILES code into a replacement family.

    When searchMetal reports a metal and that metal is listed by
    parsePDB.retrieveListIon for the query PDB, the ion is written to
    "<metal>_<query file name>" in the dataset folder and the function
    returns ("metal", <metal>). Otherwise the family tests below are tried
    in order and the first one returning 1 wins.

    args: -> smile, SMILES code handed to every search* helper
          -> PDB_query, pattern searched (with search) in the query paths
          -> PDB_ref, reference folder name, appended to name_ligand
          -> name_ligand, ligand folder name
          -> in_cycle, 0 (default) to test for a ring first; any other
             value skips the ring test
    return: tuple (family, metal); metal is "" except for the "metal" family
    """

    metal_find = searchMetal (smile)
    if metal_find != 0 : 
        p_dir_dataset = pathManage.dataset(name_ligand + "/" + PDB_ref)

        # explicit sentinel instead of probing locals() for the variable
        p_PDB_query = None
        for p_query in pathManage.findPDBQueryDataset(p_dir_dataset) : 
            if search (PDB_query, p_query) : 
                p_PDB_query = p_query
                break

        if p_PDB_query is not None : 
            l_atom_parsed = parsePDB.loadCoordSectionPDB(p_PDB_query)
            l_ions_PDB = parsePDB.retrieveListIon(l_atom_parsed)

            if metal_find in l_ions_PDB : 
                l_atom_ion = parsePDB.retrieveLigand(l_atom_parsed, metal_find)
                filout = open (p_dir_dataset + str(metal_find) + "_" + p_PDB_query.split("/")[-1], "w")
                try : 
                    for atom_ion in l_atom_ion : 
                        writePDBfile.coordinateSection(filout, atom_ion, recorder = "HETATM", header = str(metal_find), connect_matrix = 0)
                finally : 
                    # the handle is released even when the writer raises
                    filout.close ()
                return "metal", metal_find

    # ring test only at the first level (in_cycle == 0)
    if in_cycle == 0 and searchRing(smile) == 1 : 
        return "cycle", ""

    # ordered tests: the first helper returning 1 decides the family
    # NOTE(review): "C+O+N" comes after "C+O" and "C+N"; whether it can still
    # be reached depends on the helpers, which are not visible here
    l_test_family = (
        (searchP, "P"),
        (searchB, "B"),
        (searchF, "F"),
        (searchCl, "Cl"),
        (searchBr, "Br"),
        (searchBe, "Be"),
        (searchNO2, "NO2"),
        (searchSulfonyl, "SO2"),
        (searchS, "S"),
        (searchCON, "CON"),
        (searchCarboxy, "COO"),
        (searchConly, "onlyC"),
        (searchCandO, "C+O"),
        (searchCandN, "C+N"),
        (searchCandOandN, "C+O+N"),
    )
    for searchFamily, name_family in l_test_family : 
        if searchFamily (smile) == 1 : 
            return name_family, ""

    return "other", ""
Example #3
0
File: tool.py Project: ABorrel/LSRs
def removeChain (path_protein_PDB, p_dir_out) : 
    """
    Write a copy of a PDB file with the chain identifier of every atom blanked.

    Only the atoms returned by parsePDB.loadCoordSectionPDB for section="ATOM"
    are written.

    args: -> path_protein_PDB, path of the input PDB file
          -> p_dir_out, output folder; it is concatenated as-is with the file
             name, so it has to end with a path separator
    return: path of the file written
    """

    # output file keeps the name of the input file
    _, name_protein = path.split(path_protein_PDB)
    p_filout = p_dir_out + name_protein

    l_atom_protein = parsePDB.loadCoordSectionPDB(path_protein_PDB, section="ATOM")
    for atom_protein in l_atom_protein : 
        atom_protein["chainID"] = ""

    writePDBfile.coordinateSection(p_filout, l_atom_protein, "ATOM")
    return p_filout
Example #4
0
def removeChain(path_protein_PDB, p_dir_out):
    """
    Write a copy of a PDB file with the chain identifier of every atom blanked.

    Only the atoms returned by parsePDB.loadCoordSectionPDB for section="ATOM"
    are written.

    args: -> path_protein_PDB, path of the input PDB file
          -> p_dir_out, output folder; it is concatenated as-is with the file
             name, so it has to end with a path separator
    return: path of the file written
    """

    # output file keeps the name of the input file
    _, name_protein = path.split(path_protein_PDB)
    p_filout = p_dir_out + name_protein

    l_atom_protein = parsePDB.loadCoordSectionPDB(
        path_protein_PDB, section="ATOM")
    for atom_protein in l_atom_protein:
        atom_protein["chainID"] = ""

    writePDBfile.coordinateSection(p_filout, l_atom_protein, "ATOM")
    return p_filout
Example #5
0
def manageTMalign(path_protein):
    """
    Rewrite a PDB file in place with its atoms grouped by residue.

    The atoms are loaded with parsePDB.loadCoordSectionPDB, grouped with
    parsePDB.arrangeResidues, and written back residue by residue following
    the sorted order of the residue keys. The input file is overwritten.

    args: -> path_protein, path of the PDB file to rewrite
    return: path_protein
    """

    list_atoms = parsePDB.loadCoordSectionPDB(path_protein)
    dico_residues = parsePDB.arrangeResidues(list_atoms)
    # sorted() instead of keys().sort(): keys() is a list only on Python 2.
    # The order is computed before the file is reopened for writing so that a
    # failure here cannot leave a truncated file behind.
    list_res = sorted(dico_residues.keys())

    filout = open(path_protein, "w")
    try:
        for resID in list_res:
            for atom in dico_residues[resID]:
                writePDBfile.coordinateStructure(atom, "ATOM", filout)
    finally:
        # release the handle even when the writer raises
        filout.close()

    return path_protein
Example #6
0
def manageTMalign (path_protein ) : 
    """
    Rewrite a PDB file in place with its atoms grouped by residue.

    The atoms are loaded with parsePDB.loadCoordSectionPDB, grouped with
    parsePDB.arrangeResidues, and written back residue by residue following
    the sorted order of the residue keys. The input file is overwritten.

    args: -> path_protein, path of the PDB file to rewrite
    return: path_protein
    """

    list_atoms = parsePDB.loadCoordSectionPDB(path_protein)
    dico_residues = parsePDB.arrangeResidues(list_atoms)
    # sorted() instead of keys().sort(): keys() is a list only on Python 2.
    # The order is computed before the file is reopened for writing so that a
    # failure here cannot leave a truncated file behind.
    list_res = sorted (dico_residues.keys ())

    filout = open (path_protein, "w")
    try : 
        for resID in list_res : 
            for atom in dico_residues[resID] : 
                writePDBfile.coordinateStructure(atom, "ATOM", filout)
    finally : 
        # release the handle even when the writer raises
        filout.close ()

    return path_protein
Example #7
0
def separeByChain (path_PDB_file):
    """
    Split a PDB file into one file per chain identifier

    Each output file is named after the input path without its last four
    characters (the extension), followed by "_<chainID>.pdb".

    args: -> path_PDB_file, path of the PDB file to split
    return: list of the paths written, in order of first appearance of
            each chain
    """
    l_atom_parsed = parsePDB.loadCoordSectionPDB(path_PDB_file, section="")
    p_prefix = path_PDB_file[0:-4]

    l_p_chain = []
    d_filout_chain = {}
    for atom_parsed in l_atom_parsed : 
        chainID = atom_parsed["chainID"]
        # first atom of a chain -> open its output file
        if chainID not in d_filout_chain : 
            p_chain = p_prefix + "_" + chainID + ".pdb"
            d_filout_chain[chainID] = open (p_chain, "w")
            l_p_chain.append (p_chain)
        # NOTE(review): manageTMalign calls coordinateStructure(atom, "ATOM", filout);
        # here the file handle is the second argument -- confirm the signature
        writePDBfile.coordinateStructure(atom_parsed, d_filout_chain[chainID])

    # close files
    for filout_chain in d_filout_chain.values () : 
        filout_chain.close ()

    return l_p_chain
Example #8
0
def separeByChain(path_PDB_file):
    """
    Split a PDB file into one file per chain identifier

    Each output file is named after the input path without its last four
    characters (the extension), followed by "_<chainID>.pdb".

    args: -> path_PDB_file, path of the PDB file to split
    return: list of the paths written, in order of first appearance of
            each chain
    """
    l_atom_parsed = parsePDB.loadCoordSectionPDB(path_PDB_file, section="")
    p_prefix = path_PDB_file[0:-4]

    l_p_chain = []
    d_filout_chain = {}
    for atom_parsed in l_atom_parsed:
        chainID = atom_parsed["chainID"]
        # first atom of a chain -> open its output file
        if chainID not in d_filout_chain:
            p_chain = p_prefix + "_" + chainID + ".pdb"
            d_filout_chain[chainID] = open(p_chain, "w")
            l_p_chain.append(p_chain)
        # NOTE(review): manageTMalign calls coordinateStructure(atom, "ATOM", filout);
        # here the file handle is the second argument -- confirm the signature
        writePDBfile.coordinateStructure(atom_parsed, d_filout_chain[chainID])

    # close files
    for filout_chain in d_filout_chain.values():
        filout_chain.close()

    return l_p_chain
Example #9
0
File: main.py Project: ABorrel/LSRs
def _incrementCount (d_count, k_count):
    """Add 1 to the counter stored under k_count in d_count (created at 0 when missing)."""
    d_count[k_count] = d_count.get (k_count, 0) + 1


def retrieveSubstructSuperimposed (name_lig, thresold_BS = 4.5, thresold_superimposed_ribose = 2.5, thresold_superimposed_pi = 3, thresold_shaep = 0.4):
    """
    Extract, for every reference folder of a ligand dataset, the atoms of the
    query ligands found next to the reference sub-structures, score them with
    ShaEP and write the selected ones together with their binding site.

    Files written in pathManage.result(name_lig):
      - log_superimposed.txt, quality_extraction.txt
      - shaep_global.txt and shaep_global_<type>.txt (one line per scored LSR)
      - list_<type>_<thresold_shaep>_smile.txt (SMILES of the selected LSR)
    Files written in pathManage.result(name_lig + "/" + <reference>):
      - all_ligand_aligned.pdb, all_ligand_aligned_<thresold_shaep>.pdb
      - substituent_<ligand>_<PDB>_<type>_<score>.pdb
      - CX_<ligand file> and BS_<ligand file> for the selected LSR

    args: -> name_lig, name of the reference ligand (dataset / result folder)
          -> thresold_BS, maximal value of parsePDB.distanceTwoatoms between a
             ligand atom and a complex atom to put the latter in the binding site
          -> thresold_superimposed_ribose, thresold_superimposed_pi, forwarded
             to neighborSearch.searchNeighborAtom
          -> thresold_shaep, minimal ShaEP "best_similarity" to select a LSR
    return: 1
    """

    # ouput
    p_dir_dataset = pathManage.dataset(name_lig)
    p_dir_result = pathManage.result(name_lig)
    l_folder_ref = listdir(p_dir_dataset)

    # log control
    p_log = open(p_dir_result + "log_superimposed.txt", "w")

    # control extraction: global counters and per sub-structure type counters
    d_control = {}
    d_control["pr ref"] = 0
    d_control["lig query"] = 0
    d_control["subref"] = {}
    d_control["subref empty"] = {}
    d_control["out sheap"] = {}
    filout_control = open (p_dir_result + "quality_extraction.txt", "w")

    # stock smile code: d_smile[type][smile] -> count + lists PDB / ligand / ref
    d_smile = {}

    # sheap control: "list" keeps the paths, the other keys the open files
    header_sheap = "name\tbest_similarity\tshape_similarity\tESP_similarity\n"
    d_filout_sheap = {}
    d_filout_sheap["list"] = [p_dir_result + "shaep_global.txt"]
    d_filout_sheap["global"] = open (p_dir_result + "shaep_global.txt", "w")
    d_filout_sheap["global"].write (header_sheap)

    for ref_folder in l_folder_ref :
        # control folder reference name
        if len (ref_folder) != 4 :
            p_log.write ("[ERROR folder] -> " + ref_folder + "\n")
            continue

        p_dir_ref = p_dir_dataset + ref_folder + "/"

        # reference
        p_lig_ref = pathManage.findligandRef(p_dir_ref, name_lig)
        try:
            lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
        except Exception:
            # str(): the lookup result is tested against 0 in datasetPreparation,
            # so it is not always a string and plain concatenation could raise
            p_log.write ("[ERROR ligand ref] -> " + str (p_lig_ref) + "\n")
            continue

        # control
        d_control["pr ref"] = d_control["pr ref"] + 1

        # output by reference
        p_dir_result_ref = pathManage.result(name_lig + "/" + ref_folder)
        d_filout_superimposed = {}
        d_filout_superimposed["global"] = open (p_dir_result_ref + "all_ligand_aligned.pdb", "w")
        d_filout_superimposed["sheap"] = open (p_dir_result_ref + "all_ligand_aligned_" + str (thresold_shaep) + ".pdb", "w")

        # write lig ref -> connect matrix correct in all reference and all sheap
        writePDBfile.coordinateSection(d_filout_superimposed["global"], lig_ref_parsed, "HETATM", connect_matrix = 1)
        writePDBfile.coordinateSection(d_filout_superimposed["sheap"], lig_ref_parsed, "HETATM", connect_matrix = 1)

        # inspect folder dataset
        for pdbfile in listdir(p_dir_ref) :
            # no ligand file
            if len (pdbfile.split ("_")) == 1 :
                continue
            name_query = pdbfile[:-4] # remove extention
            l_elem_query = name_query.split ("_")

            # ligand file = <3 characters>_<4 characters>, from another PDB than the reference
            if not (len (l_elem_query[0]) == 3 and len (l_elem_query[1]) == 4 and l_elem_query[1] != ref_folder) :
                continue
            lig_query = l_elem_query[0]
            PDB_query = l_elem_query[1]

            p_lig = p_dir_ref + name_query + ".pdb"
            if p_lig_ref == p_lig :
                continue

            # pass case where ligand replace same ligand -> does not need run
            if lig_query == name_lig :
                p_log.write ("[REMOVE] -> same ligand substituate\n")
                continue

            # parsed ligand query
            lig_parsed = parsePDB.loadCoordSectionPDB(p_lig, "HETATM")

            # find matrix of rotation and control that the file exists
            p_matrix = pathManage.findMatrix(p_lig_ref, p_lig, name_lig)
            if not path.exists(p_matrix) :
                p_log.write ("[ERROR] -> Matrix transloc " + p_lig_ref + " " + p_lig + " " + name_lig + "\n")
                continue

            # control
            d_control["lig query"] = d_control["lig query"] + 1

            # find the path of complex used: ligand file name without its 4 first characters
            name_file_lig = p_lig.split ("/")[-1]
            p_complex = p_dir_ref + name_file_lig[4:]

            # ligand rotated -> change the referentiel
            superposeStructure.applyMatrixLigand(lig_parsed, p_matrix)

            # use substruct
            l_p_substruct_ref = pathManage.findSubstructRef (p_dir_ref, name_lig)
            for p_substruct_ref in l_p_substruct_ref :
                # sub-structure type = second to last "_" separated field of the path
                struct_type = p_substruct_ref.split ("_")[-2]
                substruct_parsed = parsePDB.loadCoordSectionPDB(p_substruct_ref, "HETATM")

                l_atom_substituate = neighborSearch.searchNeighborAtom(substruct_parsed, lig_parsed, struct_type, p_log, thresold_superimposed_ribose = thresold_superimposed_ribose, thresold_superimposed_pi = thresold_superimposed_pi)
                # control find
                if len (l_atom_substituate) == 0 :
                    _incrementCount (d_control["subref empty"], struct_type)
                    continue
                _incrementCount (d_control["subref"], struct_type)

                # write PDB file, convert smile
                p_substituate_pdb = p_dir_result_ref + "substituent_" + lig_query + "_" + PDB_query + "_" + struct_type + ".pdb"
                writePDBfile.coordinateSection(p_substituate_pdb, l_atom_substituate, recorder="HETATM", header=0, connect_matrix = 1)

                # sheap reference on part of ligand
                p_sheap = runOtherSoft.runShaep (p_substruct_ref, p_substituate_pdb, p_substituate_pdb[0:-4] + ".hit", clean = 0)
                val_sheap = parseShaep.parseOutputShaep (p_sheap)
                if val_sheap == {} :
                    p_log.write ("[ERROR] -> ShaEP " + p_substituate_pdb + " " + p_substruct_ref + "\n")
                    _incrementCount (d_control["out sheap"], struct_type)
                    continue

                # first score of this type -> open its own control file
                if not struct_type in d_filout_sheap :
                    p_filout_type = p_dir_result + "shaep_global_" + struct_type + ".txt"
                    d_filout_sheap[struct_type] = open (p_filout_type, "w")
                    d_filout_sheap[struct_type].write (header_sheap)
                    d_filout_sheap["list"].append (p_filout_type)

                # write value in ShaEP control
                best_similarity = str (val_sheap["best_similarity"])
                line_sheap = ref_folder + "_" + str (PDB_query) + "_" + struct_type + "_" + str (lig_query) + "\t" + best_similarity + "\t" + str (val_sheap["shape_similarity"]) + "\t" + str (val_sheap["ESP_similarity"]) + "\n"
                d_filout_sheap[struct_type].write (line_sheap)
                d_filout_sheap["global"].write (line_sheap)

                # rename file substituent with shaEP value
                p_substituate_scored = p_substituate_pdb[:-4] + "_" + best_similarity + ".pdb"
                rename(p_substituate_pdb, p_substituate_scored)
                p_substituate_pdb = p_substituate_scored

                # write all substruct in global file
                header_lig = str (name_file_lig) + "_" + best_similarity
                writePDBfile.coordinateSection(d_filout_superimposed["global"], lig_parsed, recorder= "HETATM", header = header_lig, connect_matrix = 1)

                # control sheap thresold ("not >=" kept so that a NaN score is rejected)
                if not float (val_sheap["best_similarity"]) >= thresold_shaep :
                    _incrementCount (d_control["out sheap"], struct_type)
                    continue

                # write subligand superimposed selected in global files
                writePDBfile.coordinateSection(d_filout_superimposed["sheap"], lig_parsed, recorder= "HETATM", header = header_lig, connect_matrix = 1)

                ############
                # write BS #
                ############
                # not only protein superimposed -> also ion and water
                l_atom_complex = parsePDB.loadCoordSectionPDB(p_complex)
                superposeStructure.applyMatrixProt(l_atom_complex, p_matrix)
                p_file_cx = p_dir_result_ref + "CX_" + name_file_lig
                # write CX
                writePDBfile.coordinateSection(p_file_cx, l_atom_complex, recorder="ATOM", header= name_file_lig, connect_matrix = 0)

                # search atom in BS
                l_atom_binding_site = []
                for atom_complex in l_atom_complex :
                    for atom_substruct in lig_parsed :
                        if parsePDB.distanceTwoatoms (atom_substruct, atom_complex) <= thresold_BS :
                            if not atom_complex in l_atom_binding_site :
                                l_atom_binding_site.append (deepcopy(atom_complex))
                            # one close ligand atom is enough, further hits would be no-ops
                            break

                # retrieve complet residue
                l_atom_BS_res = parsePDB.getResidues(l_atom_binding_site, l_atom_complex)

                # write binding site
                p_binding = p_dir_result_ref + "BS_" + name_file_lig
                writePDBfile.coordinateSection(p_binding, l_atom_BS_res, "ATOM", p_binding, connect_matrix = 0)

                # smile code substituate analysis
                # Step smile -> not conversion if shaep not validate
                smile_find = runOtherSoft.babelConvertPDBtoSMILE(p_substituate_pdb)
                d_type_smile = d_smile.setdefault (struct_type, {})
                if not smile_find in d_type_smile :
                    d_type_smile[smile_find] = {"count": 0, "PDB": [], "ligand": [], "ref": []}
                d_type_smile[smile_find]["count"] = d_type_smile[smile_find]["count"] + 1
                d_type_smile[smile_find]["PDB"].append (PDB_query)
                d_type_smile[smile_find]["ligand"].append (lig_query)
                d_type_smile[smile_find]["ref"].append (ref_folder)

        tool.closeDicoFile (d_filout_superimposed)

    # sheap control
    # NOTE(review): d_filout_sheap also holds the "list" entry, which is a list
    # of paths and not a file -- confirm that tool.closeDicoFile tolerates it
    tool.closeDicoFile (d_filout_sheap)
    for p_file_sheap in d_filout_sheap["list"] :
        runOtherSoft.RhistogramMultiple (p_file_sheap)

    # write list of smile
    for substruct in d_smile.keys () :
        p_list_smile = p_dir_result + "list_" + substruct + "_" + str (thresold_shaep) + "_smile.txt"
        filout_smile = open (p_list_smile, "w")
        for smile_code in d_smile[substruct].keys () :
            d_found = d_smile[substruct][smile_code]
            filout_smile.write (str (smile_code) + "\t" + str (d_found["count"]) + "\t" + " ".join (d_found["PDB"]) + "\t" + " ".join (d_found["ref"]) + "\t" + " ".join(d_found["ligand"]) + "\n")
        filout_smile.close ()
    p_log.close ()

    # control
    filout_control.write ("NB ref: " + str(d_control["pr ref"]) + "\n")
    filout_control.write ("Ligand query: " + str(d_control["lig query"]) + "\n")
    for k in d_control["subref"].keys () :
        filout_control.write ("LSR " + str (k) + ": " + str(d_control["subref"][k]) + "\n")
    for k in d_control["subref empty"].keys () :
        filout_control.write ("NB LSR empty " + str (k) + ": " + str(d_control["subref empty"][k]) + "\n")
    for k in d_control["out sheap"].keys () :
        filout_control.write ("LSR out by sheap " + str (k) + ": " + str(d_control["out sheap"][k]) + "\n")

    filout_control.write ("**********************\n\n")
    for k in d_control["subref"].keys () :
        # a type that never failed ShaEP has no "out sheap" entry -> counted as 0
        filout_control.write ("LSR keep" + str (k) + ": " + str(d_control["subref"][k] - d_control["out sheap"].get (k, 0)) + "\n")

    filout_control.close ()

    return 1
Example #10
0
def superpositionAllRef (l_ligand, name_folder_final, debug = 1):   
    """
    Superimpose every reference ligand of the final result folder on the
    first one found for the same ligand name, using the atoms returned by
    substructTools.retrieveAdenine, and write the RMSE of each superposition.

    Folder layout walked: <final>/<type>/<sub>/<ref>/LGD/, where a "cycle"
    sub-folder is replaced by its own sub-folders. Only files matching
    "LGD_REF_A" and ".pdb" are read, one per 4-character PDB code.

    Files written in <final>/refAlignement/, for each ligand of l_ligand:
      - <ligand>_superimposed.pdb
      - <ligand>_RMSE.txt, then given to runOtherSoft.Rhistogram

    args: -> l_ligand, list of ligand names; a ligand found in a file name
             but absent from this list raises KeyError
          -> name_folder_final, suffix of the "final_" result folder
          -> debug, true value to print the folders visited
    return: None
    """

    pr_final = pathManage.result("final_" + name_folder_final)
    pr_align = pathManage.generatePath(pr_final + "refAlignement/")

    l_ref = []          # PDB codes already processed
    d_filout_pdb = {}
    d_filout_RMSE = {}
    d_ref = {}          # ligand -> [atoms of the first ligand found, its adenine atoms]
    l_file_RMSE = []
    for ligand in l_ligand : 
        p_RMSE = pr_align + ligand + "_" + "RMSE.txt"
        d_filout_pdb[ligand] = open (pr_align + ligand + "_" + "superimposed.pdb" , "w")
        d_filout_RMSE[ligand] = open (p_RMSE , "w")
        l_file_RMSE.append (p_RMSE)

    # NOTE: trace prints use one pre-formatted string so that the output is the
    # same with the Python 2 print statement and the Python 3 print function
    for pr_type_ref in listdir(pr_final) : 
        if debug : 
            print("1 %s" % (pr_type_ref,))
        # case where pr_type_ref is a file not a folder
        try : 
            l_pr_sub = listdir(pr_final + pr_type_ref + "/")
        except OSError : 
            continue

        for pr_sub in l_pr_sub : 
            # trace now follows the debug flag like the other levels
            if debug : 
                print("2 %s" % (pr_sub,))

            # case cycle -> replaced in the list by its own sub-folders
            if pr_sub == "cycle" : 
                l_pr_sub.remove ("cycle")
                for pr_sub_cycle in listdir (pr_final + pr_type_ref + "/cycle") : 
                    l_pr_sub.append ("cycle/" + pr_sub_cycle)
                # the list was modified -> leave the loop immediately
                break

        for pr_sub in l_pr_sub : 
            if debug : 
                print("3 %s" % (pr_sub,))
            # entry that cannot be listed (file) -> skipped; the original kept
            # going with the list of the previous folder (or no list at all)
            try : 
                l_pr_ref = listdir (pr_final + pr_type_ref + "/" + pr_sub)
            except OSError : 
                continue

            for pr_ref in l_pr_ref : 
                if debug : 
                    print("4 %s" % (pr_ref,))
                p_dir_LGD = pr_final + pr_type_ref + "/" + pr_sub + "/" + pr_ref + "/LGD/"
                # case no folder
                try : 
                    l_file = listdir(p_dir_LGD)
                except OSError : 
                    continue

                for name_file in l_file : 
                    if not (search("LGD_REF_A", name_file) and search(".pdb", name_file)) : 
                        continue

                    # file name fields: [2] ligand name, [3] starts with the PDB code
                    l_elem_name = name_file.split ("_")
                    PDB_ref = l_elem_name[3][:4]
                    if PDB_ref in l_ref : 
                        print("!!!!! IN")
                        break
                    l_ref.append (PDB_ref)

                    ligand = l_elem_name[2]
                    l_atom_ligand = parsePDB.loadCoordSectionPDB(p_dir_LGD + name_file, "HETATM", remove_H=1)
                    l_atom_adenine = substructTools.retrieveAdenine(l_atom_ligand)

                    if not ligand in d_ref : 
                        # first ligand of this name -> becomes the reference
                        d_ref[ligand] = [l_atom_ligand, l_atom_adenine]
                        writePDBfile.coordinateSection(d_filout_pdb[ligand], l_atom_ligand, "HETATM", connect_matrix = 1)
                        continue

                    rotation, translocation = superimpose.rigid_transform_3D(l_atom_adenine, d_ref[ligand][-1])
                    # identity test: "== None" is not a reliable test when the
                    # results are arrays
                    if rotation is None or translocation is None : 
                        continue

                    # rotation + translation
                    l_atom_lig_rotated = superimpose.applyTranformation(rotation, translocation, l_atom_in=l_atom_ligand)
                    # RMSE only between ligands with the same number of atoms
                    if len (l_atom_lig_rotated) != len (d_ref[ligand][0]) : 
                        continue

                    # write PDB file and RMSE
                    writePDBfile.coordinateSection(d_filout_pdb[ligand], l_atom_lig_rotated, "HETATM", connect_matrix = 1)
                    RMSE_ligand = superimpose.rmse(d_ref[ligand][0], l_atom_lig_rotated)
                    d_filout_RMSE[ligand].write (str (pr_ref) + pr_type_ref + "\t" + str(RMSE_ligand) + "\n")

    # close files
    for lig in d_filout_pdb.keys () : 
        d_filout_pdb[lig].close ()
        d_filout_RMSE[lig].close ()

    for file_RMSE in l_file_RMSE : 
        runOtherSoft.Rhistogram(file_RMSE, "RMSE_Adenine")
Example #11
0
def enantiomer(l_ligand, name_folder_final, debug = 1) : 
    """
    Measure oxygen-oxygen distances in every reference ligand of the final
    result folder and write them in <final>/enantiomer/.

    Folder layout walked: <final>/<type>/<sub>/<ref>/LGD/, where a "cycle"
    sub-folder is replaced by its own sub-folders. Only files matching
    "LGD_REF_A" and ".pdb" are read, one per 4-character PDB code.

    Files written for each ligand of l_ligand (then given to
    runOtherSoft.Rhistogram):
      - <ligand>_O4O5: distance O4' - O5'
      - <ligand>_O3OP: smallest distance (below 100) between the origin atom
        (O3' for AMP, O4' otherwise) and O1P/O2P/O3P (AMP) or O1A/O2A/O3A
      - <ligand>_OPOP: for ATP and ADP, smallest distance between O1A/O2A
        and O1B/O2B
    A measure is skipped when one of its atoms is missing from the file.

    args: -> l_ligand, list of ligand names; a ligand found in a file name
             but absent from this list raises KeyError
          -> name_folder_final, suffix of the "final_" result folder
          -> debug, true value to print the folders visited
    return: None
    """

    pr_final = pathManage.result("final_" + name_folder_final)
    pr_enantiomer = pathManage.generatePath(pr_final + "enantiomer/")

    l_ref = []  # PDB codes already processed

    d_filout = {}
    for ligand in l_ligand : 
        d_filout[ligand] = {}
        for type_dist in ("O3OP", "O4O5", "OPOP") : 
            d_filout[ligand][type_dist] = open (pr_enantiomer + ligand + "_" + type_dist , "w")

    # NOTE: trace prints use one pre-formatted string so that the output is the
    # same with the Python 2 print statement and the Python 3 print function
    for pr_type_ref in listdir(pr_final) : 
        if debug : 
            print("1 %s" % (pr_type_ref,))
        # case where pr_type_ref is a file not a folder
        try : 
            l_pr_sub = listdir(pr_final + pr_type_ref + "/")
        except OSError : 
            continue

        for pr_sub in l_pr_sub : 
            # trace now follows the debug flag like the other levels
            if debug : 
                print("2 %s" % (pr_sub,))

            # case cycle -> replaced in the list by its own sub-folders
            if pr_sub == "cycle" : 
                l_pr_sub.remove ("cycle")
                for pr_sub_cycle in listdir (pr_final + pr_type_ref + "/cycle") : 
                    l_pr_sub.append ("cycle/" + pr_sub_cycle)
                # the list was modified -> leave the loop immediately
                break

        for pr_sub in l_pr_sub : 
            if debug : 
                print("3 %s" % (pr_sub,))
            # entry that cannot be listed (file) -> skipped; the original kept
            # going with the list of the previous folder (or no list at all)
            try : 
                l_pr_ref = listdir (pr_final + pr_type_ref + "/" + pr_sub)
            except OSError : 
                continue

            for pr_ref in l_pr_ref : 
                if debug : 
                    print("4 %s" % (pr_ref,))
                p_dir_LGD = pr_final + pr_type_ref + "/" + pr_sub + "/" + pr_ref + "/LGD/"
                # case no folder
                try : 
                    l_file = listdir(p_dir_LGD)
                except OSError : 
                    continue

                for name_file in l_file : 
                    if not (search("LGD_REF_A", name_file) and search(".pdb", name_file)) : 
                        continue

                    # file name fields: [2] ligand name, [3] starts with the PDB code
                    l_elem_name = name_file.split ("_")
                    PDB_ref = l_elem_name[3][:4]
                    if PDB_ref in l_ref : 
                        print("!!!!! IN")
                        break
                    l_ref.append (PDB_ref)

                    ligand = l_elem_name[2]
                    l_atom_ligand = parsePDB.loadCoordSectionPDB(p_dir_LGD + name_file, "HETATM")

                    # atoms by name, rebuilt for each file so that nothing is
                    # inherited from the previous ligand; last atom of a name wins
                    d_atom = {}
                    for atom_ligand in l_atom_ligand : 
                        d_atom[atom_ligand["name"]] = atom_ligand

                    # d O4 - O5 -> file skipped when one of the two is missing
                    if not "O4'" in d_atom or not "O5'" in d_atom : 
                        continue
                    d_O4O5 = parsePDB.distanceTwoatoms(d_atom["O4'"], d_atom["O5'"])
                    d_filout[ligand]["O4O5"].write (pr_ref + "_" + pr_type_ref + "\t" + str (d_O4O5) + "\n")

                    # d O3 - OP
                    if ligand == "AMP" : 
                        atom_origin = d_atom.get ("O3'")
                        l_name_OP = ("O1P", "O2P", "O3P")
                    else : 
                        # NOTE(review): the original measures from O4' here
                        # although the output is named O3OP -- kept, confirm intent
                        atom_origin = d_atom["O4'"]
                        l_name_OP = ("O1A", "O2A", "O3A")

                    d_minO3OP = 100
                    name_minO3OP = None
                    if atom_origin is not None : 
                        for atom_ligand in l_atom_ligand : 
                            if atom_ligand["name"] in l_name_OP : 
                                d_tempO3OP = parsePDB.distanceTwoatoms(atom_origin, atom_ligand)
                                if d_tempO3OP < d_minO3OP : 
                                    d_minO3OP = d_tempO3OP
                                    name_minO3OP = atom_ligand["name"]
                    # nothing written when no oxygen was found for this file
                    if name_minO3OP is not None : 
                        d_filout[ligand]["O3OP"].write (pr_ref + "_" + pr_type_ref + "_" + str(name_minO3OP) + "\t" + str (d_minO3OP) + "\n")

                    # d OP OP
                    if ligand == "ATP" or ligand == "ADP" : 
                        if all (name_atom in d_atom for name_atom in ("O1A", "O2A", "O1B", "O2B")) : 
                            d_OP = {}
                            for name_A, name_B in (("O1A", "O1B"), ("O1A", "O2B"), ("O2A", "O1B"), ("O2A", "O2B")) : 
                                d_OP[name_A + name_B] = parsePDB.distanceTwoatoms(d_atom[name_A], d_atom[name_B])

                            # pair with the smallest distance
                            k_min = min (d_OP, key = d_OP.get)
                            d_filout[ligand]["OPOP"].write (pr_ref + "_" + pr_type_ref + "_" + str(k_min) + "\t" + str (d_OP[k_min]) + "\n")

    # close files
    for lig in l_ligand : 
        for type_dist in d_filout[lig].keys () : 
            p_file = d_filout[lig][type_dist].name
            d_filout[lig][type_dist].close ()
            runOtherSoft.Rhistogram(p_file, type_dist, brk = 20)
Example #12
0
def globalArrangement (pr_orgin, p_smile, p_family, name_ligand, l_ligand_out):
    """Sort the replacements listed in a SMILES summary file into result folders.

    Every non-blank line of p_smile is read as tab-separated fields: the SMILES
    code is the first field and the last three fields are the space-separated
    query PDB ids, reference PDB ids and query ligand names (same index = same
    entry). For each entry whose ligand is not excluded, the folder
    pr_orgin/<first_folder>/<replacement>/<family>-<group>_<PDB ref>/ is
    created with three sub-folders: LGD/ (query and reference ligands),
    BS/ (binding sites) and LSR/ (substructures and the list of SMILES codes).

    An entry is skipped when the superposition matrix cannot be applied, or
    when no reference substructure / query binding site file matches it.

    Args:
        pr_orgin: root output folder; it is concatenated directly with the
            sub-folder names, so it has to end with a path separator.
        p_smile: path of the SMILES summary file. The third "_"-separated
            token from the end of the path is used as the substructure name.
        p_family: not used by this function, kept for call compatibility.
        name_ligand: name of the reference ligand.
        l_ligand_out: names of query ligands to exclude.

    Returns:
        1
    """

    subst = p_smile.split("_")[-3]

    with open(p_smile, "r") as filin:
        l_line_smile = filin.readlines()

    for line_smile in l_line_smile:
        # a blank line has no field to index
        if not line_smile.strip():
            continue

        l_field = line_smile.split("\t")
        smile = l_field[0]
        l_PDB_query = l_field[-3].split(" ")
        l_PDB_ref = l_field[-2].split(" ")
        # the last field carries the end of line -> strip before splitting
        l_ligand = line_smile.strip().split("\t")[-1].split(" ")

        # LSR flagged as small by smileAnalysis go to a separate "_small" folder
        small_LSR = smileAnalysis.smallLSR(smile)
        first_folder = "ribose" if subst == "ribose" else "Pi"
        if small_LSR == 1:
            first_folder = first_folder + "_small"

        # one pre-formatted string: same text as the former print statement
        print("%s %s %s %s %s %s" % (smile, l_PDB_query, l_PDB_ref, l_ligand,
                                     subst, small_LSR))
        replacement, metal = smileAnalysis.searchReplacement(
            smile, l_PDB_query[0], l_PDB_ref[0], name_ligand)

        # case with cycle -> second search, result used as sub-folder
        if replacement == "cycle":
            replacement2, metal = smileAnalysis.searchReplacement(
                smile, l_PDB_query[0], l_PDB_ref[0], name_ligand, in_cycle=1)
            replacement = replacement + "/" + replacement2

        # case metal
        if replacement == "metal":
            print("%s %s %s %s" % (metal, l_PDB_query, l_PDB_ref, name_ligand))

        for i, PDB_ref_id in enumerate(l_PDB_ref):

            # exclusion of ligand out
            ligand_query = l_ligand[i]
            if ligand_query in l_ligand_out:
                continue
            PDB_query_id = l_PDB_query[i]

            group, family = analysis.findFamilyAndGroup(PDB_ref_id)

            # folder reference
            pr_dataset = pathManage.dataset(name_ligand + "/" + PDB_ref_id)
            PDB_ref = pathManage.findPDBRef(pr_dataset)
            p_ligand_ref = pathManage.findligandRef(pr_dataset, name_ligand)
            l_frag_ref = pathManage.findSubstructRef(pr_dataset, name_ligand)
            # None when nothing matches, instead of keeping the path found
            # for a previous entry
            p_frag_ref = next(
                (f_ref for f_ref in l_frag_ref if search(subst, f_ref)), None)

            # folder query
            pr_result = pathManage.result(name_ligand + "/" + PDB_ref_id)
            l_protein_tranloc = pathManage.findPDBQueryTransloc(pr_result)
            # only needed by the protein copy below, which is disabled
            p_protein_query = next(
                (p_t for p_t in l_protein_tranloc
                 if search(ligand_query, p_t) and search(PDB_query_id, p_t)),
                None)

            # with a metal replacement the query file is looked up by metal name
            name_query = metal if replacement == "metal" else ligand_query
            p_lig_query = pathManage.findligandQuery(pr_dataset, name_query,
                                                     PDB_query_id)

            # need apply transloc matrix
            matrix_transloc = pathManage.findMatrix(p_ligand_ref, p_lig_query,
                                                    name_ligand)
            lig_query_parsed = parsePDB.loadCoordSectionPDB(p_lig_query)
            try:
                superposeStructure.applyMatrixLigand(lig_query_parsed,
                                                     matrix_transloc)
            except Exception:
                # best effort: an entry that cannot be superimposed is skipped
                continue

            p_lig_substituate = pathManage.findSubstructFind(
                pr_result, ligand_query, PDB_query_id, subst)
            l_p_BS = pathManage.findFileBS(pr_result, PDB_query_id)
            p_BS = next((BS for BS in l_p_BS if search(ligand_query, BS)),
                        None)

            # nothing is written for an entry with a missing input file
            if p_frag_ref is None or p_BS is None:
                print("[SKIP] reference LSR or query BS not found -> %s %s %s" %
                      (PDB_ref_id, PDB_query_id, ligand_query))
                continue

            # output folders: <family>-<group>_<PDB ref>/ with LGD, BS and LSR
            pr_final = (pr_orgin + first_folder + "/" + replacement + "/" +
                        str(family) + "-" + str(group) + "_" + PDB_ref_id + "/")
            pr_ligand = pr_final + "LGD/"
            pr_BS = pr_final + "BS/"
            pr_sust = pr_final + "LSR/"
            for pr_out in (pr_final, pr_ligand, pr_BS, pr_sust):
                if not path.isdir(pr_out):
                    makedirs(pr_out)

            # list file -> append mode also creates the file when missing
            with open(pr_sust + "list.smile", "a") as file_smile_queries:
                file_smile_queries.write(str(smile) + "\n")

            name_file_query = p_lig_query.split("/")[-1]
            name_file_ref = p_ligand_ref.split("/")[-1]

            # ligand of the query (superimposed) + smile
            writePDBfile.coordinateSection(pr_ligand + "LGD_" + name_file_query,
                                           lig_query_parsed,
                                           recorder="HETATM",
                                           header="LCG_" + name_file_query,
                                           connect_matrix=1)
            runOtherSoft.babelConvertPDBtoSMILE(
                pr_ligand + "LGD_" + name_file_query, clean_smi=1)
            # ligand of reference + smile
            copy2(p_ligand_ref, pr_ligand + "LGD_REF_" + name_file_ref)
            runOtherSoft.babelConvertPDBtoSMILE(pr_ligand + "LGD_REF_" +
                                                name_file_ref)
            # LSR of reference
            copy2(p_frag_ref,
                  pr_sust + "LSR_REF_" + name_ligand + "_" + PDB_ref_id + ".pdb")
            # protein query
            #copy2(p_protein_query, pr_final)
            # LSR query -> query ligand path only used for the file name
            copy2(p_lig_substituate,
                  pr_sust + "LSR_" + subst + "_" + name_file_query)
            # BS query
            copy2(p_BS, pr_BS)

            # BS from reference
            l_atom_BS = parsePDB.computeBS(PDB_ref, p_ligand_ref,
                                           thresold=4.50, option_onlyATOM=0)
            writePDBfile.coordinateSection(
                pr_BS + "BS_REF_" + name_ligand + "_" + PDB_ref.split("/")[-1],
                l_atom_BS,
                recorder="ATOM",
                header="BS_REF_" + name_ligand + "_" + PDB_ref,
                connect_matrix=0)

    return 1
Example #13
0
def datasetPreparation(ligand_ID, clean=1):
    """Extract ligand and reference-substructure PDB files in a dataset.

    Every sub-folder of the ligand_ID dataset whose name is 4 characters long
    is processed:
      1. when clean == 1, files that do not contain ".pdb", that contain
         "subref", or whose name starts with a 3-character token before the
         first "_" are deleted;
      2. for each remaining file, every ligand reported by parsePDB is
         written to <ligand name>_<file name> in the same folder;
      3. the substructures of the reference ligand are written to
         subref_<substructure>_<folder name>.pdb.
    Folders without reference ligand, or whose reference ligand yields no
    substructure, stop after step 2.

    Args:
        ligand_ID: name of the ligand, used to locate the dataset folder and
            the reference ligand.
        clean: 1 to delete the files described in step 1 first.

    Returns:
        1
    """

    p_dir_dataset = pathManage.dataset(ligand_ID)
    indent = 0

    for ref_folder in listdir(p_dir_dataset):
        # file include in dataset folder
        if len(ref_folder) != 4:
            continue
        pr_ref = p_dir_dataset + ref_folder + "/"
        indent = indent + 1
        # one pre-formatted string: same text as the former print statement
        print("%s %s" % (ref_folder, indent))

        # clean repertory -> only PDB ref and PDB
        if clean == 1:
            for pdbfile in listdir(pr_ref):
                # literal tests: as a regex the "." of ".pdb" matched any char
                if (".pdb" not in pdbfile or "subref" in pdbfile
                        or len(pdbfile.split("_")[0]) == 3):
                    remove(pr_ref + pdbfile)

        # listed again: the cleaning step may have removed files
        for pdbfile in listdir(pr_ref):
            p_file_pdb = pr_ref + pdbfile
            # extract ligand in PDB
            l_ligand = parsePDB.retrieveListLigand(p_file_pdb)
            if not l_ligand:
                continue

            l_atom_pdb_parsed = parsePDB.loadCoordSectionPDB(p_file_pdb)
            for name_ligand in l_ligand:
                l_lig_parsed = parsePDB.retrieveLigand(l_atom_pdb_parsed,
                                                       name_ligand)
                if not l_lig_parsed:
                    continue
                # only the first element returned for this ligand is written
                writePDBfile.coordinateSection(
                    pr_ref + name_ligand + "_" + pdbfile,
                    l_lig_parsed[0],
                    "HETATM",
                    header=0,
                    connect_matrix=1)

        # ligand_ID write for shaep
        p_lig_ref = pathManage.findligandRef(pr_ref, ligand_ID)
        if p_lig_ref == 0:
            continue
        lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref)
        d_l_atom_substruct = substructTools.retrieveSubstruct(
            lig_ref_parsed, ligand_ID)
        # case with AMP without phosphate
        if not d_l_atom_substruct:
            continue
        # write one file by substructure
        for subs, l_atom_substruct in d_l_atom_substruct.items():
            p_filout_substruct = (pr_ref + "subref_" + subs + "_" +
                                  ref_folder + ".pdb")
            writePDBfile.coordinateSection(p_filout_substruct,
                                           l_atom_substruct,
                                           "HETATM",
                                           header=0,
                                           connect_matrix=1)

    return 1
Example #14
0
def _incrementCounter(d_counter, key):
    """Add one to d_counter[key], starting from 0 when the key is missing."""
    d_counter[key] = d_counter.get(key, 0) + 1


def retrieveSubstructSuperimposed(name_lig,
                                  thresold_BS=4.5,
                                  thresold_superimposed_ribose=2.5,
                                  thresold_superimposed_pi=3,
                                  thresold_shaep=0.4):
    """Extract, score and file the atoms replacing each reference substructure.

    For every 4-character reference folder of the name_lig dataset, each
    ligand file named <3 chars>_<4 chars>.pdb coming from another PDB and
    holding another ligand than name_lig is:
      - moved with its stored superposition matrix,
      - searched, for each reference substructure file, for neighbor atoms
        (neighborSearch.searchNeighborAtom),
      - scored against the reference substructure with ShaEP.
    When the ShaEP best_similarity reaches thresold_shaep, the superimposed
    complex (CX_*), its binding site (BS_*) and the SMILES code of the
    substituent are also produced.

    Files written in the result folder of name_lig: log_superimposed.txt,
    quality_extraction.txt, shaep_global*.txt and
    list_<substructure>_<thresold_shaep>_smile.txt; per reference folder:
    all_ligand_aligned*.pdb, substituent_*, CX_* and BS_* files.

    Args:
        name_lig: name of the reference ligand.
        thresold_BS: maximal distance between a complex atom and a ligand
            atom for the complex atom to belong to the binding site.
        thresold_superimposed_ribose: forwarded to searchNeighborAtom.
        thresold_superimposed_pi: forwarded to searchNeighborAtom.
        thresold_shaep: minimal ShaEP best_similarity to keep a substituent.

    Returns:
        1
    """

    # ouput
    p_dir_dataset = pathManage.dataset(name_lig)
    p_dir_result = pathManage.result(name_lig)
    l_folder_ref = listdir(p_dir_dataset)

    # log control
    p_log = open(p_dir_result + "log_superimposed.txt", "w")

    # control extraction
    d_control = {
        "pr ref": 0,
        "lig query": 0,
        "subref": {},
        "subref empty": {},
        "out sheap": {},
    }

    # stock smile code: substructure -> smile -> count / PDB / ligand / ref
    d_smile = {}

    # sheap control: "list" stores the paths, other keys the open files
    header_sheap = "name\tbest_similarity\tshape_similarity\tESP_similarity\n"
    d_filout_sheap = {}
    d_filout_sheap["list"] = [p_dir_result + "shaep_global.txt"]
    d_filout_sheap["global"] = open(p_dir_result + "shaep_global.txt", "w")
    d_filout_sheap["global"].write(header_sheap)

    for ref_folder in l_folder_ref:
        # control folder reference name
        if len(ref_folder) != 4:
            p_log.write("[ERROR folder] -> " + ref_folder + "\n")
            continue

        # reference
        p_lig_ref = pathManage.findligandRef(p_dir_dataset + ref_folder + "/",
                                             name_lig)
        try:
            lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
        except Exception:
            # str(): datasetPreparation compares this finder's result with 0,
            # so it is not always a string
            p_log.write("[ERROR ligand ref] -> " + str(p_lig_ref) + "\n")
            continue

        # control
        d_control["pr ref"] = d_control["pr ref"] + 1

        # output by reference
        p_dir_result_ref = pathManage.result(name_lig + "/" + ref_folder)
        d_filout_superimposed = {}
        d_filout_superimposed["global"] = open(
            p_dir_result_ref + "all_ligand_aligned.pdb", "w")
        d_filout_superimposed["sheap"] = open(
            p_dir_result_ref + "all_ligand_aligned_" + str(thresold_shaep) +
            ".pdb", "w")

        # write lig ref -> connect matrix correct in all reference and all sheap
        writePDBfile.coordinateSection(d_filout_superimposed["global"],
                                       lig_ref_parsed,
                                       "HETATM",
                                       connect_matrix=1)
        writePDBfile.coordinateSection(d_filout_superimposed["sheap"],
                                       lig_ref_parsed,
                                       "HETATM",
                                       connect_matrix=1)

        # inspect folder dataset
        for pdbfile in listdir(p_dir_dataset + ref_folder + "/"):
            # no ligand file
            if len(pdbfile.split("_")) == 1:
                continue
            pdbfile = pdbfile[:-4]  # remove extention

            # keep <3 chars>_<4 chars> names coming from another PDB
            l_elem = pdbfile.split("_")
            if not (len(l_elem[0]) == 3 and len(l_elem[1]) == 4
                    and l_elem[1] != ref_folder):
                continue
            name_query = l_elem[0]
            PDB_query = l_elem[1]

            p_lig = p_dir_dataset + ref_folder + "/" + pdbfile + ".pdb"
            if p_lig_ref == p_lig:
                continue

            # pass case where ligand replace same ligand -> does not need run
            if name_query == name_lig:
                p_log.write("[REMOVE] -> same ligand substituate\n")
                continue

            # parsed ligand query
            lig_parsed = parsePDB.loadCoordSectionPDB(p_lig, "HETATM")

            # find matrix of rotation
            p_matrix = pathManage.findMatrix(p_lig_ref, p_lig, name_lig)
            # control file matrix exist
            if not path.exists(p_matrix):
                p_log.write("[ERROR] -> Matrix transloc " + p_lig_ref + " " +
                            p_lig + " " + name_lig + "\n")
                continue

            # control
            d_control["lig query"] = d_control["lig query"] + 1

            # complex used: ligand file name without its first 4 characters
            name_file_lig = p_lig.split("/")[-1]
            p_complex = p_dir_dataset + ref_folder + "/" + name_file_lig[4:]

            # ligand rotated -> change the referentiel
            superposeStructure.applyMatrixLigand(lig_parsed, p_matrix)

            # use substruct
            l_p_substruct_ref = pathManage.findSubstructRef(
                pathManage.dataset(name_lig) + ref_folder + "/", name_lig)
            for p_substruct_ref in l_p_substruct_ref:
                # ribose or phosphate
                struct_type = p_substruct_ref.split("_")[-2]
                substruct_parsed = parsePDB.loadCoordSectionPDB(
                    p_substruct_ref, "HETATM")

                l_atom_substituate = neighborSearch.searchNeighborAtom(
                    substruct_parsed,
                    lig_parsed,
                    struct_type,
                    p_log,
                    thresold_superimposed_ribose=thresold_superimposed_ribose,
                    thresold_superimposed_pi=thresold_superimposed_pi)

                # control find
                if len(l_atom_substituate) == 0:
                    _incrementCounter(d_control["subref empty"], struct_type)
                    continue
                _incrementCounter(d_control["subref"], struct_type)

                # write PDB file, convert smile
                p_substituate_pdb = (p_dir_result_ref + "substituent_" +
                                     name_query + "_" + PDB_query + "_" +
                                     struct_type + ".pdb")
                writePDBfile.coordinateSection(p_substituate_pdb,
                                               l_atom_substituate,
                                               recorder="HETATM",
                                               header=0,
                                               connect_matrix=1)

                # sheap reference on part of ligand
                p_sheap = runOtherSoft.runShaep(
                    p_substruct_ref,
                    p_substituate_pdb,
                    p_substituate_pdb[0:-4] + ".hit",
                    clean=0)
                val_sheap = parseShaep.parseOutputShaep(p_sheap)
                if val_sheap == {}:
                    p_log.write("[ERROR] -> ShaEP " + p_substituate_pdb +
                                " " + p_substruct_ref + "\n")
                    _incrementCounter(d_control["out sheap"], struct_type)
                    continue

                # one ShaEP control file by type of substructure
                if struct_type not in d_filout_sheap:
                    p_sheap_type = (p_dir_result + "shaep_global_" +
                                    struct_type + ".txt")
                    d_filout_sheap[struct_type] = open(p_sheap_type, "w")
                    d_filout_sheap[struct_type].write(header_sheap)
                    d_filout_sheap["list"].append(p_sheap_type)

                # write value in ShaEP control
                best_similarity = str(val_sheap["best_similarity"])
                line_sheap = (ref_folder + "_" + str(PDB_query) + "_" +
                              struct_type + "_" + str(name_query) + "\t" +
                              best_similarity + "\t" +
                              str(val_sheap["shape_similarity"]) + "\t" +
                              str(val_sheap["ESP_similarity"]) + "\n")
                d_filout_sheap[struct_type].write(line_sheap)
                d_filout_sheap["global"].write(line_sheap)

                # rename file substituent with shaEP value
                p_substituate_scored = (p_substituate_pdb[:-4] + "_" +
                                        best_similarity + ".pdb")
                rename(p_substituate_pdb, p_substituate_scored)
                p_substituate_pdb = p_substituate_scored

                # write all substruct in global file
                header_lig = str(name_file_lig) + "_" + best_similarity
                writePDBfile.coordinateSection(
                    d_filout_superimposed["global"],
                    lig_parsed,
                    recorder="HETATM",
                    header=header_lig,
                    connect_matrix=1)

                # control sheap thresold
                if not float(val_sheap["best_similarity"]) >= thresold_shaep:
                    _incrementCounter(d_control["out sheap"], struct_type)
                    continue

                # write subligand superimposed selected in global files
                writePDBfile.coordinateSection(
                    d_filout_superimposed["sheap"],
                    lig_parsed,
                    recorder="HETATM",
                    header=header_lig,
                    connect_matrix=1)

                ############
                # write BS #
                ############
                # not only protein superimposed -> also ion and water
                l_atom_complex = parsePDB.loadCoordSectionPDB(p_complex)
                superposeStructure.applyMatrixProt(l_atom_complex, p_matrix)
                # write CX
                writePDBfile.coordinateSection(
                    p_dir_result_ref + "CX_" + name_file_lig,
                    l_atom_complex,
                    recorder="ATOM",
                    header=name_file_lig,
                    connect_matrix=0)

                # search atom in BS
                l_atom_binding_site = []
                for atom_complex in l_atom_complex:
                    for atom_substruct in lig_parsed:
                        if parsePDB.distanceTwoatoms(
                                atom_substruct, atom_complex) <= thresold_BS:
                            if atom_complex not in l_atom_binding_site:
                                l_atom_binding_site.append(
                                    deepcopy(atom_complex))
                            # one ligand atom in range is enough
                            break

                # retrieve complet residue
                l_atom_BS_res = parsePDB.getResidues(l_atom_binding_site,
                                                     l_atom_complex)

                # write binding site
                p_binding = p_dir_result_ref + "BS_" + name_file_lig
                writePDBfile.coordinateSection(p_binding,
                                               l_atom_BS_res,
                                               "ATOM",
                                               p_binding,
                                               connect_matrix=0)

                # smile code substituate analysis
                # Step smile -> not conversion if shaep not validate
                smile_find = runOtherSoft.babelConvertPDBtoSMILE(
                    p_substituate_pdb)
                d_found = d_smile.setdefault(struct_type, {}).setdefault(
                    smile_find,
                    {"count": 0, "PDB": [], "ligand": [], "ref": []})
                d_found["count"] = d_found["count"] + 1
                d_found["PDB"].append(PDB_query)
                d_found["ligand"].append(name_query)
                d_found["ref"].append(ref_folder)

        tool.closeDicoFile(d_filout_superimposed)

    # sheap control
    tool.closeDicoFile(d_filout_sheap)
    for p_file_sheap in d_filout_sheap["list"]:
        runOtherSoft.RhistogramMultiple(p_file_sheap)

    # write list of smile
    for substruct in d_smile.keys():
        p_list_smile = pathManage.result(
            name_lig) + "list_" + substruct + "_" + str(
                thresold_shaep) + "_smile.txt"
        with open(p_list_smile, "w") as filout_smile:
            for smile_code in d_smile[substruct].keys():
                d_found = d_smile[substruct][smile_code]
                filout_smile.write(
                    str(smile_code) + "\t" + str(d_found["count"]) + "\t" +
                    " ".join(d_found["PDB"]) + "\t" +
                    " ".join(d_found["ref"]) + "\t" +
                    " ".join(d_found["ligand"]) + "\n")
    p_log.close()

    # control
    with open(p_dir_result + "quality_extraction.txt", "w") as filout_control:
        filout_control.write("NB ref: " + str(d_control["pr ref"]) + "\n")
        filout_control.write("Ligand query: " + str(d_control["lig query"]) +
                             "\n")
        for k in d_control["subref"].keys():
            filout_control.write("LSR " + str(k) + ": " +
                                 str(d_control["subref"][k]) + "\n")
        for k in d_control["subref empty"].keys():
            filout_control.write("NB LSR empty " + str(k) + ": " +
                                 str(d_control["subref empty"][k]) + "\n")
        for k in d_control["out sheap"].keys():
            filout_control.write("LSR out by sheap " + str(k) + ": " +
                                 str(d_control["out sheap"][k]) + "\n")

        filout_control.write("**********************\n\n")
        for k in d_control["subref"].keys():
            # a type never rejected by ShaEP has no "out sheap" entry
            nb_keep = d_control["subref"][k] - d_control["out sheap"].get(k, 0)
            filout_control.write("LSR keep" + str(k) + ": " + str(nb_keep) +
                                 "\n")

    return 1
Example #15
0
def analyseIons (pr_dataset, name_ligand, p_filout, thresold_max_interaction = 4.0) : 
    """Measure distances and angles between ions and the ligand phosphate atoms.

    For every 4-character folder of pr_dataset, each HETATM record of the
    reference complex whose resName is in l_ions is compared with the atoms
    returned by retrieveTwoAtomForAngle for the reference ligand. An ion at
    less than 10 from the first two of these atoms gets one line in
    p_filout (tab-separated): PDB id, ion name, distances, angle(s) and atom
    serials; name_ligand "ATP" uses a third atom (D3, Angle2).

    Two more files are written, named from p_filout without its last 4
    characters: <...>count.txt with the counters and
    <...>byIons_<name_ligand> with, for each ion name, the number of
    folders containing it; runOtherSoft.barplot is run on the latter.

    Args:
        pr_dataset: dataset folder; it is concatenated directly with the
            sub-folder names, so it has to end with a path separator.
        name_ligand: name of the reference ligand.
        p_filout: path of the main output file.
        thresold_max_interaction: distance under which an ion is counted as
            interacting with two atoms at once.
    """

    l_folder_ref = listdir(pr_dataset)
    with_third_atom = name_ligand == "ATP"

    # dictionnary of counting
    d_count = {
        "CX": 0,
        "CX + ions": 0,
        "BS + ions": 0,
        "BS + 1-ion": 0,
        "BS + 2-ions": 0,
        "BS + more-ions": 0,
        "Interact-1": 0,
        "Interact-2": 0,
    }

    # dictionnary by ions: ion name -> number of folders containing it
    d_ions = {}

    # context manager: the file is closed even if a parsing step raises
    with open (p_filout, "w") as filout :
        if with_third_atom :
            filout.write ("PDB\tIon\tD1\tD2\tD3\tAngle1\tAngle2\tAt1\tAt2\tA3\n")
        else :
            filout.write ("PDB\tIon\tD1\tD2\tAngle\tAt1\tAt2\n")

        for ref_folder in l_folder_ref :
            if len (ref_folder) != 4 :
                continue
            d_count["CX"] = d_count["CX"] + 1
            only_one = 0
            l_ion_seen = []

            # path and complex
            p_lig_ref = pathManage.findligandRef(pr_dataset + ref_folder + "/", name_ligand)
            p_complex = pathManage.findPDBRef(pr_dataset + ref_folder + "/")

            # parsing
            lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
            l_het_parsed = parsePDB.loadCoordSectionPDB(p_complex, "HETATM")

            # retrieve phosphate
            l_pi = retrieveTwoAtomForAngle (lig_ref_parsed, name_ligand)
            if l_pi == [] : # case ligand without phosphate
                continue

            flag_interact = 0
            flag_between_1 = 0
            flag_between_2 = 0
            for het_parsed in l_het_parsed :
                name_ion = het_parsed["resName"]
                if name_ion not in l_ions :
                    continue

                d_count["CX + ions"] = d_count["CX + ions"] + 1
                # each ion name is counted once by folder
                if name_ion not in d_ions :
                    d_ions[name_ion] = 0
                if name_ion not in l_ion_seen :
                    d_ions[name_ion] = d_ions[name_ion] + 1
                    l_ion_seen.append (name_ion)

                d1 = parsePDB.distanceTwoatoms(l_pi[0], het_parsed)
                d2 = parsePDB.distanceTwoatoms(l_pi[1], het_parsed)
                if with_third_atom :
                    d3 = parsePDB.distanceTwoatoms(l_pi[2], het_parsed)
                    angle_bis = parsePDB.angleVector(l_pi[1], het_parsed, l_pi[2])
                angle = parsePDB.angleVector(l_pi[0], het_parsed, l_pi[1])

                # only ions close to the first two atoms are reported
                if not (d1 < 10 and d2 < 10) :
                    continue

                if name_ion not in d_count :
                    d_count[name_ion] = 0
                # only the first reported ion of a folder adds to its name count
                if only_one == 0 :
                    d_count[name_ion] = d_count[name_ion] + 1
                    only_one = 1
                d_count["BS + ions"] = d_count["BS + ions"] + 1
                flag_interact = flag_interact + 1
                if d1 < thresold_max_interaction and d2 < thresold_max_interaction :
                    flag_between_1 = flag_between_1 + 1

                if with_third_atom :
                    if d3 < thresold_max_interaction and d2 < thresold_max_interaction :
                        flag_between_2 = flag_between_2 + 1
                    l_col = [ref_folder, name_ion, d1, d2, d3, angle, angle_bis,
                             l_pi[0]["serial"], l_pi[1]["serial"], l_pi[2]["serial"]]
                else :
                    l_col = [ref_folder, name_ion, d1, d2, angle,
                             l_pi[0]["serial"], l_pi[1]["serial"]]
                filout.write ("\t".join ([str (col) for col in l_col]) + "\n")

            if flag_interact == 1 :
                d_count["BS + 1-ion"] = d_count["BS + 1-ion"] + 1
            elif flag_interact == 2 :
                d_count["BS + 2-ions"] = d_count["BS + 2-ions"] + 1
            elif flag_interact > 2 :
                d_count["BS + more-ions"] = d_count["BS + more-ions"] + 1

            # the flags are never negative, so adding 0 changes nothing
            d_count["Interact-1"] = d_count["Interact-1"] + flag_between_1
            d_count["Interact-2"] = d_count["Interact-2"] + flag_between_2

    with open (p_filout[0:-4] + "count.txt", "w") as filout_count :
        filout_count.write ("CX: " + str (d_count["CX"]) + "\n")
        filout_count.write ("CX + ions: " + str (d_count["CX + ions"]) + "\n")
        filout_count.write ("BS + ions: " + str(d_count["BS + ions"]) + "\n")
        filout_count.write ("BS + 1-ion: " + str(d_count["BS + 1-ion"]) + "\n")
        filout_count.write ("BS + 2-ions: " + str(d_count["BS + 2-ions"]) + "\n")
        filout_count.write ("BS + more-ions: " + str(d_count["BS + more-ions"]) + "\n")
        filout_count.write ("Interact Pi-alpha + Pi-beta: " + str(d_count["Interact-1"]) + "\n")
        filout_count.write ("Interact Pi-beta + Pi-gama: " + str(d_count["Interact-2"]) + "\n")

    p_filout_by_ion = p_filout[0:-4] + "byIons_" + name_ligand
    with open (p_filout_by_ion, "w") as filout_by_ion :
        for k in d_ions.keys () :
            filout_by_ion.write (str (k.capitalize()) + "\t" + str (d_ions[k]) + "\n")

    runOtherSoft.barplot (p_filout_by_ion)
Example #16
0
def computeRMSDBS (p_ref, p_query, p_substruct, pr_result, thresold_BS = 6) :
    """
    Compute the RMSD between the binding site of a reference structure and
    the matching atoms of a query structure.

    The binding site is built from the reference atoms located at a distance
    lower or equal to thresold_BS from any atom of the substructure, then
    passed through parsePDB.getResidues. Every binding site atom is paired
    with the closest query atom sharing its residue name and atom name.

    args: -> p_ref: path of the reference PDB file ("ATOM" section loaded)
          -> p_query: path of the query PDB file ("ATOM" section loaded)
          -> p_substruct: path of the PDB file of the substructure
          -> pr_result: not read by this function, kept so that the
             signature used by the callers does not change
          -> thresold_BS: distance cutoff used to select the binding site,
             compared with the value returned by parsePDB.distanceTwoatoms
    return: [] when RMSDTwoList returns [], otherwise the list returned by
            RMSDTwoList followed by flag_identic_crystal (1 when every paired
            query atom has the same resSeq as its reference atom, else 0)
    """

    l_atom_query_parsed = parsePDB.loadCoordSectionPDB(p_query, "ATOM")
    l_atom_ref_parsed = parsePDB.loadCoordSectionPDB(p_ref, "ATOM")
    l_atom_substruct = parsePDB.loadCoordSectionPDB(p_substruct)

    # reference atoms close to the substructure
    # NOTE: an atom close to several substructure atoms is appended several
    # times, exactly as before; getResidues receives the raw list
    l_BS_ref = []
    for atom_substruct in l_atom_substruct :
        for atom_ref in l_atom_ref_parsed :
            if parsePDB.distanceTwoatoms(atom_substruct, atom_ref) <= thresold_BS :
                l_BS_ref.append (atom_ref)

    # retrieve residue full
    # NOTE(review): presumably expands the selection to complete residues - confirm in parsePDB
    l_BS_ref = parsePDB.getResidues(l_BS_ref, l_atom_ref_parsed)

    l_BS_query = []
    flag_identic_crystal = 1
    for atomBS_ref in l_BS_ref :
        # reset for every reference atom: the previous test on locals() kept
        # the match of an earlier atom alive and appended it again when the
        # current atom had no counterpart in the query
        res_temp = None
        # only query atoms strictly closer than this starting value are kept
        d_max = 100.0
        for atom_query in l_atom_query_parsed :
            if atom_query["resName"] != atomBS_ref["resName"] or atom_query["name"] != atomBS_ref["name"] :
                continue
            d = parsePDB.distanceTwoatoms(atom_query, atomBS_ref)
            if d < d_max :
                d_max = d
                res_temp = atom_query

        if res_temp is None :
            # no counterpart in the query: nothing is appended, so the two
            # lists given to RMSDTwoList have different lengths
            # NOTE(review): RMSDTwoList is expected to deal with that case - confirm
            continue

        l_BS_query.append (deepcopy(res_temp))
        # identic check number
        if res_temp["resSeq"] != atomBS_ref["resSeq"] :
            flag_identic_crystal = 0

    l_RMSD = RMSDTwoList (l_BS_query, l_BS_ref)

    if l_RMSD == [] :
        return []
    return l_RMSD + [flag_identic_crystal]