Python loadCoordSectionPDB Examples

Programming Language: Python

Namespace/Package Name: parsePDB

Method/Function: loadCoordSectionPDB

Examples at hotexamples.com: 16

Python loadCoordSectionPDB - 16 examples found. These are the top rated real world Python examples of parsePDB.loadCoordSectionPDB extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: main.py Project: ABorrel/LSRs

def datasetPreparation (ligand_ID, clean = 1):
    """
    Prepare every reference folder found in the dataset directory of a ligand.

    For each 4-character entry of the dataset directory:
      1. optionally clean the folder (see clean),
      2. write one PDB file per ligand retrieved from each remaining file,
      3. write one "subref_" PDB file per substructure of the reference ligand.

    args: -> ligand_ID: ligand name, used to locate the dataset directory,
             the reference ligand and its substructures
          -> clean: when equal to 1, delete the files whose name does not match
             ".pdb", matches "subref", or starts with a 3-character field
             before the first "_"
    return: 1
    """

    p_dir_dataset = pathManage.dataset(ligand_ID)
    nb_folder = 0

    for ref_folder in listdir(p_dir_dataset):
        # entries whose name is not 4 characters long are ignored
        if len(ref_folder) != 4:
            continue

        p_dir_ref = p_dir_dataset + ref_folder + "/"
        l_pdbfile = listdir(p_dir_ref)
        nb_folder += 1
        print("%s %s" % (ref_folder, nb_folder))

        # clean directory -> only PDB ref and PDB
        if clean == 1:
            for pdbfile in l_pdbfile:
                is_pdb = search(".pdb", pdbfile)
                if not is_pdb or search("subref", pdbfile) or len(pdbfile.split("_")[0]) == 3:
                    remove(p_dir_ref + pdbfile)

        # list again: the cleaning step may have deleted files
        for pdbfile in listdir(p_dir_ref):
            p_file_pdb = p_dir_ref + pdbfile
            # extract ligand in PDB
            l_ligand = parsePDB.retrieveListLigand(p_file_pdb)
            if l_ligand == []:
                continue

            l_atom_pdb_parsed = parsePDB.loadCoordSectionPDB(p_file_pdb)
            name_pdb = path.split(p_file_pdb)[1]
            for name_ligand in l_ligand:
                l_lig_parsed = parsePDB.retrieveLigand(l_atom_pdb_parsed, name_ligand)
                if l_lig_parsed == []:
                    continue
                # only the first retrieved ligand is written
                writePDBfile.coordinateSection(p_dir_ref + name_ligand + "_" + name_pdb, l_lig_parsed[0], "HETATM", header=0, connect_matrix=1)

        # ligand_ID write for shaep
        p_lig_ref = pathManage.findligandRef(p_dir_ref, ligand_ID)
        if p_lig_ref == 0:
            continue

        d_l_atom_substruct = substructTools.retrieveSubstruct(parsePDB.loadCoordSectionPDB(p_lig_ref), ligand_ID)
        # case with AMP without phosphate
        if d_l_atom_substruct == {}:
            continue

        # write one file per substructure of the reference ligand
        for subs in d_l_atom_substruct.keys():
            p_filout_substruct = p_dir_ref + "subref_" + subs + "_" + ref_folder + ".pdb"
            writePDBfile.coordinateSection(p_filout_substruct, d_l_atom_substruct[subs], "HETATM", header=0, connect_matrix=1)

    return 1

Example #2

Show file

File: smileAnalysis.py Project: papoku/LSRs

def searchReplacement (smile, PDB_query, PDB_ref, name_ligand, in_cycle = 0) :
    """
    Classify a SMILES code into a replacement type.

    When searchMetal finds a metal and the query PDB file of the dataset
    contains that ion, the ion atoms are written in the dataset folder and
    ("metal", metal) is returned. Otherwise the search functions are tried
    in a fixed order and the first one returning 1 decides the type.

    args: -> smile: SMILES code to classify
          -> PDB_query: pattern searched in the query PDB paths of the dataset
          -> PDB_ref: reference folder name, appended to name_ligand to build the dataset path
          -> name_ligand: ligand name
          -> in_cycle: when equal to 0, the ring search is done first
    return: tuple (type, value); value is the metal found for the type "metal",
            an empty string otherwise
    """

    metal_find = searchMetal (smile)
    if metal_find != 0 :
        p_dir_dataset = pathManage.dataset(name_ligand + "/" + PDB_ref)

        # first query PDB path matching PDB_query, None when there is none
        p_PDB_match = None
        for p_candidate in pathManage.findPDBQueryDataset(p_dir_dataset) :
            if search (PDB_query, p_candidate) :
                p_PDB_match = p_candidate
                break

        if p_PDB_match is not None :
            l_atom_parsed = parsePDB.loadCoordSectionPDB(p_PDB_match)

            if metal_find in parsePDB.retrieveListIon(l_atom_parsed) :
                l_atom_ion = parsePDB.retrieveLigand(l_atom_parsed, metal_find)
                filout = open (p_dir_dataset + str(metal_find) + "_" + p_PDB_match.split("/")[-1], "w")
                for atom_ion in l_atom_ion :
                    writePDBfile.coordinateSection(filout, atom_ion, recorder = "HETATM", header = str(metal_find), connect_matrix = 0)
                filout.close ()
                return "metal", metal_find

    # ring search, skipped when the caller is already inside a cycle
    if in_cycle == 0 and searchRing(smile) == 1 :
        return "cycle",""

    # ordered rules: the first search returning 1 gives the type
    l_rule = (
        (searchP, "P"),
        (searchB, "B"),
        (searchF, "F"),
        (searchCl, "Cl"),
        (searchBr, "Br"),
        (searchBe, "Be"),
        (searchNO2, "NO2"),
        (searchSulfonyl, "SO2"),
        (searchS, "S"),
        (searchCON, "CON"),
        (searchCarboxy, "COO"),
        (searchConly, "onlyC"),
        (searchCandO, "C+O"),
        (searchCandN, "C+N"),
        (searchCandOandN, "C+O+N"),
    )
    for f_search, type_replacement in l_rule :
        if f_search (smile) == 1 :
            return type_replacement, ""

    return "other"  ,""

Example #3

Show file

File: tool.py Project: ABorrel/LSRs

def removeChain (path_protein_PDB, p_dir_out) :
    """
    Write a copy of the ATOM section of a PDB file with an empty chain ID.

    args: -> path_protein_PDB: path of the PDB file to read
          -> p_dir_out: output directory, concatenated as is with the file name
    return: path of the file written
    """

    # output file keeps the name of the input file
    path_filout = p_dir_out + path.split(path_protein_PDB)[-1]

    l_atom_protein = parsePDB.loadCoordSectionPDB(path_protein_PDB, section="ATOM")
    for atom_protein in l_atom_protein :
        atom_protein["chainID"] = ""

    writePDBfile.coordinateSection(path_filout, l_atom_protein, "ATOM")
    return path_filout

Example #4

Show file

def removeChain(path_protein_PDB, p_dir_out):
    """
    Blank the chain ID of every ATOM record of a PDB file and save the result.

    The output is written in p_dir_out under the same file name as the input;
    p_dir_out is concatenated as is, without adding a separator.

    args: -> path_protein_PDB: path of the PDB file to read
          -> p_dir_out: output directory
    return: path of the file written
    """
    _, name_file = path.split(path_protein_PDB)
    p_out = p_dir_out + name_file

    l_atom = parsePDB.loadCoordSectionPDB(path_protein_PDB, section="ATOM")
    for d_atom in l_atom:
        d_atom["chainID"] = ""

    writePDBfile.coordinateSection(p_out, l_atom, "ATOM")
    return p_out

Example #5

Show file

def manageTMalign(path_protein):
    """
    Rewrite a PDB file in place with its atoms grouped by residue.

    The atoms are loaded, arranged by residue with parsePDB.arrangeResidues,
    then written back to the same path as "ATOM" records, residue keys being
    taken in sorted order.

    args: -> path_protein: path of the PDB file, overwritten by this function
    return: path_protein
    """
    list_atoms = parsePDB.loadCoordSectionPDB(path_protein)
    dico_residues = parsePDB.arrangeResidues(list_atoms)

    # sorted() works whether keys() returns a list or a view,
    # keys().sort() only works on a list
    list_res = sorted(dico_residues.keys())

    # the input is fully loaded above, so truncating it here is safe;
    # "with" closes the file even if a write fails
    with open(path_protein, "w") as filout:
        for resID in list_res:
            for atom in dico_residues[resID]:
                writePDBfile.coordinateStructure(atom, "ATOM", filout)

    return path_protein

Example #6

Show file

File: superposeStructure.py Project: ABorrel/LSRs

def manageTMalign (path_protein ) :
    """
    Reorder the atoms of a PDB file by residue and overwrite the file.

    args: -> path_protein: path of the PDB file; it is read entirely, then
             rewritten with every atom as an "ATOM" record, residue by residue
             in sorted order of the keys returned by parsePDB.arrangeResidues
    return: path_protein
    """

    list_atoms = parsePDB.loadCoordSectionPDB(path_protein)
    dico_residues = parsePDB.arrangeResidues(list_atoms)

    # the file is only opened for writing once everything is in memory;
    # the context manager guarantees it is closed if a write raises
    with open (path_protein, "w") as filout :
        # sorted() replaces keys() + sort(), which requires keys() to be a list
        for resID in sorted (dico_residues.keys ()) :
            for atom in dico_residues[resID] :
                writePDBfile.coordinateStructure(atom, "ATOM", filout)

    return path_protein

Example #7

Show file

File: buildData.py Project: ABorrel/LSRs

def separeByChain (path_PDB_file):
    """
    Split a PDB file into one file per chain.

    Each atom is written in the file of its chain, named after the input path
    without its last 4 characters, followed by "_<chainID>.pdb".

    args: -> path_PDB_file: path of the PDB file to split
    return: list of the paths written, in order of first appearance of the chains
    """
    l_atom_parsed = parsePDB.loadCoordSectionPDB(path_PDB_file, section="")

    l_p_chain = []
    d_filout_chain = {}
    for atom_parsed in l_atom_parsed :
        chain_ID = atom_parsed["chainID"]
        # first atom of a chain -> open the file of this chain
        if chain_ID not in d_filout_chain :
            p_chain = path_PDB_file[0:-4] + "_" + chain_ID + ".pdb"
            d_filout_chain[chain_ID] = open(p_chain, "w")
            l_p_chain.append (p_chain)
        writePDBfile.coordinateStructure(atom_parsed, d_filout_chain[chain_ID])

    # close files
    for filout_chain in d_filout_chain.values () :
        filout_chain.close ()

    return l_p_chain

Example #8

Show file

File: buildData.py Project: papoku/LSRs

def separeByChain(path_PDB_file):
    """
    Write the atoms of a PDB file in separate files, one per chain ID.

    args: -> path_PDB_file: path of the PDB file; its last 4 characters are
             dropped to build the output names "<prefix>_<chainID>.pdb"
    return: list of the output paths, one per chain ID met in the file
    """
    d_open_by_chain = {}
    l_path_out = []

    for atom in parsePDB.loadCoordSectionPDB(path_PDB_file, section=""):
        chain = atom["chainID"]
        if chain not in d_open_by_chain:
            # new chain: create its output file and remember the path
            path_chain = path_PDB_file[0:-4] + "_" + chain + ".pdb"
            d_open_by_chain[chain] = open(path_chain, "w")
            l_path_out.append(path_chain)
        writePDBfile.coordinateStructure(atom, d_open_by_chain[chain])

    # close files
    for chain in d_open_by_chain:
        d_open_by_chain[chain].close()

    return l_path_out

Example #9

Show file

File: main.py Project: ABorrel/LSRs

def retrieveSubstructSuperimposed (name_lig, thresold_BS = 4.5, thresold_superimposed_ribose = 2.5, thresold_superimposed_pi = 3, thresold_shaep = 0.4):
    """
    Extract, for every reference folder of a ligand dataset, the parts of the
    query ligands superimposed on the substructures of the reference ligand.

    For each query ligand file of each 4-character reference folder:
      - the ligand is moved with the matrix found by pathManage.findMatrix,
      - for each reference substructure, the neighbor atoms are searched and
        written in a "substituent_" PDB file, scored with ShaEP,
      - when the best similarity reaches thresold_shaep, the complex, the
        binding site and the SMILES code of the substituent are also produced.
    Log, ShaEP summary, SMILES list and quality control files are written in
    the result directory of the ligand.

    args: -> name_lig: name of the reference ligand
          -> thresold_BS: maximal distance between a ligand atom and a complex
             atom for the latter to be part of the binding site
          -> thresold_superimposed_ribose: forwarded to neighborSearch.searchNeighborAtom
          -> thresold_superimposed_pi: forwarded to neighborSearch.searchNeighborAtom
          -> thresold_shaep: minimal ShaEP best similarity to keep a substituent
    return: 1
    """

    # output
    p_dir_dataset = pathManage.dataset(name_lig)
    p_dir_result = pathManage.result(name_lig )
    l_folder_ref = listdir(p_dir_dataset)

    # log control
    p_log = open(p_dir_result + "log_superimposed.txt", "w")

    # control extraction
    d_control = {}
    d_control["pr ref"] = 0
    d_control["lig query"] = 0
    d_control["subref"] = {}
    d_control["subref empty"] = {}
    d_control["out sheap"] = {}
    filout_control = open (p_dir_result + "quality_extraction.txt", "w")

    # stock smile code
    d_smile = {}

    # sheap control
    d_filout_sheap = {}
    d_filout_sheap ["list"] = [p_dir_result + "shaep_global.txt"]
    d_filout_sheap["global"] = open (p_dir_result + "shaep_global.txt", "w")
    d_filout_sheap["global"].write ("name\tbest_similarity\tshape_similarity\tESP_similarity\n")

    for ref_folder in l_folder_ref :
        # control folder reference name
        if len (ref_folder) != 4 :
            p_log.write ("[ERROR folder] -> " + ref_folder + "\n")
            continue

        # reference
        p_lig_ref = pathManage.findligandRef(p_dir_dataset + ref_folder + "/", name_lig)
        try:
            lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
        except Exception:
            # str(): p_lig_ref is presumably not a path when no reference ligand
            # is found (TODO confirm against pathManage.findligandRef); without
            # the conversion the log line itself would raise a TypeError
            p_log.write ("[ERROR ligand ref] -> " + str(p_lig_ref) + "\n")
            continue

        # control
        d_control["pr ref"] = d_control["pr ref"] + 1

        # output by reference
        p_dir_result_ref = pathManage.result(name_lig + "/" + ref_folder)
        d_filout_superimposed = {}
        d_filout_superimposed["global"] = open (p_dir_result_ref + "all_ligand_aligned.pdb", "w")
        d_filout_superimposed["sheap"] = open (p_dir_result_ref + "all_ligand_aligned_" + str (thresold_shaep)  + ".pdb", "w")

        # write lig ref -> connect matrix correct in all reference and all sheap
        writePDBfile.coordinateSection(d_filout_superimposed["global"], lig_ref_parsed, "HETATM", connect_matrix = 1)
        writePDBfile.coordinateSection(d_filout_superimposed["sheap"], lig_ref_parsed, "HETATM", connect_matrix = 1)

        # inspect folder dataset
        l_pdbfile = listdir(p_dir_dataset + ref_folder + "/")
        for pdbfile in l_pdbfile :
            # no ligand file
            if len (pdbfile.split ("_")) == 1 :
                continue
            pdbfile = pdbfile[:-4] # remove extension

            # keep only <3-character ligand>_<4-character PDB> files of another PDB
            # than the reference; the tests are evaluated left to right so that
            # the second field is only read when the first one is valid
            l_elem_name = pdbfile.split ("_")
            if len(l_elem_name[0]) != 3 or len(l_elem_name[1]) != 4 or l_elem_name[1] == ref_folder :
                continue
            name_lig_query = l_elem_name[0]
            PDB_query = l_elem_name[1]

            p_lig = p_dir_dataset + ref_folder + "/" + pdbfile  + ".pdb"
            if p_lig_ref == p_lig :
                continue

            # pass case where ligand replace same ligand -> does not need run
            if name_lig_query == name_lig :
                p_log.write ("[REMOVE] -> same ligand substituate\n")
                continue

            # parsed ligand query
            lig_parsed = parsePDB.loadCoordSectionPDB(p_lig, "HETATM")

            # find matrix of rotation
            p_matrix = pathManage.findMatrix(p_lig_ref, p_lig, name_lig)
            # control file matrix exist
            if not path.exists(p_matrix) :
                p_log.write ("[ERROR] -> Matrix transloc " + p_lig_ref + " " + p_lig + " " + name_lig + "\n")
                continue

            # control
            d_control["lig query"] = d_control["lig query"] + 1

            # find the path of complex used: ligand file name without its first 4 characters
            p_complex = p_dir_dataset + ref_folder + "/" + p_lig.split ("/")[-1][4:]

            # ligand rotated -> change the referential
            superposeStructure.applyMatrixLigand(lig_parsed, p_matrix)

            # use substruct
            l_p_substruct_ref = pathManage.findSubstructRef (pathManage.dataset(name_lig) + ref_folder + "/" , name_lig)
            for p_substruct_ref in l_p_substruct_ref :
                # ribose or phosphate
                struct_type = p_substruct_ref.split ("_")[-2]
                substruct_parsed = parsePDB.loadCoordSectionPDB(p_substruct_ref, "HETATM")

                l_atom_substituate = neighborSearch.searchNeighborAtom(substruct_parsed, lig_parsed, struct_type, p_log, thresold_superimposed_ribose = thresold_superimposed_ribose, thresold_superimposed_pi = thresold_superimposed_pi)
                # control find
                if len (l_atom_substituate) == 0 :
                    d_control["subref empty"][struct_type] = d_control["subref empty"].get (struct_type, 0) + 1
                    continue

                d_control["subref"][struct_type] = d_control["subref"].get (struct_type, 0) + 1

                # write PDB file, convert smile
                p_substituate_pdb = p_dir_result_ref + "substituent_" + name_lig_query + "_" + PDB_query + "_" + struct_type + ".pdb"
                writePDBfile.coordinateSection(p_substituate_pdb, l_atom_substituate, recorder="HETATM", header=0, connect_matrix = 1)

                # sheap reference on part of ligand
                p_sheap = runOtherSoft.runShaep (p_substruct_ref, p_substituate_pdb, p_substituate_pdb[0:-4] + ".hit", clean = 0)
                val_sheap = parseShaep.parseOutputShaep (p_sheap)
                if val_sheap == {} :
                    p_log.write ("[ERROR] -> ShaEP " + p_substituate_pdb + " " + p_substruct_ref + "\n")
                    d_control["out sheap"][struct_type] = d_control["out sheap"].get (struct_type, 0) + 1
                    continue

                # first result for this substructure type -> open its ShaEP summary file
                if not struct_type in d_filout_sheap.keys () :
                    d_filout_sheap[struct_type] = open (p_dir_result + "shaep_global_" + struct_type + ".txt", "w")
                    d_filout_sheap[struct_type].write ("name\tbest_similarity\tshape_similarity\tESP_similarity\n")
                    d_filout_sheap["list"].append (p_dir_result + "shaep_global_" + struct_type + ".txt") # to improve with python function

                # write value in ShaEP control, same line in the type file and in the global file
                best_similarity = str (val_sheap["best_similarity"])
                line_sheap = ref_folder + "_" +  str(PDB_query) + "_" + struct_type + "_" + str (name_lig_query) + "\t" + best_similarity + "\t" + str(val_sheap["shape_similarity"]) + "\t" + str(val_sheap["ESP_similarity"]) + "\n"
                d_filout_sheap[struct_type].write (line_sheap)
                d_filout_sheap["global"].write (line_sheap)

                # rename file substituent with shaEP value, and keep the new name
                p_substituate_scored = p_substituate_pdb[:-4] + "_" + best_similarity + ".pdb"
                rename(p_substituate_pdb, p_substituate_scored)
                p_substituate_pdb = p_substituate_scored

                # write all substruct in global file
                header_lig = str(p_lig.split ("/")[-1]) + "_" + best_similarity
                writePDBfile.coordinateSection(d_filout_superimposed["global"], lig_parsed, recorder= "HETATM", header = header_lig ,  connect_matrix = 1)

                # control sheap threshold -> substituent rejected
                if not (float(val_sheap["best_similarity"]) >= thresold_shaep) :
                    d_control["out sheap"][struct_type] = d_control["out sheap"].get (struct_type, 0) + 1
                    continue

                # write subligand superimposed selected in global files
                writePDBfile.coordinateSection(d_filout_superimposed["sheap"], lig_parsed, recorder= "HETATM", header = header_lig ,  connect_matrix = 1)

                ############
                # write BS #
                ############
                # not only protein superimposed -> also ion and water
                l_atom_complex = parsePDB.loadCoordSectionPDB(p_complex)
                superposeStructure.applyMatrixProt(l_atom_complex, p_matrix)
                p_file_cx = p_dir_result_ref +  "CX_" + p_lig.split ("/")[-1]
                # write CX
                writePDBfile.coordinateSection(p_file_cx, l_atom_complex, recorder="ATOM", header= p_lig.split ("/")[-1], connect_matrix = 0)

                # search atom in BS
                l_atom_binding_site = []
                for atom_complex in l_atom_complex :
                    for atom_substruct in lig_parsed :
                        if parsePDB.distanceTwoatoms (atom_substruct, atom_complex) <= thresold_BS :
                            if not atom_complex in l_atom_binding_site :
                                l_atom_binding_site.append (deepcopy(atom_complex))

                # 3. retrieve complete residue
                l_atom_BS_res = parsePDB.getResidues(l_atom_binding_site, l_atom_complex)

                # 4. write binding site
                p_binding = p_dir_result_ref +  "BS_" + p_lig.split ("/")[-1]
                writePDBfile.coordinateSection(p_binding, l_atom_BS_res, "ATOM", p_binding, connect_matrix = 0)

                # smile code substituate analysis
                # Step smile -> not conversion if shaep not validate
                smile_find = runOtherSoft.babelConvertPDBtoSMILE(p_substituate_pdb)
                d_smile_type = d_smile.setdefault (struct_type, {})
                if not smile_find in d_smile_type :
                    d_smile_type[smile_find] = {"count": 0, "PDB": [], "ligand": [], "ref": []}
                d_smile_type[smile_find]["count"] = d_smile_type[smile_find]["count"] + 1
                d_smile_type[smile_find]["PDB"].append (PDB_query)
                d_smile_type[smile_find]["ligand"].append (name_lig_query)
                d_smile_type[smile_find]["ref"].append (ref_folder)

        tool.closeDicoFile (d_filout_superimposed)

    # sheap control
    tool.closeDicoFile (d_filout_sheap)
    for p_file_sheap in d_filout_sheap["list"] :
        runOtherSoft.RhistogramMultiple (p_file_sheap)

    # write list of smile
    for substruct in d_smile.keys () :
        p_list_smile = pathManage.result(name_lig) + "list_" + substruct + "_" + str (thresold_shaep) + "_smile.txt"
        filout_smile = open (p_list_smile, "w")
        for smile_code in d_smile[substruct].keys () :
            l_lig = d_smile[substruct][smile_code]["ligand"]
            l_PDB = d_smile[substruct][smile_code]["PDB"]
            l_ref = d_smile[substruct][smile_code]["ref"]
            filout_smile.write (str (smile_code) + "\t" + str (d_smile[substruct][smile_code]["count"]) + "\t" + " ".join (l_PDB) + "\t" + " ".join (l_ref) + "\t" + " ".join(l_lig) + "\n")
        filout_smile.close ()
    p_log.close ()

    # control
    filout_control.write ("NB ref: " + str(d_control["pr ref"]) + "\n")
    filout_control.write ("Ligand query: " + str(d_control["lig query"]) + "\n")
    for k in d_control["subref"].keys () :
        filout_control.write ("LSR " + str (k) + ": " + str(d_control["subref"][k]) + "\n")
    for k in d_control["subref empty"].keys () :
        filout_control.write ("NB LSR empty " + str (k) + ": " + str(d_control["subref empty"][k]) + "\n")
    for k in d_control["out sheap"].keys () :
        filout_control.write ("LSR out by sheap " + str (k) + ": " + str(d_control["out sheap"][k]) + "\n")

    filout_control.write ("**********************\n\n")
    for k in d_control["subref"].keys () :
        # a type with no substituent rejected by ShaEP has no "out sheap" entry -> count 0
        filout_control.write ("LSR keep" + str (k) + ": " + str(d_control["subref"][k] - d_control["out sheap"].get (k, 0)) + "\n")

    filout_control.close ()

    return 1

Example #10

Show file

File: arrangeResult.py Project: ABorrel/LSRs

def superpositionAllRef (l_ligand, name_folder_final, debug = 1):
    """
    Superimpose the reference ligands found in a final result folder and
    write, per ligand, the superimposed structures and the RMSE values.

    The result folder is browsed as <type>/<sub>/<ref>/LGD/ (the entries of a
    "cycle" sub folder are browsed as subs); every file matching "LGD_REF_A"
    and ".pdb" is loaded once per reference PDB. The first structure met for
    a ligand is the reference; the next ones are transformed with
    superimpose.rigid_transform_3D on the atoms returned by
    substructTools.retrieveAdenine, and kept only when they have as many
    atoms as the reference.

    args: -> l_ligand: list of ligand names, one superimposed PDB file and one
             RMSE file are opened for each
          -> name_folder_final: suffix of the "final_" result folder
          -> debug: when true, print the folders browsed
    return: None
    """

    pr_final = pathManage.result("final_" + name_folder_final)
    pr_align = pathManage.generatePath(pr_final + "refAlignement/")

    l_ref = []
    d_filout_pdb = {}
    d_filout_RMSE = {}
    d_ref = {}
    l_file_RMSE = []
    for ligand in l_ligand :
        d_filout_pdb[ligand] = open (pr_align + ligand + "_" + "superimposed.pdb" , "w")
        d_filout_RMSE[ligand] = open (pr_align + ligand + "_" + "RMSE.txt" , "w")
        l_file_RMSE.append (pr_align + ligand + "_" + "RMSE.txt")

    l_pr_type_ref = listdir(pr_final)
    for pr_type_ref in l_pr_type_ref :
        if debug : print ("1 %s" % pr_type_ref)
        # case where pr_substruct is a file not a folder
        try :
            l_pr_sub = listdir(pr_final + pr_type_ref + "/")
        except OSError :
            continue

        for pr_sub in l_pr_sub :
            if debug : print ("2 %s" % pr_sub)

            # case cycle -> replace it in the list by the folders it contains
            # (the list is modified while browsed, hence the immediate break)
            if pr_sub == "cycle" :
                l_pr_sub.remove ("cycle")
                l_pr_sub_cycle = listdir (pr_final + pr_type_ref + "/cycle")
                for pr_sub_cycle in l_pr_sub_cycle :
                    l_pr_sub.append ("cycle/" + pr_sub_cycle)
                break

        for pr_sub in l_pr_sub :
            if debug : print ("3 %s" % pr_sub)
            # case no folder -> skip it instead of reusing the list of the previous sub folder
            try :
                l_pr_ref = listdir (pr_final + pr_type_ref + "/" + pr_sub)
            except OSError :
                continue

            for pr_ref in l_pr_ref :
                if debug : print ("4 %s" % pr_ref)
                p_dir_LGD = pr_final + pr_type_ref + "/" + pr_sub + "/" + pr_ref + "/LGD/"
                # case no folder
                try :
                    l_file = listdir(p_dir_LGD)
                except OSError :
                    continue

                for name_file in l_file :
                    if not (search("LGD_REF_A",name_file) and search(".pdb",name_file)) :
                        continue

                    # each reference PDB is used only once
                    PDB_ref = name_file.split("_")[3][:4]
                    if PDB_ref in l_ref :
                        print ("!!!!! IN")
                        break
                    l_ref.append (PDB_ref)

                    ligand = name_file.split ("_")[2]
                    l_atom_ligand = parsePDB.loadCoordSectionPDB(p_dir_LGD + name_file, "HETATM", remove_H=1)
                    l_atom_adenine = substructTools.retrieveAdenine(l_atom_ligand)

                    if not ligand in d_ref.keys () :
                        # stock in temporary dictionary for the reference: [all atoms, adenine atoms]
                        d_ref[ligand] = [l_atom_ligand, l_atom_adenine]
                        writePDBfile.coordinateSection(d_filout_pdb[ligand], l_atom_ligand, "HETATM", connect_matrix = 1)
                        continue

                    rotation, translocation =  superimpose.rigid_transform_3D(l_atom_adenine, d_ref[ligand][-1])
                    # identity test: an equality test against None is not a
                    # plain boolean when the value is an array
                    if rotation is None or translocation is None :
                        continue

                    # rotation + translation
                    l_atom_lig_rotated = superimpose.applyTranformation(rotation, translocation, l_atom_in=l_atom_ligand)
                    # RMSE needs the same number of atoms as the reference
                    if len (l_atom_lig_rotated) != len (d_ref[ligand][0]) :
                        continue

                    # write PDB file and RMSE
                    writePDBfile.coordinateSection(d_filout_pdb[ligand], l_atom_lig_rotated, "HETATM", connect_matrix = 1)
                    RMSE_ligand = superimpose.rmse(d_ref[ligand][0], l_atom_lig_rotated)
                    d_filout_RMSE[ligand].write (str (pr_ref) + pr_type_ref  + "\t" + str(RMSE_ligand) + "\n")

    # close files
    for lig in d_filout_pdb.keys () :
        d_filout_pdb[lig].close ()
        d_filout_RMSE[lig].close ()

    for file_RMSE in l_file_RMSE :
        runOtherSoft.Rhistogram(file_RMSE, "RMSE_Adenine")

Example #11

Show file

File: arrangeResult.py Project: ABorrel/LSRs

def enantiomer(l_ligand, name_folder_final, debug = 1) : 
    "to do file output"
    
    pr_final = pathManage.result("final_" + name_folder_final)
    
    pr_enantiomer = pathManage.generatePath(pr_final + "enantiomer/")
    
    l_ref = []

    d_filout = {}
    for ligand in l_ligand : 
        d_filout[ligand] = {}
        d_filout[ligand]["O3OP"]= open (pr_enantiomer + ligand + "_" + "O3OP" , "w")
        d_filout[ligand]["O4O5"]= open (pr_enantiomer + ligand + "_" + "O4O5" , "w")
        d_filout[ligand]["OPOP"]= open (pr_enantiomer + ligand + "_" + "OPOP" , "w")
        
    l_pr_type_ref = listdir(pr_final) 
    for pr_type_ref in l_pr_type_ref : 
        if debug : print "1", pr_type_ref
        # case where pr_substruct is a file not a folder
        try : l_pr_sub = listdir(pr_final + pr_type_ref + "/")
        except : continue

        for pr_sub in l_pr_sub : 
            print "2", pr_sub

            # case cycle -> append in list respertory with new folder
            if pr_sub == "cycle" : 
                l_pr_sub.remove ("cycle")
                l_pr_sub_cycle = listdir (pr_final + pr_type_ref + "/cycle")
                for pr_sub_cycle in l_pr_sub_cycle : 
                    l_pr_sub.append ("cycle/" + pr_sub_cycle)
                break
        
        for pr_sub in l_pr_sub : 
            try : l_pr_ref = listdir (pr_final + pr_type_ref + "/" + pr_sub)
            except : pass
            if debug : print "3", pr_sub
            
            for pr_ref in l_pr_ref : 
                if debug : print "4", pr_ref
                # case no folder
                try : l_file = listdir(pr_final + pr_type_ref + "/" + pr_sub + "/" + pr_ref + "/LGD/")
                except : continue
                for name_file in l_file : 
                    if search("LGD_REF_A",name_file) and search(".pdb",name_file): 
                        #print "2222", l_ref
                        if name_file.split("_")[3][:4] in l_ref : 
                            print "!!!!!", "IN"
                            break
                        else : l_ref.append (name_file.split ("_")[3][:4])                       
 
                        ligand = name_file.split ("_")[2]
                        l_atom_ligand = parsePDB.loadCoordSectionPDB(pr_final + pr_type_ref + "/" + pr_sub + "/" + pr_ref + "/LGD/" + name_file, "HETATM")
                        d_minO3OP = 100
                        for atom_ligand in l_atom_ligand : 
                            if atom_ligand["name"] == "O4'" :
                                atom_O4 = atom_ligand
                            elif atom_ligand["name"] == "O5'" :
                                atom_O5 = atom_ligand
                            elif  atom_ligand["name"] == "O3'" :
                                atom_O3 = atom_ligand
                            elif  atom_ligand["name"] == "O1A" :
                                atom_O1A = atom_ligand
                            elif  atom_ligand["name"] == "O2A" :
                                atom_O2A = atom_ligand
                            elif  atom_ligand["name"] == "O1B" :
                                atom_O1B = atom_ligand
                            elif  atom_ligand["name"] == "O2B" :
                                atom_O2B = atom_ligand
                            #elif  atom_ligand["name"] == "O3B" :
                            #    atom_O3B = atom_ligand
                    
                        # d O4 - O5        
                        try : d_O4O5 = parsePDB.distanceTwoatoms(atom_O4, atom_O5)
                        except : continue
                        d_filout[ligand]["O4O5"].write (pr_ref + "_" + pr_type_ref  + "\t" + str (d_O4O5) + "\n")

                        # d O3 - OP
                        for atom_ligand in l_atom_ligand : 
                            if ligand == "AMP" : 
                                if atom_ligand["name"] == "O1P" or atom_ligand["name"] == "O2P" or atom_ligand["name"] == "O3P" : 
                                    d_tempO3OP = parsePDB.distanceTwoatoms(atom_O3, atom_ligand)
                                    if d_tempO3OP < d_minO3OP : 
                                        d_minO3OP = d_tempO3OP
                                        atom_tempO3OP = deepcopy(atom_ligand)
                            else : 
                                if atom_ligand["name"] == "O1A" or atom_ligand["name"] == "O2A" or atom_ligand["name"] == "O3A" : 
                                    d_tempO3OP = parsePDB.distanceTwoatoms(atom_O4, atom_ligand)
                                    if d_tempO3OP < d_minO3OP : 
                                        d_minO3OP = d_tempO3OP
                                        atom_tempO3OP = deepcopy(atom_ligand)
                        d_filout[ligand]["O3OP"].write (pr_ref + "_" + pr_type_ref  +"_" + str(atom_tempO3OP["name"]) + "\t" + str (d_minO3OP) + "\n")
    
                        # d OP OP
                        d_OP = {}
                        if ligand == "ATP" or ligand == "ADP" : 
                            d_OP ["O1AO1B"] = parsePDB.distanceTwoatoms(atom_O1A, atom_O1B)
                            d_OP ["O1AO2B"] = parsePDB.distanceTwoatoms(atom_O1A, atom_O2B)
                            #d_OP ["O1AO3B"] = parsePDB.distanceTwoatoms(atom_O1A, atom_O3B)
                            d_OP ["O2AO1B"] = parsePDB.distanceTwoatoms(atom_O2A, atom_O1B)
                            d_OP ["O2AO2B"] = parsePDB.distanceTwoatoms(atom_O2A, atom_O2B)
                            #d_OP ["O2AO3B"] = parsePDB.distanceTwoatoms(atom_O2A, atom_O3B)
                        
                            d_minOPOP = min (d_OP.values())
                            #print d_minOPOP
                            k_min = [name for name, age in d_OP.items() if age == min (d_OP.values())][0]
                            #print k_min
                            d_filout[ligand]["OPOP"].write (pr_ref + "_" + pr_type_ref  + "_" + str(k_min) + "\t" + str (d_minOPOP) + "\n")
                    
                        try :
                            del d_OP 
                            del atom_O1A
                            del atom_O1B
                            del atom_O2A
                            del atom_O2B
                        except : 
                            pass
                        try : 
                            del atom_O3
                            del atom_O4
                            del atom_O5
                        except :
                            pass
            
    # close files
    for lig in l_ligand : 
        for type_dist in d_filout[lig].keys () : 
            p_file = d_filout[lig][type_dist].name
            d_filout[lig][type_dist].close ()
            runOtherSoft.Rhistogram(p_file, type_dist, brk = 20)

Example #12

Show file

File: arrangeResult.py Project: ABorrel/LSRs

def globalArrangement (pr_orgin, p_smile, p_family, name_ligand, l_ligand_out):
    
#     print "--------"
#     print pr_orgin
#     print p_smile
#     print p_family
#     print name_ligand
#     print "--------"
    
    
    subst = p_smile.split ("_")[-3]
    
    filin = open (p_smile, "r")
    l_line_smile = filin.readlines ()
    filin.close()
    
    for line_smile in l_line_smile : 
        
        # search substructure
#         print line_smile
        l_PDB_query = line_smile.split ("\t")[-3].split (" ")
#         print l_PDB_query
        l_PDB_ref = line_smile.split ("\t")[-2].split (" ")
        l_ligand = line_smile.strip().split ("\t")[-1].split (" ")
        
        # search replacement
        smile = line_smile.split ("\t")[0]
        
        # search if LSR is small -> thresold < 3
        small_LSR = smileAnalysis.smallLSR (smile) 
        if subst == "ribose" :  
            if small_LSR == 1 : 
                first_folder = "ribose_small"
            else : 
                first_folder = "ribose"
        else : 
            if small_LSR == 1 : 
                first_folder = "Pi_small"
            else : 
                first_folder = "Pi"
        
        
        print smile, l_PDB_query, l_PDB_ref, l_ligand, subst, small_LSR
        replacement, metal = smileAnalysis.searchReplacement (smile, l_PDB_query[0], l_PDB_ref[0], name_ligand)
        
        # case with cycle -> search replacement 2
        if replacement == "cycle" : 
            replacement2, metal = smileAnalysis.searchReplacement (smile, l_PDB_query[0], l_PDB_ref[0], name_ligand, in_cycle = 1)
            replacement = replacement + "/" + replacement2 # new folder

        # case metal
        if replacement == "metal" : 
            print metal, l_PDB_query, l_PDB_ref, name_ligand
        
        len_find = len (l_PDB_ref)
        i = 0
        while i < len_find : 
            
            # exclusion of ligand out
            if l_ligand[i] in l_ligand_out : 
                i = i + 1
                continue
            
            
            group, family = analysis.findFamilyAndGroup(l_PDB_ref[i])
            
            # folder reference
            pr_dataset = pathManage.dataset(name_ligand + "/" + l_PDB_ref[i])
            
            PDB_ref = pathManage.findPDBRef(pr_dataset)
            p_ligand_ref = pathManage.findligandRef(pr_dataset , name_ligand)
            l_frag_ref = pathManage.findSubstructRef(pr_dataset, name_ligand)
            for f_ref in l_frag_ref :
                if search (subst, f_ref) : 
                    p_frag_ref = f_ref
                    break
            
            # folder_query
            pr_result = pathManage.result(name_ligand + "/" + l_PDB_ref[i])
            l_protein_tranloc = pathManage.findPDBQueryTransloc(pr_result)
            for p_t in l_protein_tranloc : 
                if search (l_ligand[i], p_t) and search (l_PDB_query[i], p_t) : 
                    p_protein_query = p_t
                    break
                
            if replacement != "metal" : 
                p_lig_query = pathManage.findligandQuery(pr_dataset , l_ligand[i], l_PDB_query[i])
            else : 
                p_lig_query = pathManage.findligandQuery(pr_dataset ,metal, l_PDB_query[i])
            # need apply transloc matrix
            matrix_transloc = pathManage.findMatrix(p_ligand_ref, p_lig_query, name_ligand)
            lig_query_parsed = parsePDB.loadCoordSectionPDB(p_lig_query)
            try : superposeStructure.applyMatrixLigand(lig_query_parsed, matrix_transloc)
            except : 
                i = i + 1
                continue
            
            
            p_lig_substituate = pathManage.findSubstructFind(pr_result, l_ligand[i], l_PDB_query[i], subst)
            l_p_BS = pathManage.findFileBS(pr_result, l_PDB_query[i])
            for BS in l_p_BS : 
                if search (l_ligand[i], BS) : 
                    p_BS = BS
                    break
            
            
#             print pr_final
#             print "***************"
#             print PDB_ref
#             print p_ligand_ref
#             print p_frag_ref
#             print "----"
#             print p_protein_query
#             print p_lig_query
#             print p_lig_substituate
#             print p_BS
#             print "**************"
            # ajouter group + family 2 lettre
            pr_final = pr_orgin + first_folder + "/" + replacement + "/" + str (family) + "-"  + str (group) + "_" + l_PDB_ref[i] +  "/" 
            pr_ligand = pr_orgin + first_folder + "/" + replacement + "/" + str (family) + "-" +  str (group) + "_" + l_PDB_ref[i] + "/LGD/"
            pr_BS = pr_orgin + first_folder + "/" + replacement + "/" + str (family) + "-" + str (group) + "_" + l_PDB_ref[i] + "/BS/"
            pr_sust = pr_orgin + first_folder + "/" + replacement + "/" + str (family) + "-"  + str (group) + "_" + l_PDB_ref[i] + "/LSR/"
            
            if not path.isdir(pr_final):
                makedirs (pr_final)
            
            if not path.isdir(pr_ligand):
                makedirs (pr_ligand)
            
            if not path.isdir(pr_BS):
                makedirs (pr_BS)
                
            if not path.isdir(pr_sust):
                makedirs (pr_sust)   
            
            # list file
            p_list_smile_queries = pr_sust + "list.smile"
            if not path.exists(p_list_smile_queries) : 
                file_smile_queries = open (p_list_smile_queries, "w")
            else : 
                file_smile_queries = open (p_list_smile_queries, "a")
            file_smile_queries.write (str(smile) + "\n")
            file_smile_queries.close ()
            
            # lig de la query
            writePDBfile.coordinateSection(pr_ligand + "LGD_" + p_lig_query.split ("/")[-1], lig_query_parsed, recorder = "HETATM", header = "LCG_" + p_lig_query.split ("/")[-1], connect_matrix = 1)
            runOtherSoft.babelConvertPDBtoSMILE(pr_ligand + "LGD_" + p_lig_query.split ("/")[-1], clean_smi = 1)
            # lig de reference + smile
            copy2(p_ligand_ref, pr_ligand + "LGD_REF_" + p_ligand_ref.split ("/")[-1])
            runOtherSoft.babelConvertPDBtoSMILE(pr_ligand + "LGD_REF_" + p_ligand_ref.split ("/")[-1])
            # LSR de ref
            copy2(p_frag_ref, pr_sust + "LSR_REF_" + name_ligand + "_" + l_PDB_ref[i] + ".pdb")
            # protein query
            #copy2(p_protein_query, pr_final)
            # LSR query -> p_lig_ref only for the name
            copy2(p_lig_substituate, pr_sust + "LSR_" + subst + "_"  + p_lig_query.split ("/")[-1])
            # BS query
            copy2(p_BS, pr_BS)   
            
            # BS from reference
            l_atom_BS = parsePDB.computeBS (PDB_ref, p_ligand_ref, thresold = 4.50, option_onlyATOM = 0)
            writePDBfile.coordinateSection(pr_BS + "BS_REF_" + name_ligand + "_" + PDB_ref.split ("/")[-1], l_atom_BS, recorder = "ATOM", header = "BS_REF_" + name_ligand + "_" + PDB_ref, connect_matrix = 0)
            
            i = i + 1
    
    return 1

Example #13

Show file

def datasetPreparation(ligand_ID, clean=1):

    p_dir_dataset = pathManage.dataset(ligand_ID)
    l_folder = listdir(p_dir_dataset)
    indent = 0

    for ref_folder in l_folder:
        # file include in dataset folder
        if len(ref_folder) != 4:
            continue
        l_pdbfile = listdir(p_dir_dataset + ref_folder + "/")
        indent = indent + 1
        print ref_folder, indent

        # clean repertory -> only PDB ref and PDB
        l_pdbfile = listdir(p_dir_dataset + ref_folder + "/")
        if clean == 1:
            for pdbfile in l_pdbfile:
                p_file_pdb = p_dir_dataset + ref_folder + "/" + pdbfile
                if not search(".pdb", pdbfile) or search(
                        "subref", pdbfile) or len(pdbfile.split("_")[0]) == 3:
                    remove(p_file_pdb)

        l_pdbfile = listdir(p_dir_dataset + ref_folder + "/")
        for pdbfile in l_pdbfile:
            p_file_pdb = p_dir_dataset + ref_folder + "/" + pdbfile
            # extract ligand in PDB
            l_ligand = parsePDB.retrieveListLigand(p_file_pdb)
            #             print l_ligand
            if l_ligand == []:
                continue
            else:
                l_atom_pdb_parsed = parsePDB.loadCoordSectionPDB(p_file_pdb)
                for name_ligand in l_ligand:
                    l_lig_parsed = parsePDB.retrieveLigand(
                        l_atom_pdb_parsed, name_ligand)
                    if l_lig_parsed == []:
                        continue
                    p_filout_ligand = p_dir_dataset + ref_folder + "/" + name_ligand + "_" + path.split(
                        p_file_pdb)[1]
                    writePDBfile.coordinateSection(p_filout_ligand,
                                                   l_lig_parsed[0],
                                                   "HETATM",
                                                   header=0,
                                                   connect_matrix=1)

        # ligand_ID write for shaep
#         print p_dir_dataset + ref_folder + "/"
        p_lig_ref = pathManage.findligandRef(p_dir_dataset + ref_folder + "/",
                                             ligand_ID)
        if p_lig_ref == 0:

            continue
#         print p_lig_ref
        lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref)
        d_l_atom_substruct = substructTools.retrieveSubstruct(
            lig_ref_parsed, ligand_ID)
        # case with AMP without phosphate
        if d_l_atom_substruct == {}:
            continue
        # write ligand_ID
        for subs in d_l_atom_substruct.keys():
            p_filout_substruct = p_dir_dataset + ref_folder + "/subref_" + subs + "_" + ref_folder + ".pdb"
            writePDBfile.coordinateSection(p_filout_substruct,
                                           d_l_atom_substruct[subs],
                                           "HETATM",
                                           header=0,
                                           connect_matrix=1)

    return 1

Example #14

Show file

def retrieveSubstructSuperimposed(name_lig,
                                  thresold_BS=4.5,
                                  thresold_superimposed_ribose=2.5,
                                  thresold_superimposed_pi=3,
                                  thresold_shaep=0.4):

    # ouput
    p_dir_dataset = pathManage.dataset(name_lig)
    p_dir_result = pathManage.result(name_lig)
    l_folder_ref = listdir(p_dir_dataset)

    # log control
    p_log = open(p_dir_result + "log_superimposed.txt", "w")

    # control extraction
    d_control = {}
    d_control["pr ref"] = 0
    d_control["lig query"] = 0
    d_control["subref"] = {}
    d_control["subref empty"] = {}
    d_control["out sheap"] = {}
    filout_control = open(p_dir_result + "quality_extraction.txt", "w")

    # stock smile code
    d_smile = {}

    # sheap control
    d_filout_sheap = {}
    d_filout_sheap["list"] = [p_dir_result + "shaep_global.txt"]
    d_filout_sheap["global"] = open(p_dir_result + "shaep_global.txt", "w")
    d_filout_sheap["global"].write(
        "name\tbest_similarity\tshape_similarity\tESP_similarity\n")

    for ref_folder in l_folder_ref:
        # control folder reference name
        if len(ref_folder) != 4:
            p_log.write("[ERROR folder] -> " + ref_folder + "\n")
            continue

        # reference
        p_lig_ref = pathManage.findligandRef(p_dir_dataset + ref_folder + "/",
                                             name_lig)
        try:
            lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
#             print len (lig_ref_parsed)
        except:
            p_log.write("[ERROR ligand ref] -> " + p_lig_ref + "\n")
            continue

        #control
        d_control["pr ref"] = d_control["pr ref"] + 1

        # output by reference
        p_dir_result_ref = pathManage.result(name_lig + "/" + ref_folder)
        d_filout_superimposed = {}
        d_filout_superimposed["global"] = open(
            p_dir_result_ref + "all_ligand_aligned.pdb", "w")
        d_filout_superimposed["sheap"] = open(
            p_dir_result_ref + "all_ligand_aligned_" + str(thresold_shaep) +
            ".pdb", "w")

        # write lig ref -> connect matrix corrrect in all reference and all sheap
        writePDBfile.coordinateSection(d_filout_superimposed["global"],
                                       lig_ref_parsed,
                                       "HETATM",
                                       connect_matrix=1)
        writePDBfile.coordinateSection(d_filout_superimposed["sheap"],
                                       lig_ref_parsed,
                                       "HETATM",
                                       connect_matrix=1)

        # inspect folder dataset
        l_pdbfile = listdir(p_dir_dataset + ref_folder + "/")
        for pdbfile in l_pdbfile:
            # no ligand file
            if len(pdbfile.split("_")) == 1:
                continue
            pdbfile = pdbfile[:-4]  # remove extention

            if len(pdbfile.split("_")[0]) == 3 and len(pdbfile.split(
                    "_")[1]) == 4 and pdbfile.split("_")[1] != ref_folder:
                p_lig = p_dir_dataset + ref_folder + "/" + pdbfile + ".pdb"
                if p_lig_ref != p_lig:
                    # pass case where ligand replace same ligand -> does not need run
                    if pdbfile.split("_")[0] == name_lig:
                        p_log.write("[REMOVE] -> same ligand substituate")
                        continue

                    # parsed ligand query
                    lig_parsed = parsePDB.loadCoordSectionPDB(p_lig, "HETATM")

                    # find matrix of rotation
                    p_matrix = pathManage.findMatrix(p_lig_ref, p_lig,
                                                     name_lig)
                    # control file matrix exist
                    if not path.exists(p_matrix):
                        p_log.write("[ERROR] -> Matrix transloc " + p_lig_ref +
                                    " " + p_lig + " " + name_lig + "\n")
                        continue

                    # control
                    d_control["lig query"] = d_control["lig query"] + 1

                    # find the path of complex used
                    p_complex = p_dir_dataset + ref_folder + "/" + p_lig.split(
                        "/")[-1][4:]

                    # ligand rotated -> change the referentiel
                    superposeStructure.applyMatrixLigand(lig_parsed, p_matrix)

                    # use substruct
                    l_p_substruct_ref = pathManage.findSubstructRef(
                        pathManage.dataset(name_lig) + ref_folder + "/",
                        name_lig)
                    for p_substruct_ref in l_p_substruct_ref:
                        # ribose or phosphate
                        struct_type = p_substruct_ref.split("_")[-2]
                        substruct_parsed = parsePDB.loadCoordSectionPDB(
                            p_substruct_ref, "HETATM")

                        l_atom_substituate = neighborSearch.searchNeighborAtom(
                            substruct_parsed,
                            lig_parsed,
                            struct_type,
                            p_log,
                            thresold_superimposed_ribose=
                            thresold_superimposed_ribose,
                            thresold_superimposed_pi=thresold_superimposed_pi)
                        # control find
                        if len(l_atom_substituate) == 0:
                            if not struct_type in d_control[
                                    "subref empty"].keys():
                                d_control["subref empty"][struct_type] = 1
                            else:
                                d_control["subref empty"][
                                    struct_type] = d_control["subref empty"][
                                        struct_type] + 1
                            continue

                        else:
                            if not struct_type in d_control["subref"].keys():
                                d_control["subref"][struct_type] = 1
                            else:
                                d_control["subref"][struct_type] = d_control[
                                    "subref"][struct_type] + 1

                            # write PDB file, convert smile
                            p_substituate_pdb = p_dir_result_ref + "substituent_" + pdbfile.split(
                                "_")[0] + "_" + pdbfile.split(
                                    "_")[1] + "_" + struct_type + ".pdb"
                            writePDBfile.coordinateSection(p_substituate_pdb,
                                                           l_atom_substituate,
                                                           recorder="HETATM",
                                                           header=0,
                                                           connect_matrix=1)

                            # sheap reference on part of ligand
                            p_sheap = runOtherSoft.runShaep(
                                p_substruct_ref,
                                p_substituate_pdb,
                                p_substituate_pdb[0:-4] + ".hit",
                                clean=0)
                            val_sheap = parseShaep.parseOutputShaep(p_sheap)
                            if val_sheap == {}:
                                p_log.write("[ERROR] -> ShaEP " +
                                            p_substituate_pdb + " " +
                                            p_substruct_ref + "\n")

                                if not struct_type in d_control[
                                        "out sheap"].keys():
                                    d_control["out sheap"][struct_type] = 1
                                else:
                                    d_control["out sheap"][
                                        struct_type] = d_control["out sheap"][
                                            struct_type] + 1
                                continue

                            # control thresold sheap
                            if not struct_type in d_filout_sheap.keys():
                                d_filout_sheap[struct_type] = {}
                                d_filout_sheap[struct_type] = open(
                                    p_dir_result + "shaep_global_" +
                                    struct_type + ".txt", "w")
                                d_filout_sheap[struct_type].write(
                                    "name\tbest_similarity\tshape_similarity\tESP_similarity\n"
                                )
                                d_filout_sheap["list"].append(
                                    p_dir_result + "shaep_global_" +
                                    struct_type +
                                    ".txt")  # to improve with python function

                            # write value in ShaEP control
                            d_filout_sheap[struct_type].write(
                                ref_folder + "_" + str(pdbfile.split("_")[1]) +
                                "_" + struct_type + "_" +
                                str(pdbfile.split("_")[0]) + "\t" +
                                str(val_sheap["best_similarity"]) + "\t" +
                                str(val_sheap["shape_similarity"]) + "\t" +
                                str(val_sheap["ESP_similarity"]) + "\n")
                            d_filout_sheap["global"].write(
                                ref_folder + "_" + str(pdbfile.split("_")[1]) +
                                "_" + struct_type + "_" +
                                str(pdbfile.split("_")[0]) + "\t" +
                                str(val_sheap["best_similarity"]) + "\t" +
                                str(val_sheap["shape_similarity"]) + "\t" +
                                str(val_sheap["ESP_similarity"]) + "\n")

                            # rename file substituent with shaEP value
                            rename(
                                p_substituate_pdb,
                                p_substituate_pdb[:-4] + "_" +
                                str(val_sheap["best_similarity"]) + ".pdb")
                            # rename and change the file name
                            p_substituate_pdb = p_substituate_pdb[:-4] + "_" + str(
                                val_sheap["best_similarity"]) + ".pdb"

                            # write all substruct in global file
                            writePDBfile.coordinateSection(
                                d_filout_superimposed["global"],
                                lig_parsed,
                                recorder="HETATM",
                                header=str(p_lig.split("/")[-1]) + "_" +
                                str(val_sheap["best_similarity"]),
                                connect_matrix=1)

                            # control sheap thresold
                            if float(val_sheap["best_similarity"]
                                     ) >= thresold_shaep:

                                # write subligand superimposed selected in global files
                                writePDBfile.coordinateSection(
                                    d_filout_superimposed["sheap"],
                                    lig_parsed,
                                    recorder="HETATM",
                                    header=str(p_lig.split("/")[-1]) + "_" +
                                    str(val_sheap["best_similarity"]),
                                    connect_matrix=1)

                                ############
                                # write BS #
                                ############
                                # not only protein superimposed -> also ion and water
                                l_atom_complex = parsePDB.loadCoordSectionPDB(
                                    p_complex)
                                superposeStructure.applyMatrixProt(
                                    l_atom_complex, p_matrix)
                                p_file_cx = p_dir_result_ref + "CX_" + p_lig.split(
                                    "/")[-1]
                                # write CX
                                writePDBfile.coordinateSection(
                                    p_file_cx,
                                    l_atom_complex,
                                    recorder="ATOM",
                                    header=p_lig.split("/")[-1],
                                    connect_matrix=0)

                                # search atom in BS
                                l_atom_binding_site = []
                                for atom_complex in l_atom_complex:
                                    for atom_substruct in lig_parsed:
                                        if parsePDB.distanceTwoatoms(
                                                atom_substruct,
                                                atom_complex) <= thresold_BS:
                                            if not atom_complex in l_atom_binding_site:
                                                l_atom_binding_site.append(
                                                    deepcopy(atom_complex))

                                # 3. retrieve complet residue
                                l_atom_BS_res = parsePDB.getResidues(
                                    l_atom_binding_site, l_atom_complex)

                                # 4. write binding site
                                p_binding = p_dir_result_ref + "BS_" + p_lig.split(
                                    "/")[-1]
                                writePDBfile.coordinateSection(
                                    p_binding,
                                    l_atom_BS_res,
                                    "ATOM",
                                    p_binding,
                                    connect_matrix=0)

                                # smile code substituate analysis
                                # Step smile -> not conversion if shaep not validate
                                smile_find = runOtherSoft.babelConvertPDBtoSMILE(
                                    p_substituate_pdb)
                                if not struct_type in d_smile.keys():
                                    d_smile[struct_type] = {}
                                    d_smile[struct_type][smile_find] = {}
                                    d_smile[struct_type][smile_find][
                                        "count"] = 1
                                    d_smile[struct_type][smile_find]["PDB"] = [
                                        pdbfile.split("_")[1]
                                    ]
                                    d_smile[struct_type][smile_find][
                                        "ligand"] = [pdbfile.split("_")[0]]
                                    d_smile[struct_type][smile_find]["ref"] = [
                                        ref_folder
                                    ]
                                else:
                                    if not smile_find in d_smile[
                                            struct_type].keys():
                                        d_smile[struct_type][smile_find] = {}
                                        d_smile[struct_type][smile_find][
                                            "count"] = 1
                                        d_smile[struct_type][smile_find][
                                            "PDB"] = [pdbfile.split("_")[1]]
                                        d_smile[struct_type][smile_find][
                                            "ligand"] = [
                                                pdbfile.split("_")[0]
                                            ]
                                        d_smile[struct_type][smile_find][
                                            "ref"] = [ref_folder]
                                    else:
                                        d_smile[struct_type][smile_find][
                                            "count"] = d_smile[struct_type][
                                                smile_find]["count"] + 1
                                        d_smile[struct_type][smile_find][
                                            "PDB"].append(
                                                pdbfile.split("_")[1])
                                        d_smile[struct_type][smile_find][
                                            "ligand"].append(
                                                pdbfile.split("_")[0])
                                        d_smile[struct_type][smile_find][
                                            "ref"].append(ref_folder)

                            else:
                                if not struct_type in d_control[
                                        "out sheap"].keys():
                                    d_control["out sheap"][struct_type] = 1
                                else:
                                    d_control["out sheap"][
                                        struct_type] = d_control["out sheap"][
                                            struct_type] + 1

        tool.closeDicoFile(d_filout_superimposed)

    # sheap control
    tool.closeDicoFile(d_filout_sheap)
    for p_file_sheap in d_filout_sheap["list"]:
        runOtherSoft.RhistogramMultiple(p_file_sheap)

    # write list of smile
    for substruct in d_smile.keys():
        p_list_smile = pathManage.result(
            name_lig) + "list_" + substruct + "_" + str(
                thresold_shaep) + "_smile.txt"
        filout_smile = open(p_list_smile, "w")
        for smile_code in d_smile[substruct].keys():
            l_lig = d_smile[substruct][smile_code]["ligand"]
            l_PDB = d_smile[substruct][smile_code]["PDB"]
            l_ref = d_smile[substruct][smile_code]["ref"]
            filout_smile.write(
                str(smile_code) + "\t" +
                str(d_smile[substruct][smile_code]["count"]) + "\t" +
                " ".join(l_PDB) + "\t" + " ".join(l_ref) + "\t" +
                " ".join(l_lig) + "\n")
        filout_smile.close()
    p_log.close()

    # control
    filout_control.write("NB ref: " + str(d_control["pr ref"]) + "\n")
    filout_control.write("Ligand query: " + str(d_control["lig query"]) + "\n")
    for k in d_control["subref"].keys():
        filout_control.write("LSR " + str(k) + ": " +
                             str(d_control["subref"][k]) + "\n")
    for k in d_control["subref empty"].keys():
        filout_control.write("NB LSR empty " + str(k) + ": " +
                             str(d_control["subref empty"][k]) + "\n")
    for k in d_control["out sheap"].keys():
        filout_control.write("LSR out by sheap " + str(k) + ": " +
                             str(d_control["out sheap"][k]) + "\n")

    filout_control.write("**********************\n\n")
    for k in d_control["subref"].keys():
        filout_control.write("LSR keep" + str(k) + ": " +
                             str(d_control["subref"][k] -
                                 d_control["out sheap"][k]) + "\n")

    filout_control.close()

    return 1

Example #15

Show file

def analyseIons (pr_dataset, name_ligand, p_filout, thresold_max_interaction = 4.0) :
    """Report the ions found around the ligand atoms of every reference complex.

    Every 4-character entry of ``pr_dataset`` is treated as one reference
    complex.  For each HETATM record of the complex whose ``resName`` is in
    the module-level ``l_ions`` list, the distances and angle(s) to the
    atoms returned by ``retrieveTwoAtomForAngle`` are computed; ions closer
    than 10 to the first two of those atoms are written to ``p_filout``.

    Three files are produced:
      * ``p_filout``: one tab-separated row per retained ion,
      * ``p_filout[0:-4] + "count.txt"``: global counters,
      * ``p_filout[0:-4] + "byIons_" + name_ligand``: number of complexes
        containing each ion; this file is then passed to
        ``runOtherSoft.barplot``.

    Args:
        pr_dataset: dataset directory; folder names are appended directly,
            so the path must already end with a separator.
        name_ligand: ligand name.  ``"ATP"`` enables the third distance and
            the second angle (a third atom is then read from the helper's
            result).
        p_filout: path of the main output file; its last four characters
            are stripped to build the two sibling file names.
        thresold_max_interaction: distance cut-off under which an ion is
            counted as interacting with a pair of atoms.

    Returns:
        None.  Results are only written to disk.
    """

    l_folder_ref = listdir(pr_dataset)
    is_atp = name_ligand == "ATP"

    # global counters written to the "count" file
    d_count = {
        "CX": 0,
        "CX + ions": 0,
        "BS + ions": 0,
        "BS + 1-ion": 0,
        "BS + 2-ions": 0,
        "BS + more-ions": 0,
        "Interact-1": 0,
        "Interact-2": 0,
    }

    # number of complexes in which each ion appears at least once
    d_ions = {}

    # context manager: the handle is released even if a helper raises
    with open(p_filout, "w") as filout:
        if is_atp:
            filout.write("PDB\tIon\tD1\tD2\tD3\tAngle1\tAngle2\tAt1\tAt2\tA3\n")
        else:
            filout.write("PDB\tIon\tD1\tD2\tAngle\tAt1\tAt2\n")

        for ref_folder in l_folder_ref:
            if len(ref_folder) != 4:
                continue
            only_one = 0
            d_count["CX"] += 1
            ions_in_complex = set()

            # path and complex
            p_lig_ref = pathManage.findligandRef(pr_dataset + ref_folder + "/", name_ligand)
            p_complex = pathManage.findPDBRef(pr_dataset + ref_folder + "/")

            # parsing
            lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
            l_het_parsed = parsePDB.loadCoordSectionPDB(p_complex, "HETATM")

            # atoms of the ligand used as anchors for distances and angles
            l_pi = retrieveTwoAtomForAngle(lig_ref_parsed, name_ligand)
            if l_pi == []:  # case ligand without phosphate
                continue

            flag_interact = 0
            flag_between_1 = 0
            flag_between_2 = 0

            for het_parsed in l_het_parsed:
                ion_name = het_parsed["resName"]
                if ion_name not in l_ions:
                    continue

                d_count["CX + ions"] += 1
                # each ion type is counted once per complex
                if ion_name not in ions_in_complex:
                    d_ions[ion_name] = d_ions.get(ion_name, 0) + 1
                    ions_in_complex.add(ion_name)

                d1 = parsePDB.distanceTwoatoms(l_pi[0], het_parsed)
                d2 = parsePDB.distanceTwoatoms(l_pi[1], het_parsed)
                if is_atp:
                    d3 = parsePDB.distanceTwoatoms(l_pi[2], het_parsed)
                    angle_bis = parsePDB.angleVector(l_pi[1], het_parsed, l_pi[2])
                angle = parsePDB.angleVector(l_pi[0], het_parsed, l_pi[1])

                # only ions close to both anchor atoms are reported
                if not (d1 < 10 and d2 < 10):
                    continue

                # NOTE(review): these per-ion entries of d_count are never
                # written out below; kept to leave the bookkeeping unchanged.
                if ion_name not in d_count:
                    d_count[ion_name] = 0
                if only_one == 0:
                    d_count[ion_name] += 1
                    only_one = 1

                d_count["BS + ions"] += 1
                flag_interact += 1
                if d1 < thresold_max_interaction and d2 < thresold_max_interaction:
                    flag_between_1 += 1

                if is_atp:
                    if d3 < thresold_max_interaction and d2 < thresold_max_interaction:
                        flag_between_2 += 1
                    l_field = [ref_folder, ion_name, d1, d2, d3, angle, angle_bis,
                               l_pi[0]["serial"], l_pi[1]["serial"], l_pi[2]["serial"]]
                else:
                    l_field = [ref_folder, ion_name, d1, d2, angle,
                               l_pi[0]["serial"], l_pi[1]["serial"]]
                filout.write("\t".join([str(field) for field in l_field]) + "\n")

            # classify the complex by its number of retained ions
            if flag_interact == 1:
                d_count["BS + 1-ion"] += 1
            elif flag_interact == 2:
                d_count["BS + 2-ions"] += 1
            elif flag_interact > 2:
                d_count["BS + more-ions"] += 1

            d_count["Interact-1"] += flag_between_1
            d_count["Interact-2"] += flag_between_2

    # (label written in the file, key in d_count), in output order
    l_count_line = [
        ("CX: ", "CX"),
        ("CX + ions: ", "CX + ions"),
        ("BS + ions: ", "BS + ions"),
        ("BS + 1-ion: ", "BS + 1-ion"),
        ("BS + 2-ions: ", "BS + 2-ions"),
        ("BS + more-ions: ", "BS + more-ions"),
        ("Interact Pi-alpha + Pi-beta: ", "Interact-1"),
        ("Interact Pi-beta + Pi-gama: ", "Interact-2"),
    ]
    with open(p_filout[0:-4] + "count.txt", "w") as filout_count:
        for label, key in l_count_line:
            filout_count.write(label + str(d_count[key]) + "\n")

    p_by_ion = p_filout[0:-4] + "byIons_" + name_ligand
    with open(p_by_ion, "w") as filout_by_ion:
        for k in d_ions:
            filout_by_ion.write(str(k.capitalize()) + "\t" + str(d_ions[k]) + "\n")

    # the plot is built from the file, so it must be closed (flushed) first
    runOtherSoft.barplot(p_by_ion)

Example #16

Show file

def computeRMSDBS (p_ref, p_query, p_substruct, pr_result, thresold_BS = 6) :
    """Compute the RMSD between a reference binding site and its query counterpart.

    The binding site is made of the reference ATOM records lying within
    ``thresold_BS`` of any atom of the substructure, completed to full
    residues by ``parsePDB.getResidues``.  Each binding-site atom is then
    paired with the closest query atom sharing the same ``resName`` and
    ``name``, and both lists are handed to ``RMSDTwoList``.

    Args:
        p_ref: path of the reference PDB file.
        p_query: path of the query PDB file.
        p_substruct: path of the substructure PDB file.
        pr_result: currently unused; kept so existing callers still work.
        thresold_BS: distance cut-off defining the binding site.

    Returns:
        ``[]`` when ``RMSDTwoList`` returns ``[]``; otherwise the list it
        returns with ``flag_identic_crystal`` appended (1 when every paired
        query atom has the same ``resSeq`` as its reference atom, else 0).
    """

    l_atom_query_parsed = parsePDB.loadCoordSectionPDB(p_query, "ATOM")
    l_atom_ref_parsed = parsePDB.loadCoordSectionPDB(p_ref, "ATOM")
    l_atom_substruct = parsePDB.loadCoordSectionPDB(p_substruct)

    # reference atoms close to the substructure
    l_BS_ref = []
    for atom_substruct in l_atom_substruct:
        for atom_ref in l_atom_ref_parsed:
            if parsePDB.distanceTwoatoms(atom_substruct, atom_ref) <= thresold_BS:
                l_BS_ref.append(atom_ref)
    # retrieve full residues
    l_BS_ref = parsePDB.getResidues(l_BS_ref, l_atom_ref_parsed)

    l_BS_query = []
    flag_identic_crystal = 1
    for atomBS_ref in l_BS_ref:
        # Reset the best match for every reference atom: otherwise an atom
        # without counterpart would silently reuse the match of a previous one.
        best_match = None
        d_max = 100.0  # query atoms at this distance or farther are never paired
        for atom_query in l_atom_query_parsed:
            if atom_query["resName"] != atomBS_ref["resName"]:
                continue
            if atom_query["name"] != atomBS_ref["name"]:
                continue
            d = parsePDB.distanceTwoatoms(atom_query, atomBS_ref)
            if d < d_max:
                d_max = d
                best_match = atom_query

        if best_match is None:
            # NOTE(review): no counterpart in the query; l_BS_query then ends
            # up shorter than l_BS_ref -- confirm how RMSDTwoList handles it.
            continue

        l_BS_query.append(deepcopy(best_match))
        # identical residue numbering check
        if best_match["resSeq"] != atomBS_ref["resSeq"]:
            flag_identic_crystal = 0

    l_RMSD = RMSDTwoList(l_BS_query, l_BS_ref)

    if l_RMSD == []:
        return []
    return l_RMSD + [flag_identic_crystal]