Python distanceTwoatoms Examples

Programming Language: Python

Namespace/Package Name: parsePDB

Method/Function: distanceTwoatoms

Examples at hotexamples.com: 8

Python distanceTwoatoms - 8 examples found. These are the top rated real world Python examples of parsePDB.distanceTwoatoms extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

def searchNeighborAtom(substruct_parsed,
                       lig_query_parsed,
                       struct_type,
                       log_file,
                       thresold_superimposed_ribose=2.5,
                       thresold_superimposed_pi=3):

    l_atom_substituate = []
    if struct_type == "ribose":
        for atom_substruct in substruct_parsed:
            for atom_query in lig_query_parsed:
                if parsePDB.distanceTwoatoms(
                        atom_substruct,
                        atom_query) <= thresold_superimposed_ribose:
                    out = copy(atom_query)
                    if not out in l_atom_substituate:
                        l_atom_substituate.append(out)

    else:
        l_atom_interest = retrievePi(substruct_parsed)
        for atom_interest in l_atom_interest:
            for atom_query in lig_query_parsed:
                if parsePDB.distanceTwoatoms(
                        atom_interest, atom_query) <= thresold_superimposed_pi:
                    out = copy(atom_query)
                    if not out in l_atom_substituate:
                        l_atom_substituate.append(out)

    # control out empty
    if l_atom_substituate == []:
        log_file.write("[Not substituate] -> " +
                       substruct_parsed[0]["resName"] + struct_type + "\n")
        return []
    else:
        return l_atom_substituate

Example #2

Show file

File: neighborSearch.py Project: ABorrel/LSRs

def searchNeighborAtom(substruct_parsed, lig_query_parsed, struct_type, log_file, thresold_superimposed_ribose = 2.5, thresold_superimposed_pi = 3 ) : 
    
    l_atom_substituate = []
    if struct_type == "ribose" : 
        for atom_substruct in substruct_parsed :
            for atom_query in lig_query_parsed : 
                if parsePDB.distanceTwoatoms(atom_substruct, atom_query) <= thresold_superimposed_ribose : 
                    out = copy(atom_query)
                    if not out in l_atom_substituate : 
                        l_atom_substituate.append (out)
        
    else : 
        l_atom_interest = retrievePi (substruct_parsed)
        for atom_interest in l_atom_interest :
            for atom_query in lig_query_parsed : 
                if parsePDB.distanceTwoatoms(atom_interest, atom_query) <= thresold_superimposed_pi : 
                    out = copy(atom_query)
                    if not out in l_atom_substituate : 
                        l_atom_substituate.append (out)
    
    # control out empty
    if l_atom_substituate == [] : 
        log_file.write ("[Not substituate] -> " + substruct_parsed[0] ["resName"]+ struct_type + "\n")
        return []
    else : 
        return l_atom_substituate

Example #3

Show file

def RMSDTwoList (l_atom1, l_atom2) : 
    
    nb_ca = 0.0
    d_max = {"value": 0.0}
    diff_position_all = 0.0
    diff_position_ca = 0.0
    
    if len (l_atom1) != len (l_atom2) or len (l_atom2) == 0 : 
        print "ERROR - RMSD: list length different or null"
        return []
    else : 
        i = 0
        while i < len (l_atom1): 
            if l_atom1[i]["name"] != l_atom2[i]["name"] and l_atom1[i]["resName"] != l_atom2[i]["resName"]: 
                print l_atom1[i]["name"] , l_atom2[i]["name"]
                print "ERROR"
                return []
            else : 
                d_atom = parsePDB.distanceTwoatoms(l_atom1[i], l_atom2[i])
                diff_position_all = diff_position_all + d_atom
                
                if l_atom1[i]["name"] == "CA" : 
                    diff_position_ca = diff_position_ca + d_atom
                    nb_ca = nb_ca + 1
                
                if d_atom > d_max["value"] : 
                    d_max["value"] = d_atom
                    d_max["atom"] = l_atom1[i]["name"] + "-" +  l_atom2[i]["name"] + "_" + l_atom1[i]["resName"] + "-" +  l_atom2[i]["resName"]
                    
            i = i + 1
#     print d_max
    return [sqrt(diff_position_all / len (l_atom1)), sqrt (diff_position_ca / nb_ca), d_max["value"], len (l_atom1)]

Example #4

Show file

File: main.py Project: ABorrel/LSRs

def retrieveSubstructSuperimposed (name_lig, thresold_BS = 4.5, thresold_superimposed_ribose = 2.5, thresold_superimposed_pi = 3, thresold_shaep = 0.4):

    # ouput
    p_dir_dataset = pathManage.dataset(name_lig)
    p_dir_result = pathManage.result(name_lig )
    l_folder_ref = listdir(p_dir_dataset)

    # log control
    p_log = open(p_dir_result + "log_superimposed.txt", "w")

    # control extraction
    d_control = {}
    d_control["pr ref"] = 0
    d_control["lig query"] = 0
    d_control["subref"] = {}
    d_control["subref empty"] = {}
    d_control["out sheap"] = {}
    filout_control = open (p_dir_result + "quality_extraction.txt", "w")

    # stock smile code
    d_smile = {}

    # sheap control
    d_filout_sheap = {}
    d_filout_sheap ["list"] = [p_dir_result + "shaep_global.txt"]
    d_filout_sheap["global"] = open (p_dir_result + "shaep_global.txt", "w") 
    d_filout_sheap["global"].write ("name\tbest_similarity\tshape_similarity\tESP_similarity\n")

    for ref_folder in l_folder_ref :
        # control folder reference name
        if len (ref_folder) != 4 : 
            p_log.write ("[ERROR folder] -> " + ref_folder + "\n")
            continue

        # reference
        p_lig_ref = pathManage.findligandRef(p_dir_dataset + ref_folder + "/", name_lig)
        try:
            lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
#             print len (lig_ref_parsed)
        except:
            p_log.write ("[ERROR ligand ref] -> " + p_lig_ref + "\n")
            continue

        #control
        d_control["pr ref"] = d_control["pr ref"] + 1

        # output by reference
        p_dir_result_ref = pathManage.result(name_lig + "/" + ref_folder)
        d_filout_superimposed = {}
        d_filout_superimposed["global"] = open (p_dir_result_ref + "all_ligand_aligned.pdb", "w")
        d_filout_superimposed["sheap"] = open (p_dir_result_ref + "all_ligand_aligned_" + str (thresold_shaep)  + ".pdb", "w")
        
        
        
        # write lig ref -> connect matrix corrrect in all reference and all sheap
        writePDBfile.coordinateSection(d_filout_superimposed["global"], lig_ref_parsed, "HETATM", connect_matrix = 1)
        writePDBfile.coordinateSection(d_filout_superimposed["sheap"], lig_ref_parsed, "HETATM", connect_matrix = 1)
        
        # inspect folder dataset
        l_pdbfile = listdir(p_dir_dataset + ref_folder + "/")
        for pdbfile in l_pdbfile : 
            # no ligand file
            if len (pdbfile.split ("_")) == 1 : 
                continue
            pdbfile = pdbfile[:-4] # remove extention
            
            if len(pdbfile.split ("_")[0]) == 3  and len(pdbfile.split ("_")[1]) == 4 and pdbfile.split ("_")[1] != ref_folder:
                p_lig = p_dir_dataset + ref_folder + "/" + pdbfile  + ".pdb"
                if p_lig_ref != p_lig : 
                    # pass case where ligand replace same ligand -> does not need run
                    if pdbfile.split ("_")[0] == name_lig : 
                        p_log.write ("[REMOVE] -> same ligand substituate")
                        continue
                    
                    # parsed ligand query
                    lig_parsed = parsePDB.loadCoordSectionPDB(p_lig, "HETATM")

                    # find matrix of rotation
                    p_matrix = pathManage.findMatrix(p_lig_ref, p_lig, name_lig)
                    # control file matrix exist
                    if not path.exists(p_matrix) : 
                        p_log.write ("[ERROR] -> Matrix transloc " + p_lig_ref + " " + p_lig + " " + name_lig + "\n")
                        continue
                    
                    # control
                    d_control["lig query"] = d_control["lig query"] + 1
                    
                    # find the path of complex used
                    p_complex = p_dir_dataset + ref_folder + "/" + p_lig.split ("/")[-1][4:]
                    
                    # ligand rotated -> change the referentiel
                    superposeStructure.applyMatrixLigand(lig_parsed, p_matrix)
                    
                    
                    # use substruct
                    l_p_substruct_ref = pathManage.findSubstructRef (pathManage.dataset(name_lig) + ref_folder + "/" , name_lig)
                    for p_substruct_ref in l_p_substruct_ref : 
                        # ribose or phosphate
                        struct_type = p_substruct_ref.split ("_")[-2]
                        substruct_parsed = parsePDB.loadCoordSectionPDB(p_substruct_ref, "HETATM")
                        
                        l_atom_substituate = neighborSearch.searchNeighborAtom(substruct_parsed, lig_parsed, struct_type, p_log, thresold_superimposed_ribose = thresold_superimposed_ribose, thresold_superimposed_pi = thresold_superimposed_pi)    
                        # control find 
                        if len (l_atom_substituate) == 0 :  
                            if not struct_type in d_control["subref empty"].keys () : 
                                d_control["subref empty"][struct_type] = 1
                            else : 
                                d_control["subref empty"][struct_type] = d_control["subref empty"][struct_type] + 1
                            continue
                        
                        else : 
                            if not struct_type in d_control["subref"].keys () : 
                                d_control["subref"][struct_type] = 1
                            else : 
                                d_control["subref"][struct_type] = d_control["subref"][struct_type] + 1
                            
                            # write PDB file, convert smile
                            p_substituate_pdb = p_dir_result_ref + "substituent_" + pdbfile.split ("_")[0] + "_" + pdbfile.split ("_")[1] + "_" + struct_type + ".pdb"
                            writePDBfile.coordinateSection(p_substituate_pdb, l_atom_substituate, recorder="HETATM", header=0, connect_matrix = 1)
    
                            # sheap reference on part of ligand
                            p_sheap = runOtherSoft.runShaep (p_substruct_ref, p_substituate_pdb, p_substituate_pdb[0:-4] + ".hit", clean = 0)
                            val_sheap = parseShaep.parseOutputShaep (p_sheap)
                            if val_sheap == {} : 
                                p_log.write ("[ERROR] -> ShaEP " + p_substituate_pdb + " " + p_substruct_ref + "\n")
                                
                                if not struct_type in d_control["out sheap"].keys () :
                                    d_control["out sheap"][struct_type] = 1
                                else : 
                                    d_control["out sheap"][struct_type] = d_control["out sheap"][struct_type] + 1
                                continue
                            
                            # control thresold sheap
                            if not struct_type in d_filout_sheap.keys () : 
                                d_filout_sheap[struct_type] = {}
                                d_filout_sheap[struct_type] = open (p_dir_result + "shaep_global_" + struct_type + ".txt", "w")
                                d_filout_sheap[struct_type].write ("name\tbest_similarity\tshape_similarity\tESP_similarity\n")
                                d_filout_sheap["list"].append (p_dir_result + "shaep_global_" + struct_type + ".txt") # to improve with python function
                            
                            # write value in ShaEP control
                            d_filout_sheap[struct_type].write (ref_folder + "_" +  str(pdbfile.split ("_")[1]) + "_" + struct_type + "_" + str (pdbfile.split ("_")[0]) + "\t" + str(val_sheap["best_similarity"]) + "\t" + str(val_sheap["shape_similarity"]) + "\t" + str(val_sheap["ESP_similarity"]) + "\n")
                            d_filout_sheap["global"].write (ref_folder + "_" +  str(pdbfile.split ("_")[1]) + "_" + struct_type + "_" + str (pdbfile.split ("_")[0]) + "\t" + str(val_sheap["best_similarity"]) + "\t" + str(val_sheap["shape_similarity"]) + "\t" + str(val_sheap["ESP_similarity"]) + "\n")
                            
                            # rename file substituent with shaEP value
                            rename(p_substituate_pdb, p_substituate_pdb[:-4] + "_" + str (val_sheap["best_similarity"]) + ".pdb")
                            # rename and change the file name
                            p_substituate_pdb = p_substituate_pdb[:-4] + "_" + str (val_sheap["best_similarity"]) + ".pdb"
                            
                            # write all substruct in global file
                            writePDBfile.coordinateSection(d_filout_superimposed["global"], lig_parsed, recorder= "HETATM", header = str(p_lig.split ("/")[-1]) + "_" + str (val_sheap["best_similarity"]) ,  connect_matrix = 1)
                            
                            # control sheap thresold    
                            if float(val_sheap["best_similarity"]) >= thresold_shaep  : 
                                
                                # write subligand superimposed selected in global files
                                writePDBfile.coordinateSection(d_filout_superimposed["sheap"], lig_parsed, recorder= "HETATM", header = str(p_lig.split ("/")[-1]) + "_" + str (val_sheap["best_similarity"]) ,  connect_matrix = 1)
                                
                                ############
                                # write BS #
                                ############
                                # not only protein superimposed -> also ion and water
                                l_atom_complex = parsePDB.loadCoordSectionPDB(p_complex)
                                superposeStructure.applyMatrixProt(l_atom_complex, p_matrix)
                                p_file_cx = p_dir_result_ref +  "CX_" + p_lig.split ("/")[-1]
                                # write CX
                                writePDBfile.coordinateSection(p_file_cx, l_atom_complex, recorder="ATOM", header= p_lig.split ("/")[-1], connect_matrix = 0)
    
                                # search atom in BS
                                l_atom_binding_site = []
                                for atom_complex in l_atom_complex : 
                                    for atom_substruct in lig_parsed : 
                                        if parsePDB.distanceTwoatoms (atom_substruct, atom_complex) <= thresold_BS :
                                            if not atom_complex in l_atom_binding_site : 
                                                l_atom_binding_site.append (deepcopy(atom_complex))
                                
                                # 3. retrieve complet residue
                                l_atom_BS_res = parsePDB.getResidues(l_atom_binding_site, l_atom_complex)
                                                
                                # 4. write binding site
                                p_binding = p_dir_result_ref +  "BS_" + p_lig.split ("/")[-1]
                                writePDBfile.coordinateSection(p_binding, l_atom_BS_res, "ATOM", p_binding, connect_matrix = 0)
                                
                                # smile code substituate analysis                    
                                # Step smile -> not conversion if shaep not validate 
                                smile_find = runOtherSoft.babelConvertPDBtoSMILE(p_substituate_pdb)
                                if not struct_type in d_smile.keys ()  :
                                    d_smile[struct_type] = {}
                                    d_smile[struct_type][smile_find] = {}
                                    d_smile[struct_type][smile_find]["count"] = 1
                                    d_smile[struct_type][smile_find]["PDB"] = [pdbfile.split ("_")[1]]
                                    d_smile[struct_type][smile_find]["ligand"] = [pdbfile.split ("_")[0]]
                                    d_smile[struct_type][smile_find]["ref"] = [ref_folder]
                                else : 
                                    if not smile_find in d_smile[struct_type].keys () : 
                                        d_smile[struct_type][smile_find] = {}
                                        d_smile[struct_type][smile_find]["count"] = 1
                                        d_smile[struct_type][smile_find]["PDB"] = [pdbfile.split ("_")[1]]
                                        d_smile[struct_type][smile_find]["ligand"] = [pdbfile.split ("_")[0]] 
                                        d_smile[struct_type][smile_find]["ref"] = [ref_folder]
                                    else : 
                                        d_smile[struct_type][smile_find]["count"] = d_smile[struct_type][smile_find]["count"] + 1
                                        d_smile[struct_type][smile_find]["PDB"].append (pdbfile.split ("_")[1])
                                        d_smile[struct_type][smile_find]["ligand"].append (pdbfile.split ("_")[0])
                                        d_smile[struct_type][smile_find]["ref"].append (ref_folder)

                            else : 
                                if not struct_type in d_control["out sheap"].keys () : 
                                    d_control["out sheap"][struct_type] = 1
                                else : 
                                    d_control["out sheap"][struct_type] = d_control["out sheap"][struct_type] + 1

        tool.closeDicoFile (d_filout_superimposed)

    # sheap control    
    tool.closeDicoFile (d_filout_sheap)
    for p_file_sheap in d_filout_sheap["list"] : 
        runOtherSoft.RhistogramMultiple (p_file_sheap)    
        
            
    # write list of smile
    for substruct in d_smile.keys () : 
        p_list_smile = pathManage.result(name_lig) + "list_" + substruct + "_" + str (thresold_shaep) + "_smile.txt"
        filout_smile = open (p_list_smile, "w")
        for smile_code in d_smile[substruct].keys () : 
            l_lig = d_smile[substruct][smile_code]["ligand"]
            l_PDB = d_smile[substruct][smile_code]["PDB"]
            l_ref = d_smile[substruct][smile_code]["ref"]
            filout_smile.write (str (smile_code) + "\t" + str (d_smile[substruct][smile_code]["count"]) + "\t" + " ".join (l_PDB) + "\t" + " ".join (l_ref) + "\t" + " ".join(l_lig) + "\n")
        filout_smile.close ()
    p_log.close ()
    
    # control
    filout_control.write ("NB ref: " + str(d_control["pr ref"]) + "\n")
    filout_control.write ("Ligand query: " + str(d_control["lig query"]) + "\n")
    for k in d_control["subref"].keys () :
        filout_control.write ("LSR " + str (k) + ": " + str(d_control["subref"][k]) + "\n")
    for k in d_control["subref empty"].keys () :
        filout_control.write ("NB LSR empty " + str (k) + ": " + str(d_control["subref empty"][k]) + "\n")
    for k in d_control["out sheap"].keys () :
        filout_control.write ("LSR out by sheap " + str (k) + ": " + str(d_control["out sheap"][k]) + "\n")
    
    filout_control.write ("**********************\n\n")
    for k in d_control["subref"].keys () :
        filout_control.write ("LSR keep" + str (k) + ": " + str(d_control["subref"][k] - d_control["out sheap"][k]) + "\n")
    
    filout_control.close ()
    
    return 1

Example #5

Show file

File: arrangeResult.py Project: ABorrel/LSRs

def enantiomer(l_ligand, name_folder_final, debug = 1) : 
    "to do file output"
    
    pr_final = pathManage.result("final_" + name_folder_final)
    
    pr_enantiomer = pathManage.generatePath(pr_final + "enantiomer/")
    
    l_ref = []

    d_filout = {}
    for ligand in l_ligand : 
        d_filout[ligand] = {}
        d_filout[ligand]["O3OP"]= open (pr_enantiomer + ligand + "_" + "O3OP" , "w")
        d_filout[ligand]["O4O5"]= open (pr_enantiomer + ligand + "_" + "O4O5" , "w")
        d_filout[ligand]["OPOP"]= open (pr_enantiomer + ligand + "_" + "OPOP" , "w")
        
    l_pr_type_ref = listdir(pr_final) 
    for pr_type_ref in l_pr_type_ref : 
        if debug : print "1", pr_type_ref
        # case where pr_substruct is a file not a folder
        try : l_pr_sub = listdir(pr_final + pr_type_ref + "/")
        except : continue

        for pr_sub in l_pr_sub : 
            print "2", pr_sub

            # case cycle -> append in list respertory with new folder
            if pr_sub == "cycle" : 
                l_pr_sub.remove ("cycle")
                l_pr_sub_cycle = listdir (pr_final + pr_type_ref + "/cycle")
                for pr_sub_cycle in l_pr_sub_cycle : 
                    l_pr_sub.append ("cycle/" + pr_sub_cycle)
                break
        
        for pr_sub in l_pr_sub : 
            try : l_pr_ref = listdir (pr_final + pr_type_ref + "/" + pr_sub)
            except : pass
            if debug : print "3", pr_sub
            
            for pr_ref in l_pr_ref : 
                if debug : print "4", pr_ref
                # case no folder
                try : l_file = listdir(pr_final + pr_type_ref + "/" + pr_sub + "/" + pr_ref + "/LGD/")
                except : continue
                for name_file in l_file : 
                    if search("LGD_REF_A",name_file) and search(".pdb",name_file): 
                        #print "2222", l_ref
                        if name_file.split("_")[3][:4] in l_ref : 
                            print "!!!!!", "IN"
                            break
                        else : l_ref.append (name_file.split ("_")[3][:4])                       
 
                        ligand = name_file.split ("_")[2]
                        l_atom_ligand = parsePDB.loadCoordSectionPDB(pr_final + pr_type_ref + "/" + pr_sub + "/" + pr_ref + "/LGD/" + name_file, "HETATM")
                        d_minO3OP = 100
                        for atom_ligand in l_atom_ligand : 
                            if atom_ligand["name"] == "O4'" :
                                atom_O4 = atom_ligand
                            elif atom_ligand["name"] == "O5'" :
                                atom_O5 = atom_ligand
                            elif  atom_ligand["name"] == "O3'" :
                                atom_O3 = atom_ligand
                            elif  atom_ligand["name"] == "O1A" :
                                atom_O1A = atom_ligand
                            elif  atom_ligand["name"] == "O2A" :
                                atom_O2A = atom_ligand
                            elif  atom_ligand["name"] == "O1B" :
                                atom_O1B = atom_ligand
                            elif  atom_ligand["name"] == "O2B" :
                                atom_O2B = atom_ligand
                            #elif  atom_ligand["name"] == "O3B" :
                            #    atom_O3B = atom_ligand
                    
                        # d O4 - O5        
                        try : d_O4O5 = parsePDB.distanceTwoatoms(atom_O4, atom_O5)
                        except : continue
                        d_filout[ligand]["O4O5"].write (pr_ref + "_" + pr_type_ref  + "\t" + str (d_O4O5) + "\n")

                        # d O3 - OP
                        for atom_ligand in l_atom_ligand : 
                            if ligand == "AMP" : 
                                if atom_ligand["name"] == "O1P" or atom_ligand["name"] == "O2P" or atom_ligand["name"] == "O3P" : 
                                    d_tempO3OP = parsePDB.distanceTwoatoms(atom_O3, atom_ligand)
                                    if d_tempO3OP < d_minO3OP : 
                                        d_minO3OP = d_tempO3OP
                                        atom_tempO3OP = deepcopy(atom_ligand)
                            else : 
                                if atom_ligand["name"] == "O1A" or atom_ligand["name"] == "O2A" or atom_ligand["name"] == "O3A" : 
                                    d_tempO3OP = parsePDB.distanceTwoatoms(atom_O4, atom_ligand)
                                    if d_tempO3OP < d_minO3OP : 
                                        d_minO3OP = d_tempO3OP
                                        atom_tempO3OP = deepcopy(atom_ligand)
                        d_filout[ligand]["O3OP"].write (pr_ref + "_" + pr_type_ref  +"_" + str(atom_tempO3OP["name"]) + "\t" + str (d_minO3OP) + "\n")
    
                        # d OP OP
                        d_OP = {}
                        if ligand == "ATP" or ligand == "ADP" : 
                            d_OP ["O1AO1B"] = parsePDB.distanceTwoatoms(atom_O1A, atom_O1B)
                            d_OP ["O1AO2B"] = parsePDB.distanceTwoatoms(atom_O1A, atom_O2B)
                            #d_OP ["O1AO3B"] = parsePDB.distanceTwoatoms(atom_O1A, atom_O3B)
                            d_OP ["O2AO1B"] = parsePDB.distanceTwoatoms(atom_O2A, atom_O1B)
                            d_OP ["O2AO2B"] = parsePDB.distanceTwoatoms(atom_O2A, atom_O2B)
                            #d_OP ["O2AO3B"] = parsePDB.distanceTwoatoms(atom_O2A, atom_O3B)
                        
                            d_minOPOP = min (d_OP.values())
                            #print d_minOPOP
                            k_min = [name for name, age in d_OP.items() if age == min (d_OP.values())][0]
                            #print k_min
                            d_filout[ligand]["OPOP"].write (pr_ref + "_" + pr_type_ref  + "_" + str(k_min) + "\t" + str (d_minOPOP) + "\n")
                    
                        try :
                            del d_OP 
                            del atom_O1A
                            del atom_O1B
                            del atom_O2A
                            del atom_O2B
                        except : 
                            pass
                        try : 
                            del atom_O3
                            del atom_O4
                            del atom_O5
                        except :
                            pass
            
    # close files
    for lig in l_ligand : 
        for type_dist in d_filout[lig].keys () : 
            p_file = d_filout[lig][type_dist].name
            d_filout[lig][type_dist].close ()
            runOtherSoft.Rhistogram(p_file, type_dist, brk = 20)

Example #6

Show file

def retrieveSubstructSuperimposed(name_lig,
                                  thresold_BS=4.5,
                                  thresold_superimposed_ribose=2.5,
                                  thresold_superimposed_pi=3,
                                  thresold_shaep=0.4):

    # ouput
    p_dir_dataset = pathManage.dataset(name_lig)
    p_dir_result = pathManage.result(name_lig)
    l_folder_ref = listdir(p_dir_dataset)

    # log control
    p_log = open(p_dir_result + "log_superimposed.txt", "w")

    # control extraction
    d_control = {}
    d_control["pr ref"] = 0
    d_control["lig query"] = 0
    d_control["subref"] = {}
    d_control["subref empty"] = {}
    d_control["out sheap"] = {}
    filout_control = open(p_dir_result + "quality_extraction.txt", "w")

    # stock smile code
    d_smile = {}

    # sheap control
    d_filout_sheap = {}
    d_filout_sheap["list"] = [p_dir_result + "shaep_global.txt"]
    d_filout_sheap["global"] = open(p_dir_result + "shaep_global.txt", "w")
    d_filout_sheap["global"].write(
        "name\tbest_similarity\tshape_similarity\tESP_similarity\n")

    for ref_folder in l_folder_ref:
        # control folder reference name
        if len(ref_folder) != 4:
            p_log.write("[ERROR folder] -> " + ref_folder + "\n")
            continue

        # reference
        p_lig_ref = pathManage.findligandRef(p_dir_dataset + ref_folder + "/",
                                             name_lig)
        try:
            lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
#             print len (lig_ref_parsed)
        except:
            p_log.write("[ERROR ligand ref] -> " + p_lig_ref + "\n")
            continue

        #control
        d_control["pr ref"] = d_control["pr ref"] + 1

        # output by reference
        p_dir_result_ref = pathManage.result(name_lig + "/" + ref_folder)
        d_filout_superimposed = {}
        d_filout_superimposed["global"] = open(
            p_dir_result_ref + "all_ligand_aligned.pdb", "w")
        d_filout_superimposed["sheap"] = open(
            p_dir_result_ref + "all_ligand_aligned_" + str(thresold_shaep) +
            ".pdb", "w")

        # write lig ref -> connect matrix corrrect in all reference and all sheap
        writePDBfile.coordinateSection(d_filout_superimposed["global"],
                                       lig_ref_parsed,
                                       "HETATM",
                                       connect_matrix=1)
        writePDBfile.coordinateSection(d_filout_superimposed["sheap"],
                                       lig_ref_parsed,
                                       "HETATM",
                                       connect_matrix=1)

        # inspect folder dataset
        l_pdbfile = listdir(p_dir_dataset + ref_folder + "/")
        for pdbfile in l_pdbfile:
            # no ligand file
            if len(pdbfile.split("_")) == 1:
                continue
            pdbfile = pdbfile[:-4]  # remove extention

            if len(pdbfile.split("_")[0]) == 3 and len(pdbfile.split(
                    "_")[1]) == 4 and pdbfile.split("_")[1] != ref_folder:
                p_lig = p_dir_dataset + ref_folder + "/" + pdbfile + ".pdb"
                if p_lig_ref != p_lig:
                    # pass case where ligand replace same ligand -> does not need run
                    if pdbfile.split("_")[0] == name_lig:
                        p_log.write("[REMOVE] -> same ligand substituate")
                        continue

                    # parsed ligand query
                    lig_parsed = parsePDB.loadCoordSectionPDB(p_lig, "HETATM")

                    # find matrix of rotation
                    p_matrix = pathManage.findMatrix(p_lig_ref, p_lig,
                                                     name_lig)
                    # control file matrix exist
                    if not path.exists(p_matrix):
                        p_log.write("[ERROR] -> Matrix transloc " + p_lig_ref +
                                    " " + p_lig + " " + name_lig + "\n")
                        continue

                    # control
                    d_control["lig query"] = d_control["lig query"] + 1

                    # find the path of complex used
                    p_complex = p_dir_dataset + ref_folder + "/" + p_lig.split(
                        "/")[-1][4:]

                    # ligand rotated -> change the referentiel
                    superposeStructure.applyMatrixLigand(lig_parsed, p_matrix)

                    # use substruct
                    l_p_substruct_ref = pathManage.findSubstructRef(
                        pathManage.dataset(name_lig) + ref_folder + "/",
                        name_lig)
                    for p_substruct_ref in l_p_substruct_ref:
                        # ribose or phosphate
                        struct_type = p_substruct_ref.split("_")[-2]
                        substruct_parsed = parsePDB.loadCoordSectionPDB(
                            p_substruct_ref, "HETATM")

                        l_atom_substituate = neighborSearch.searchNeighborAtom(
                            substruct_parsed,
                            lig_parsed,
                            struct_type,
                            p_log,
                            thresold_superimposed_ribose=
                            thresold_superimposed_ribose,
                            thresold_superimposed_pi=thresold_superimposed_pi)
                        # control find
                        if len(l_atom_substituate) == 0:
                            if not struct_type in d_control[
                                    "subref empty"].keys():
                                d_control["subref empty"][struct_type] = 1
                            else:
                                d_control["subref empty"][
                                    struct_type] = d_control["subref empty"][
                                        struct_type] + 1
                            continue

                        else:
                            if not struct_type in d_control["subref"].keys():
                                d_control["subref"][struct_type] = 1
                            else:
                                d_control["subref"][struct_type] = d_control[
                                    "subref"][struct_type] + 1

                            # write PDB file, convert smile
                            p_substituate_pdb = p_dir_result_ref + "substituent_" + pdbfile.split(
                                "_")[0] + "_" + pdbfile.split(
                                    "_")[1] + "_" + struct_type + ".pdb"
                            writePDBfile.coordinateSection(p_substituate_pdb,
                                                           l_atom_substituate,
                                                           recorder="HETATM",
                                                           header=0,
                                                           connect_matrix=1)

                            # sheap reference on part of ligand
                            p_sheap = runOtherSoft.runShaep(
                                p_substruct_ref,
                                p_substituate_pdb,
                                p_substituate_pdb[0:-4] + ".hit",
                                clean=0)
                            val_sheap = parseShaep.parseOutputShaep(p_sheap)
                            if val_sheap == {}:
                                p_log.write("[ERROR] -> ShaEP " +
                                            p_substituate_pdb + " " +
                                            p_substruct_ref + "\n")

                                if not struct_type in d_control[
                                        "out sheap"].keys():
                                    d_control["out sheap"][struct_type] = 1
                                else:
                                    d_control["out sheap"][
                                        struct_type] = d_control["out sheap"][
                                            struct_type] + 1
                                continue

                            # control thresold sheap
                            if not struct_type in d_filout_sheap.keys():
                                d_filout_sheap[struct_type] = {}
                                d_filout_sheap[struct_type] = open(
                                    p_dir_result + "shaep_global_" +
                                    struct_type + ".txt", "w")
                                d_filout_sheap[struct_type].write(
                                    "name\tbest_similarity\tshape_similarity\tESP_similarity\n"
                                )
                                d_filout_sheap["list"].append(
                                    p_dir_result + "shaep_global_" +
                                    struct_type +
                                    ".txt")  # to improve with python function

                            # write value in ShaEP control
                            d_filout_sheap[struct_type].write(
                                ref_folder + "_" + str(pdbfile.split("_")[1]) +
                                "_" + struct_type + "_" +
                                str(pdbfile.split("_")[0]) + "\t" +
                                str(val_sheap["best_similarity"]) + "\t" +
                                str(val_sheap["shape_similarity"]) + "\t" +
                                str(val_sheap["ESP_similarity"]) + "\n")
                            d_filout_sheap["global"].write(
                                ref_folder + "_" + str(pdbfile.split("_")[1]) +
                                "_" + struct_type + "_" +
                                str(pdbfile.split("_")[0]) + "\t" +
                                str(val_sheap["best_similarity"]) + "\t" +
                                str(val_sheap["shape_similarity"]) + "\t" +
                                str(val_sheap["ESP_similarity"]) + "\n")

                            # rename file substituent with shaEP value
                            rename(
                                p_substituate_pdb,
                                p_substituate_pdb[:-4] + "_" +
                                str(val_sheap["best_similarity"]) + ".pdb")
                            # rename and change the file name
                            p_substituate_pdb = p_substituate_pdb[:-4] + "_" + str(
                                val_sheap["best_similarity"]) + ".pdb"

                            # write all substruct in global file
                            writePDBfile.coordinateSection(
                                d_filout_superimposed["global"],
                                lig_parsed,
                                recorder="HETATM",
                                header=str(p_lig.split("/")[-1]) + "_" +
                                str(val_sheap["best_similarity"]),
                                connect_matrix=1)

                            # control sheap thresold
                            if float(val_sheap["best_similarity"]
                                     ) >= thresold_shaep:

                                # write subligand superimposed selected in global files
                                writePDBfile.coordinateSection(
                                    d_filout_superimposed["sheap"],
                                    lig_parsed,
                                    recorder="HETATM",
                                    header=str(p_lig.split("/")[-1]) + "_" +
                                    str(val_sheap["best_similarity"]),
                                    connect_matrix=1)

                                ############
                                # write BS #
                                ############
                                # not only protein superimposed -> also ion and water
                                l_atom_complex = parsePDB.loadCoordSectionPDB(
                                    p_complex)
                                superposeStructure.applyMatrixProt(
                                    l_atom_complex, p_matrix)
                                p_file_cx = p_dir_result_ref + "CX_" + p_lig.split(
                                    "/")[-1]
                                # write CX
                                writePDBfile.coordinateSection(
                                    p_file_cx,
                                    l_atom_complex,
                                    recorder="ATOM",
                                    header=p_lig.split("/")[-1],
                                    connect_matrix=0)

                                # search atom in BS
                                l_atom_binding_site = []
                                for atom_complex in l_atom_complex:
                                    for atom_substruct in lig_parsed:
                                        if parsePDB.distanceTwoatoms(
                                                atom_substruct,
                                                atom_complex) <= thresold_BS:
                                            if not atom_complex in l_atom_binding_site:
                                                l_atom_binding_site.append(
                                                    deepcopy(atom_complex))

                                # 3. retrieve complet residue
                                l_atom_BS_res = parsePDB.getResidues(
                                    l_atom_binding_site, l_atom_complex)

                                # 4. write binding site
                                p_binding = p_dir_result_ref + "BS_" + p_lig.split(
                                    "/")[-1]
                                writePDBfile.coordinateSection(
                                    p_binding,
                                    l_atom_BS_res,
                                    "ATOM",
                                    p_binding,
                                    connect_matrix=0)

                                # smile code substituate analysis
                                # Step smile -> not conversion if shaep not validate
                                smile_find = runOtherSoft.babelConvertPDBtoSMILE(
                                    p_substituate_pdb)
                                if not struct_type in d_smile.keys():
                                    d_smile[struct_type] = {}
                                    d_smile[struct_type][smile_find] = {}
                                    d_smile[struct_type][smile_find][
                                        "count"] = 1
                                    d_smile[struct_type][smile_find]["PDB"] = [
                                        pdbfile.split("_")[1]
                                    ]
                                    d_smile[struct_type][smile_find][
                                        "ligand"] = [pdbfile.split("_")[0]]
                                    d_smile[struct_type][smile_find]["ref"] = [
                                        ref_folder
                                    ]
                                else:
                                    if not smile_find in d_smile[
                                            struct_type].keys():
                                        d_smile[struct_type][smile_find] = {}
                                        d_smile[struct_type][smile_find][
                                            "count"] = 1
                                        d_smile[struct_type][smile_find][
                                            "PDB"] = [pdbfile.split("_")[1]]
                                        d_smile[struct_type][smile_find][
                                            "ligand"] = [
                                                pdbfile.split("_")[0]
                                            ]
                                        d_smile[struct_type][smile_find][
                                            "ref"] = [ref_folder]
                                    else:
                                        d_smile[struct_type][smile_find][
                                            "count"] = d_smile[struct_type][
                                                smile_find]["count"] + 1
                                        d_smile[struct_type][smile_find][
                                            "PDB"].append(
                                                pdbfile.split("_")[1])
                                        d_smile[struct_type][smile_find][
                                            "ligand"].append(
                                                pdbfile.split("_")[0])
                                        d_smile[struct_type][smile_find][
                                            "ref"].append(ref_folder)

                            else:
                                if not struct_type in d_control[
                                        "out sheap"].keys():
                                    d_control["out sheap"][struct_type] = 1
                                else:
                                    d_control["out sheap"][
                                        struct_type] = d_control["out sheap"][
                                            struct_type] + 1

        tool.closeDicoFile(d_filout_superimposed)

    # sheap control
    tool.closeDicoFile(d_filout_sheap)
    for p_file_sheap in d_filout_sheap["list"]:
        runOtherSoft.RhistogramMultiple(p_file_sheap)

    # write list of smile
    for substruct in d_smile.keys():
        p_list_smile = pathManage.result(
            name_lig) + "list_" + substruct + "_" + str(
                thresold_shaep) + "_smile.txt"
        filout_smile = open(p_list_smile, "w")
        for smile_code in d_smile[substruct].keys():
            l_lig = d_smile[substruct][smile_code]["ligand"]
            l_PDB = d_smile[substruct][smile_code]["PDB"]
            l_ref = d_smile[substruct][smile_code]["ref"]
            filout_smile.write(
                str(smile_code) + "\t" +
                str(d_smile[substruct][smile_code]["count"]) + "\t" +
                " ".join(l_PDB) + "\t" + " ".join(l_ref) + "\t" +
                " ".join(l_lig) + "\n")
        filout_smile.close()
    p_log.close()

    # control
    filout_control.write("NB ref: " + str(d_control["pr ref"]) + "\n")
    filout_control.write("Ligand query: " + str(d_control["lig query"]) + "\n")
    for k in d_control["subref"].keys():
        filout_control.write("LSR " + str(k) + ": " +
                             str(d_control["subref"][k]) + "\n")
    for k in d_control["subref empty"].keys():
        filout_control.write("NB LSR empty " + str(k) + ": " +
                             str(d_control["subref empty"][k]) + "\n")
    for k in d_control["out sheap"].keys():
        filout_control.write("LSR out by sheap " + str(k) + ": " +
                             str(d_control["out sheap"][k]) + "\n")

    filout_control.write("**********************\n\n")
    for k in d_control["subref"].keys():
        filout_control.write("LSR keep" + str(k) + ": " +
                             str(d_control["subref"][k] -
                                 d_control["out sheap"][k]) + "\n")

    filout_control.close()

    return 1

Example #7

Show file

def analyseIons (pr_dataset, name_ligand, p_filout, thresold_max_interaction = 4.0) : 

    l_folder_ref = listdir(pr_dataset)

    filout = open (p_filout, "w")
    if name_ligand == "ATP" : 
        filout.write ("PDB\tIon\tD1\tD2\tD3\tAngle1\tAngle2\tAt1\tAt2\tA3\n")
    else : 
        filout.write ("PDB\tIon\tD1\tD2\tAngle\tAt1\tAt2\n")
    
    # dictionnary of counting
    d_count = {}
    d_count["CX"] = 0
    d_count["CX + ions"] = 0
    d_count["BS + ions"] = 0
    d_count["BS + 1-ion"] = 0
    d_count["BS + 2-ions"] = 0
    d_count["BS + more-ions"] = 0
    d_count["Interact-1"] = 0
    d_count["Interact-2"] = 0

    
    # dictionnary by ions
    d_ions = {}
    
    for ref_folder in l_folder_ref  :
        only_one = 0
        if len (ref_folder) != 4 : 
            continue
        d_count["CX"] = d_count["CX"] + 1
        l_temp = []
        # path and complex
        p_lig_ref = pathManage.findligandRef(pr_dataset + ref_folder + "/", name_ligand)
        p_complex = pathManage.findPDBRef(pr_dataset + ref_folder + "/")
    
        # parsing
        lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
        l_het_parsed = parsePDB.loadCoordSectionPDB(p_complex, "HETATM")
    
        # retrieve phosphate
        l_pi = retrieveTwoAtomForAngle (lig_ref_parsed, name_ligand)
        if l_pi == [] : # case ligand without phosphate 
            continue 
        flag_interact = 0
        flag_between_1 = 0
        flag_between_2 = 0
        for het_parsed in l_het_parsed : 
            if het_parsed["resName"] in l_ions : 
                d_count["CX + ions"] = d_count["CX + ions"] + 1
                if not het_parsed ["resName"] in d_ions.keys () : 
                    d_ions[het_parsed["resName"]] = 0
                if not het_parsed["resName"] in l_temp :  
                    d_ions[het_parsed["resName"]] = d_ions[het_parsed["resName"]] + 1
                    l_temp.append (het_parsed["resName"])
                PDB_id = ref_folder
                d1 = parsePDB.distanceTwoatoms(l_pi[0], het_parsed)
                d2 = parsePDB.distanceTwoatoms(l_pi[1], het_parsed)
                if name_ligand == "ATP" : 
                    # print len(l_pi), ref_folder, p_lig_ref
                    d3 = parsePDB.distanceTwoatoms(l_pi[2], het_parsed)
                    angle_bis = parsePDB.angleVector(l_pi[1], het_parsed, l_pi[2])
                angle = parsePDB.angleVector(l_pi[0], het_parsed, l_pi[1])
            
                if d1 < 10 and d2 < 10 : 
                    if not het_parsed["resName"] in d_count.keys () : 
                        d_count[het_parsed["resName"]] = 0
                    if only_one == 0 : 
                        d_count[het_parsed["resName"]] = d_count[het_parsed["resName"]] + 1
                        only_one = 1
                    d_count["BS + ions"] = d_count["BS + ions"] + 1
                    flag_interact = flag_interact + 1
                    if d1 < thresold_max_interaction and d2 < thresold_max_interaction : 
                        flag_between_1 = flag_between_1 + 1


                    if name_ligand == "ATP" :
                        if d3 < thresold_max_interaction and d2 < thresold_max_interaction : 
                            flag_between_2 = flag_between_2 + 1
                        filout.write (str (PDB_id) + "\t" + str(het_parsed["resName"]) + "\t" + str(d1) + "\t" + str(d2) + "\t" + str (d3) + "\t" + str(angle) + "\t" + str(angle_bis) + "\t" + str(l_pi[0]["serial"]) + "\t" + str(l_pi[1]["serial"]) + "\t" + str(l_pi[2]["serial"]) + "\n")
                    else : 
                        filout.write (str (PDB_id) + "\t" + str(het_parsed["resName"]) + "\t" + str(d1) + "\t" + str(d2) + "\t" + str(angle) + "\t" + str(l_pi[0]["serial"]) + "\t" + str(l_pi[1]["serial"]) + "\n")
    
        if flag_interact == 1 : 
            d_count["BS + 1-ion"] = d_count["BS + 1-ion"] + 1
        elif flag_interact == 2 : 
            d_count["BS + 2-ions"] = d_count["BS + 2-ions"] + 1
        elif flag_interact > 2 : 
            d_count["BS + more-ions"] = d_count["BS + more-ions"] + 1

        if flag_between_1 >= 1 : 
            d_count["Interact-1"] = d_count["Interact-1"] + flag_between_1
        if flag_between_2 >= 1 : 
            d_count["Interact-2"] = d_count["Interact-2"] + flag_between_2 

    filout.close ()
    
    filout_count = open (p_filout[0:-4] + "count.txt", "w")
    filout_count.write ("CX: " + str (d_count["CX"]) + "\n")
    filout_count.write ("CX + ions: " + str (d_count["CX + ions"]) + "\n")
    filout_count.write ("BS + ions: " + str(d_count["BS + ions"]) + "\n")
    filout_count.write ("BS + 1-ion: " + str(d_count["BS + 1-ion"]) + "\n")
    filout_count.write ("BS + 2-ions: " + str(d_count["BS + 2-ions"]) + "\n")
    filout_count.write ("BS + more-ions: " + str(d_count["BS + more-ions"]) + "\n")
    filout_count.write ("Interact Pi-alpha + Pi-beta: " + str(d_count["Interact-1"]) + "\n")
    filout_count.write ("Interact Pi-beta + Pi-gama: " + str(d_count["Interact-2"]) + "\n")
    filout_count.close ()

    filout_by_ion = open(p_filout[0:-4] + "byIons_" + name_ligand, "w")
    l_k = d_ions.keys ()
    for k in l_k : 
        filout_by_ion.write (str (k.capitalize()) + "\t" + str (d_ions[k]) + "\n")
    filout_by_ion.close ()
   
    runOtherSoft.barplot (p_filout[0:-4] + "byIons_" + name_ligand)

Example #8

Show file

def computeRMSDBS (p_ref, p_query, p_substruct, pr_result, thresold_BS = 6) :
    
    
    l_atom_query_parsed = parsePDB.loadCoordSectionPDB(p_query, "ATOM")
    l_atom_ref_parsed = parsePDB.loadCoordSectionPDB(p_ref, "ATOM")
    
    l_atom_substruct = parsePDB.loadCoordSectionPDB(p_substruct)
    
    
    
    l_BS_ref = []
    
    for atom_substruct in l_atom_substruct : 
        for atom_ref in l_atom_ref_parsed : 
            d_atom = parsePDB.distanceTwoatoms(atom_substruct, atom_ref)
            if d_atom <= thresold_BS : 
                l_BS_ref.append (atom_ref)
    # retrieve residue full
    l_BS_ref = parsePDB.getResidues(l_BS_ref, l_atom_ref_parsed)
    
#     print len (l_BS_ref)
#     print len (l_atom_query_parsed)
    
    
    l_BS_query = []
    flag_identic_crystal = 1
    for atomBS_ref in l_BS_ref :
#         print  atomBS_parsed 
        d_max = 100.0 
        for atom_query in l_atom_query_parsed :
            if atom_query["resName"] ==  atomBS_ref["resName"] and atom_query["name"] ==  atomBS_ref["name"] : 
                d = parsePDB.distanceTwoatoms(atom_query, atomBS_ref)
                if d < d_max : 
                    d_max = d
                    res_temp = atom_query
                
        
        #if d_max < thresold_BS : 
        if "res_temp" in locals () :     
            l_BS_query.append (deepcopy(res_temp))
        # identic check number
            if res_temp["resSeq"] != atomBS_ref["resSeq"] : 
                flag_identic_crystal = 0
        #else : 
            # case structure not found
        #    return []
    
    
#     print len (l_BS_query), len (l_BS_ref)
    l_RMSD = RMSDTwoList (l_BS_query, l_BS_ref)
    
    # write PDB
    #p_filout_pdb = pr_result + p_query.split ("/")[-1][0:-4] + "_" + str (flag_identic_crystal) + "_" + p_substruct.split ("_")[-2] + "_" + p_ref.split ("/")[-1]
    #filout_pdb = open (p_filout_pdb, "w")
    #writePDBfile.coordinateSection(filout_pdb, l_BS_ref, recorder = "ATOM")
    #writePDBfile.coordinateSection(filout_pdb, l_BS_query, recorder = "ATOM", header = 0 )
    #filout_pdb.close ()
    
    if l_RMSD == [] : 
        return []
    else : 
        return l_RMSD + [flag_identic_crystal]