Python babelConvertPDBtoSMILE Examples

Programming Language: Python

Namespace/Package Name: runOtherSoft

Method/Function: babelConvertPDBtoSMILE

Examples at hotexamples.com: 5

Python babelConvertPDBtoSMILE - 5 examples found. These are the top rated real world Python examples of runOtherSoft.babelConvertPDBtoSMILE extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: main.py Project: ABorrel/LSRs

def retrieveSubstructSuperimposed (name_lig, thresold_BS = 4.5, thresold_superimposed_ribose = 2.5, thresold_superimposed_pi = 3, thresold_shaep = 0.4):
    """
    Extract, for every reference folder of the dataset of name_lig, the atoms
    of each query ligand that overlay the reference substructures.

    For each query ligand file found in a reference folder:
      1. the ligand is parsed and transformed with the matrix file returned
         by pathManage.findMatrix;
      2. for each reference substructure, the neighbouring ligand atoms are
         written to a "substituent_*.pdb" file and compared to the
         substructure with ShaEP; the file is then renamed with the
         best_similarity value appended;
      3. when best_similarity >= thresold_shaep, the transformed complex
         (CX_*), the binding site (BS_*) and the SMILES code of the
         substituent are also produced.

    Files written in the result folder: log_superimposed.txt,
    quality_extraction.txt, shaep_global*.txt and one
    list_<substructure>_<thresold_shaep>_smile.txt per substructure type.

    args: -> name_lig: ligand name used to locate the dataset/result folders
          -> thresold_BS: maximal distance between a ligand atom and a
             complex atom for the latter to belong to the binding site
          -> thresold_superimposed_ribose, thresold_superimposed_pi:
             forwarded unchanged to neighborSearch.searchNeighborAtom
          -> thresold_shaep: minimal ShaEP best_similarity to keep a hit
    return: 1
    """

    # output folders
    p_dir_dataset = pathManage.dataset(name_lig)
    p_dir_result = pathManage.result(name_lig)
    l_folder_ref = listdir(p_dir_dataset)

    # log control
    p_log = open(p_dir_result + "log_superimposed.txt", "w")

    # control extraction (counters written in quality_extraction.txt)
    d_control = {"pr ref": 0, "lig query": 0, "subref": {}, "subref empty": {}, "out sheap": {}}
    filout_control = open(p_dir_result + "quality_extraction.txt", "w")

    # stock smile code: d_smile[struct_type][smile] -> count / PDB / ligand / ref
    d_smile = {}

    # sheap control: one global file + one file per substructure type
    header_sheap = "name\tbest_similarity\tshape_similarity\tESP_similarity\n"
    d_filout_sheap = {}
    d_filout_sheap["list"] = [p_dir_result + "shaep_global.txt"]
    d_filout_sheap["global"] = open(p_dir_result + "shaep_global.txt", "w")
    d_filout_sheap["global"].write(header_sheap)

    for ref_folder in l_folder_ref:
        # control folder reference name
        if len(ref_folder) != 4:
            p_log.write("[ERROR folder] -> " + ref_folder + "\n")
            continue

        p_dir_ref = p_dir_dataset + ref_folder + "/"

        # reference
        p_lig_ref = pathManage.findligandRef(p_dir_ref, name_lig)
        try:
            lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
        except Exception:
            # str() -> the handler must not fail itself if no path was found
            p_log.write("[ERROR ligand ref] -> " + str(p_lig_ref) + "\n")
            continue

        # control
        d_control["pr ref"] += 1

        # output by reference
        p_dir_result_ref = pathManage.result(name_lig + "/" + ref_folder)
        d_filout_superimposed = {}
        d_filout_superimposed["global"] = open(p_dir_result_ref + "all_ligand_aligned.pdb", "w")
        d_filout_superimposed["sheap"] = open(p_dir_result_ref + "all_ligand_aligned_" + str(thresold_shaep) + ".pdb", "w")

        # write lig ref -> connect matrix correct in all reference and all sheap
        writePDBfile.coordinateSection(d_filout_superimposed["global"], lig_ref_parsed, "HETATM", connect_matrix = 1)
        writePDBfile.coordinateSection(d_filout_superimposed["sheap"], lig_ref_parsed, "HETATM", connect_matrix = 1)

        # inspect folder dataset
        for pdbfile in listdir(p_dir_ref):
            # no ligand file
            if len(pdbfile.split("_")) == 1:
                continue
            pdbfile = pdbfile[:-4]  # remove extension

            # expected name: <3-letter ligand>_<4-letter PDB>...
            l_elem_name = pdbfile.split("_")
            if len(l_elem_name) < 2:
                continue
            lig_query = l_elem_name[0]
            PDB_query = l_elem_name[1]
            if len(lig_query) != 3 or len(PDB_query) != 4 or PDB_query == ref_folder:
                continue

            p_lig = p_dir_ref + pdbfile + ".pdb"
            if p_lig_ref == p_lig:
                continue

            # pass case where ligand replace same ligand -> does not need run
            if lig_query == name_lig:
                p_log.write("[REMOVE] -> same ligand substituate\n")
                continue

            # parsed ligand query
            lig_parsed = parsePDB.loadCoordSectionPDB(p_lig, "HETATM")

            # find matrix of rotation and control that the file exists
            p_matrix = pathManage.findMatrix(p_lig_ref, p_lig, name_lig)
            if not path.exists(p_matrix):
                p_log.write("[ERROR] -> Matrix transloc " + p_lig_ref + " " + p_lig + " " + name_lig + "\n")
                continue

            # control
            d_control["lig query"] += 1

            # find the path of complex used (ligand file name without its 4 first characters)
            name_file_lig = p_lig.split("/")[-1]
            p_complex = p_dir_ref + name_file_lig[4:]

            # ligand rotated -> change the referential
            superposeStructure.applyMatrixLigand(lig_parsed, p_matrix)

            # use substruct
            l_p_substruct_ref = pathManage.findSubstructRef(pathManage.dataset(name_lig) + ref_folder + "/", name_lig)
            for p_substruct_ref in l_p_substruct_ref:
                # substructure type taken from the file name (ribose or phosphate)
                struct_type = p_substruct_ref.split("_")[-2]
                substruct_parsed = parsePDB.loadCoordSectionPDB(p_substruct_ref, "HETATM")

                l_atom_substituate = neighborSearch.searchNeighborAtom(substruct_parsed, lig_parsed, struct_type, p_log, thresold_superimposed_ribose = thresold_superimposed_ribose, thresold_superimposed_pi = thresold_superimposed_pi)

                # control find
                if len(l_atom_substituate) == 0:
                    d_control["subref empty"][struct_type] = d_control["subref empty"].get(struct_type, 0) + 1
                    continue
                d_control["subref"][struct_type] = d_control["subref"].get(struct_type, 0) + 1

                # write PDB file of the substituent
                p_substituate_pdb = p_dir_result_ref + "substituent_" + lig_query + "_" + PDB_query + "_" + struct_type + ".pdb"
                writePDBfile.coordinateSection(p_substituate_pdb, l_atom_substituate, recorder="HETATM", header=0, connect_matrix = 1)

                # sheap reference on part of ligand
                p_sheap = runOtherSoft.runShaep(p_substruct_ref, p_substituate_pdb, p_substituate_pdb[0:-4] + ".hit", clean = 0)
                val_sheap = parseShaep.parseOutputShaep(p_sheap)
                if not val_sheap:
                    p_log.write("[ERROR] -> ShaEP " + p_substituate_pdb + " " + p_substruct_ref + "\n")
                    d_control["out sheap"][struct_type] = d_control["out sheap"].get(struct_type, 0) + 1
                    continue

                # open the ShaEP file of this substructure type at first use
                if struct_type not in d_filout_sheap:
                    p_filout_type = p_dir_result + "shaep_global_" + struct_type + ".txt"
                    d_filout_sheap[struct_type] = open(p_filout_type, "w")
                    d_filout_sheap[struct_type].write(header_sheap)
                    d_filout_sheap["list"].append(p_filout_type)

                # write value in ShaEP control
                best_similarity = str(val_sheap["best_similarity"])
                line_sheap = ref_folder + "_" + PDB_query + "_" + struct_type + "_" + lig_query + "\t" + best_similarity + "\t" + str(val_sheap["shape_similarity"]) + "\t" + str(val_sheap["ESP_similarity"]) + "\n"
                d_filout_sheap[struct_type].write(line_sheap)
                d_filout_sheap["global"].write(line_sheap)

                # rename file substituent with ShaEP value and follow the new name
                p_substituate_scored = p_substituate_pdb[:-4] + "_" + best_similarity + ".pdb"
                rename(p_substituate_pdb, p_substituate_scored)
                p_substituate_pdb = p_substituate_scored

                # write all ligands in global file
                header_lig = name_file_lig + "_" + best_similarity
                writePDBfile.coordinateSection(d_filout_superimposed["global"], lig_parsed, recorder= "HETATM", header = header_lig, connect_matrix = 1)

                # control sheap thresold ("not >=" keeps a NaN score on the rejected side)
                if not float(val_sheap["best_similarity"]) >= thresold_shaep:
                    d_control["out sheap"][struct_type] = d_control["out sheap"].get(struct_type, 0) + 1
                    continue

                # write ligand superimposed selected in global files
                writePDBfile.coordinateSection(d_filout_superimposed["sheap"], lig_parsed, recorder= "HETATM", header = header_lig, connect_matrix = 1)

                ############
                # write BS #
                ############
                # not only protein superimposed -> also ion and water
                l_atom_complex = parsePDB.loadCoordSectionPDB(p_complex)
                superposeStructure.applyMatrixProt(l_atom_complex, p_matrix)
                p_file_cx = p_dir_result_ref + "CX_" + name_file_lig
                writePDBfile.coordinateSection(p_file_cx, l_atom_complex, recorder="ATOM", header= name_file_lig, connect_matrix = 0)

                # search atom in BS
                l_atom_binding_site = []
                for atom_complex in l_atom_complex:
                    for atom_substruct in lig_parsed:
                        if parsePDB.distanceTwoatoms(atom_substruct, atom_complex) <= thresold_BS:
                            if atom_complex not in l_atom_binding_site:
                                l_atom_binding_site.append(deepcopy(atom_complex))
                            # one ligand atom in range is enough, the atom is stored only once
                            break

                # retrieve complete residues and write binding site
                l_atom_BS_res = parsePDB.getResidues(l_atom_binding_site, l_atom_complex)
                p_binding = p_dir_result_ref + "BS_" + name_file_lig
                writePDBfile.coordinateSection(p_binding, l_atom_BS_res, "ATOM", p_binding, connect_matrix = 0)

                # smile code substituate analysis
                # Step smile -> no conversion if shaep not validated
                smile_find = runOtherSoft.babelConvertPDBtoSMILE(p_substituate_pdb)
                d_smile_type = d_smile.setdefault(struct_type, {})
                if smile_find not in d_smile_type:
                    d_smile_type[smile_find] = {"count": 0, "PDB": [], "ligand": [], "ref": []}
                d_occurence = d_smile_type[smile_find]
                d_occurence["count"] += 1
                d_occurence["PDB"].append(PDB_query)
                d_occurence["ligand"].append(lig_query)
                d_occurence["ref"].append(ref_folder)

        tool.closeDicoFile(d_filout_superimposed)

    # sheap control
    tool.closeDicoFile(d_filout_sheap)
    for p_file_sheap in d_filout_sheap["list"]:
        runOtherSoft.RhistogramMultiple(p_file_sheap)

    # write list of smile
    for substruct in d_smile.keys():
        p_list_smile = pathManage.result(name_lig) + "list_" + substruct + "_" + str(thresold_shaep) + "_smile.txt"
        filout_smile = open(p_list_smile, "w")
        for smile_code in d_smile[substruct].keys():
            d_occurence = d_smile[substruct][smile_code]
            filout_smile.write(str(smile_code) + "\t" + str(d_occurence["count"]) + "\t" + " ".join(d_occurence["PDB"]) + "\t" + " ".join(d_occurence["ref"]) + "\t" + " ".join(d_occurence["ligand"]) + "\n")
        filout_smile.close()
    p_log.close()

    # control
    filout_control.write("NB ref: " + str(d_control["pr ref"]) + "\n")
    filout_control.write("Ligand query: " + str(d_control["lig query"]) + "\n")
    for k in d_control["subref"].keys():
        filout_control.write("LSR " + str(k) + ": " + str(d_control["subref"][k]) + "\n")
    for k in d_control["subref empty"].keys():
        filout_control.write("NB LSR empty " + str(k) + ": " + str(d_control["subref empty"][k]) + "\n")
    for k in d_control["out sheap"].keys():
        filout_control.write("LSR out by sheap " + str(k) + ": " + str(d_control["out sheap"][k]) + "\n")

    filout_control.write("**********************\n\n")
    for k in d_control["subref"].keys():
        # a type without any rejected hit has no "out sheap" entry -> count it as 0
        nb_keep = d_control["subref"][k] - d_control["out sheap"].get(k, 0)
        filout_control.write("LSR keep" + str(k) + ": " + str(nb_keep) + "\n")

    filout_control.close()

    return 1

Example #2

Show file

File: arrangeResult.py Project: ABorrel/LSRs

def globalArrangement (pr_orgin, p_smile, p_family, name_ligand, l_ligand_out):
    """
    Sort the results listed in a SMILES list file into a folder tree
    <pr_orgin><first_folder>/<replacement>/<family>-<group>_<PDB ref>/{LGD,BS,LSR}/.

    Each line of p_smile is read as tab separated fields: the SMILES code
    first, then (counting from the end) the query PDB list, the reference PDB
    list and the ligand list, each list being space separated. For every
    (reference PDB, ligand, query PDB) triplet the query ligand is
    transformed with its matrix and written in LGD/, the reference ligand and
    the reference/query LSR files are copied, and the binding site files are
    copied or computed in BS/.

    args: -> pr_orgin: root folder of the arrangement (used as string prefix)
          -> p_smile: path of the SMILES list file; the substructure type is
             the third "_"-separated element from the end of this path
          -> p_family: not used by this function
          -> name_ligand: reference ligand name
          -> l_ligand_out: ligands to exclude
    return: 1
    """

    subst = p_smile.split("_")[-3]

    with open(p_smile, "r") as filin:
        l_line_smile = filin.readlines()

    for line_smile in l_line_smile:
        # a blank line carries no field to index
        if not line_smile.strip():
            continue

        # search substructure
        l_field = line_smile.split("\t")
        l_PDB_query = l_field[-3].split(" ")
        l_PDB_ref = l_field[-2].split(" ")
        l_ligand = line_smile.strip().split("\t")[-1].split(" ")

        # search replacement
        smile = l_field[0]

        # first level folder: substructure type, suffixed when the LSR is small
        small_LSR = smileAnalysis.smallLSR(smile)
        if subst == "ribose":
            first_folder = "ribose"
        else:
            first_folder = "Pi"
        if small_LSR == 1:
            first_folder = first_folder + "_small"

        # single-argument form: same text as the Python 2 print statement, valid in Python 3
        print(" ".join([str(item) for item in (smile, l_PDB_query, l_PDB_ref, l_ligand, subst, small_LSR)]))
        replacement, metal = smileAnalysis.searchReplacement(smile, l_PDB_query[0], l_PDB_ref[0], name_ligand)

        # case with cycle -> search replacement 2
        if replacement == "cycle":
            replacement2, metal = smileAnalysis.searchReplacement(smile, l_PDB_query[0], l_PDB_ref[0], name_ligand, in_cycle = 1)
            replacement = replacement + "/" + replacement2  # new folder

        # case metal
        if replacement == "metal":
            print(" ".join([str(item) for item in (metal, l_PDB_query, l_PDB_ref, name_ligand)]))

        for id_ref, lig_query, id_query in zip(l_PDB_ref, l_ligand, l_PDB_query):

            # exclusion of ligand out
            if lig_query in l_ligand_out:
                continue

            group, family = analysis.findFamilyAndGroup(id_ref)

            # folder reference
            pr_dataset = pathManage.dataset(name_ligand + "/" + id_ref)

            PDB_ref = pathManage.findPDBRef(pr_dataset)
            p_ligand_ref = pathManage.findligandRef(pr_dataset, name_ligand)
            l_frag_ref = pathManage.findSubstructRef(pr_dataset, name_ligand)
            # None when nothing matches -> never reuse the path of a previous entry
            p_frag_ref = _firstPathMatching(l_frag_ref, [subst])

            # folder query
            pr_result = pathManage.result(name_ligand + "/" + id_ref)
            l_protein_tranloc = pathManage.findPDBQueryTransloc(pr_result)
            # located but not copied (the protein copy is disabled)
            p_protein_query = _firstPathMatching(l_protein_tranloc, [lig_query, id_query])

            if replacement != "metal":
                p_lig_query = pathManage.findligandQuery(pr_dataset, lig_query, id_query)
            else:
                p_lig_query = pathManage.findligandQuery(pr_dataset, metal, id_query)

            # need apply transloc matrix; an entry that cannot be transformed is skipped
            matrix_transloc = pathManage.findMatrix(p_ligand_ref, p_lig_query, name_ligand)
            lig_query_parsed = parsePDB.loadCoordSectionPDB(p_lig_query)
            try:
                superposeStructure.applyMatrixLigand(lig_query_parsed, matrix_transloc)
            except Exception:
                continue

            p_lig_substituate = pathManage.findSubstructFind(pr_result, lig_query, id_query, subst)
            l_p_BS = pathManage.findFileBS(pr_result, id_query)
            p_BS = _firstPathMatching(l_p_BS, [lig_query])

            # output folders: family + group prefix the reference PDB
            pr_final = pr_orgin + first_folder + "/" + replacement + "/" + str(family) + "-" + str(group) + "_" + id_ref + "/"
            pr_ligand = pr_final + "LGD/"
            pr_BS = pr_final + "BS/"
            pr_sust = pr_final + "LSR/"
            for pr_out in (pr_final, pr_ligand, pr_BS, pr_sust):
                if not path.isdir(pr_out):
                    makedirs(pr_out)

            # list file ("a" creates the file when it does not exist yet)
            with open(pr_sust + "list.smile", "a") as file_smile_queries:
                file_smile_queries.write(str(smile) + "\n")

            # ligand of the query
            name_lig_query = p_lig_query.split("/")[-1]
            writePDBfile.coordinateSection(pr_ligand + "LGD_" + name_lig_query, lig_query_parsed, recorder = "HETATM", header = "LCG_" + name_lig_query, connect_matrix = 1)
            runOtherSoft.babelConvertPDBtoSMILE(pr_ligand + "LGD_" + name_lig_query, clean_smi = 1)

            # ligand of reference + smile
            p_ligand_ref_copy = pr_ligand + "LGD_REF_" + p_ligand_ref.split("/")[-1]
            copy2(p_ligand_ref, p_ligand_ref_copy)
            runOtherSoft.babelConvertPDBtoSMILE(p_ligand_ref_copy)

            # LSR of reference
            if p_frag_ref is not None:
                copy2(p_frag_ref, pr_sust + "LSR_REF_" + name_ligand + "_" + id_ref + ".pdb")
            else:
                print("[WARNING] no reference LSR file found for " + str(subst) + " in " + str(pr_dataset))

            # LSR query -> p_lig_query only for the name
            copy2(p_lig_substituate, pr_sust + "LSR_" + subst + "_" + name_lig_query)

            # BS query
            if p_BS is not None:
                copy2(p_BS, pr_BS)
            else:
                print("[WARNING] no binding site file found for " + str(lig_query) + " in " + str(pr_result))

            # BS from reference
            l_atom_BS = parsePDB.computeBS(PDB_ref, p_ligand_ref, thresold = 4.50, option_onlyATOM = 0)
            writePDBfile.coordinateSection(pr_BS + "BS_REF_" + name_ligand + "_" + PDB_ref.split("/")[-1], l_atom_BS, recorder = "ATOM", header = "BS_REF_" + name_ligand + "_" + PDB_ref, connect_matrix = 0)

    return 1


def _firstPathMatching (l_path, l_pattern):
    """
    Return the first element of l_path in which every pattern of l_pattern is
    found by the module-level search function (presumably re.search -- to
    confirm against the file imports), or None when no element matches.
    """
    for p_file in l_path:
        if all(search(pattern, p_file) for pattern in l_pattern):
            return p_file
    return None

Example #3

Show file

def retrieveSubstructSuperimposed(name_lig,
                                  thresold_BS=4.5,
                                  thresold_superimposed_ribose=2.5,
                                  thresold_superimposed_pi=3,
                                  thresold_shaep=0.4):

    # ouput
    p_dir_dataset = pathManage.dataset(name_lig)
    p_dir_result = pathManage.result(name_lig)
    l_folder_ref = listdir(p_dir_dataset)

    # log control
    p_log = open(p_dir_result + "log_superimposed.txt", "w")

    # control extraction
    d_control = {}
    d_control["pr ref"] = 0
    d_control["lig query"] = 0
    d_control["subref"] = {}
    d_control["subref empty"] = {}
    d_control["out sheap"] = {}
    filout_control = open(p_dir_result + "quality_extraction.txt", "w")

    # stock smile code
    d_smile = {}

    # sheap control
    d_filout_sheap = {}
    d_filout_sheap["list"] = [p_dir_result + "shaep_global.txt"]
    d_filout_sheap["global"] = open(p_dir_result + "shaep_global.txt", "w")
    d_filout_sheap["global"].write(
        "name\tbest_similarity\tshape_similarity\tESP_similarity\n")

    for ref_folder in l_folder_ref:
        # control folder reference name
        if len(ref_folder) != 4:
            p_log.write("[ERROR folder] -> " + ref_folder + "\n")
            continue

        # reference
        p_lig_ref = pathManage.findligandRef(p_dir_dataset + ref_folder + "/",
                                             name_lig)
        try:
            lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
#             print len (lig_ref_parsed)
        except:
            p_log.write("[ERROR ligand ref] -> " + p_lig_ref + "\n")
            continue

        #control
        d_control["pr ref"] = d_control["pr ref"] + 1

        # output by reference
        p_dir_result_ref = pathManage.result(name_lig + "/" + ref_folder)
        d_filout_superimposed = {}
        d_filout_superimposed["global"] = open(
            p_dir_result_ref + "all_ligand_aligned.pdb", "w")
        d_filout_superimposed["sheap"] = open(
            p_dir_result_ref + "all_ligand_aligned_" + str(thresold_shaep) +
            ".pdb", "w")

        # write lig ref -> connect matrix corrrect in all reference and all sheap
        writePDBfile.coordinateSection(d_filout_superimposed["global"],
                                       lig_ref_parsed,
                                       "HETATM",
                                       connect_matrix=1)
        writePDBfile.coordinateSection(d_filout_superimposed["sheap"],
                                       lig_ref_parsed,
                                       "HETATM",
                                       connect_matrix=1)

        # inspect folder dataset
        l_pdbfile = listdir(p_dir_dataset + ref_folder + "/")
        for pdbfile in l_pdbfile:
            # no ligand file
            if len(pdbfile.split("_")) == 1:
                continue
            pdbfile = pdbfile[:-4]  # remove extention

            if len(pdbfile.split("_")[0]) == 3 and len(pdbfile.split(
                    "_")[1]) == 4 and pdbfile.split("_")[1] != ref_folder:
                p_lig = p_dir_dataset + ref_folder + "/" + pdbfile + ".pdb"
                if p_lig_ref != p_lig:
                    # pass case where ligand replace same ligand -> does not need run
                    if pdbfile.split("_")[0] == name_lig:
                        p_log.write("[REMOVE] -> same ligand substituate")
                        continue

                    # parsed ligand query
                    lig_parsed = parsePDB.loadCoordSectionPDB(p_lig, "HETATM")

                    # find matrix of rotation
                    p_matrix = pathManage.findMatrix(p_lig_ref, p_lig,
                                                     name_lig)
                    # control file matrix exist
                    if not path.exists(p_matrix):
                        p_log.write("[ERROR] -> Matrix transloc " + p_lig_ref +
                                    " " + p_lig + " " + name_lig + "\n")
                        continue

                    # control
                    d_control["lig query"] = d_control["lig query"] + 1

                    # find the path of complex used
                    p_complex = p_dir_dataset + ref_folder + "/" + p_lig.split(
                        "/")[-1][4:]

                    # ligand rotated -> change the referentiel
                    superposeStructure.applyMatrixLigand(lig_parsed, p_matrix)

                    # use substruct
                    l_p_substruct_ref = pathManage.findSubstructRef(
                        pathManage.dataset(name_lig) + ref_folder + "/",
                        name_lig)
                    for p_substruct_ref in l_p_substruct_ref:
                        # ribose or phosphate
                        struct_type = p_substruct_ref.split("_")[-2]
                        substruct_parsed = parsePDB.loadCoordSectionPDB(
                            p_substruct_ref, "HETATM")

                        l_atom_substituate = neighborSearch.searchNeighborAtom(
                            substruct_parsed,
                            lig_parsed,
                            struct_type,
                            p_log,
                            thresold_superimposed_ribose=
                            thresold_superimposed_ribose,
                            thresold_superimposed_pi=thresold_superimposed_pi)
                        # control find
                        if len(l_atom_substituate) == 0:
                            if not struct_type in d_control[
                                    "subref empty"].keys():
                                d_control["subref empty"][struct_type] = 1
                            else:
                                d_control["subref empty"][
                                    struct_type] = d_control["subref empty"][
                                        struct_type] + 1
                            continue

                        else:
                            if not struct_type in d_control["subref"].keys():
                                d_control["subref"][struct_type] = 1
                            else:
                                d_control["subref"][struct_type] = d_control[
                                    "subref"][struct_type] + 1

                            # write PDB file, convert smile
                            p_substituate_pdb = p_dir_result_ref + "substituent_" + pdbfile.split(
                                "_")[0] + "_" + pdbfile.split(
                                    "_")[1] + "_" + struct_type + ".pdb"
                            writePDBfile.coordinateSection(p_substituate_pdb,
                                                           l_atom_substituate,
                                                           recorder="HETATM",
                                                           header=0,
                                                           connect_matrix=1)

                            # sheap reference on part of ligand
                            p_sheap = runOtherSoft.runShaep(
                                p_substruct_ref,
                                p_substituate_pdb,
                                p_substituate_pdb[0:-4] + ".hit",
                                clean=0)
                            val_sheap = parseShaep.parseOutputShaep(p_sheap)
                            if val_sheap == {}:
                                p_log.write("[ERROR] -> ShaEP " +
                                            p_substituate_pdb + " " +
                                            p_substruct_ref + "\n")

                                if not struct_type in d_control[
                                        "out sheap"].keys():
                                    d_control["out sheap"][struct_type] = 1
                                else:
                                    d_control["out sheap"][
                                        struct_type] = d_control["out sheap"][
                                            struct_type] + 1
                                continue

                            # control thresold sheap
                            if not struct_type in d_filout_sheap.keys():
                                d_filout_sheap[struct_type] = {}
                                d_filout_sheap[struct_type] = open(
                                    p_dir_result + "shaep_global_" +
                                    struct_type + ".txt", "w")
                                d_filout_sheap[struct_type].write(
                                    "name\tbest_similarity\tshape_similarity\tESP_similarity\n"
                                )
                                d_filout_sheap["list"].append(
                                    p_dir_result + "shaep_global_" +
                                    struct_type +
                                    ".txt")  # to improve with python function

                            # write value in ShaEP control
                            d_filout_sheap[struct_type].write(
                                ref_folder + "_" + str(pdbfile.split("_")[1]) +
                                "_" + struct_type + "_" +
                                str(pdbfile.split("_")[0]) + "\t" +
                                str(val_sheap["best_similarity"]) + "\t" +
                                str(val_sheap["shape_similarity"]) + "\t" +
                                str(val_sheap["ESP_similarity"]) + "\n")
                            d_filout_sheap["global"].write(
                                ref_folder + "_" + str(pdbfile.split("_")[1]) +
                                "_" + struct_type + "_" +
                                str(pdbfile.split("_")[0]) + "\t" +
                                str(val_sheap["best_similarity"]) + "\t" +
                                str(val_sheap["shape_similarity"]) + "\t" +
                                str(val_sheap["ESP_similarity"]) + "\n")

                            # rename file substituent with shaEP value
                            rename(
                                p_substituate_pdb,
                                p_substituate_pdb[:-4] + "_" +
                                str(val_sheap["best_similarity"]) + ".pdb")
                            # rename and change the file name
                            p_substituate_pdb = p_substituate_pdb[:-4] + "_" + str(
                                val_sheap["best_similarity"]) + ".pdb"

                            # write all substruct in global file
                            writePDBfile.coordinateSection(
                                d_filout_superimposed["global"],
                                lig_parsed,
                                recorder="HETATM",
                                header=str(p_lig.split("/")[-1]) + "_" +
                                str(val_sheap["best_similarity"]),
                                connect_matrix=1)

                            # control sheap thresold
                            if float(val_sheap["best_similarity"]
                                     ) >= thresold_shaep:

                                # write subligand superimposed selected in global files
                                writePDBfile.coordinateSection(
                                    d_filout_superimposed["sheap"],
                                    lig_parsed,
                                    recorder="HETATM",
                                    header=str(p_lig.split("/")[-1]) + "_" +
                                    str(val_sheap["best_similarity"]),
                                    connect_matrix=1)

                                ############
                                # write BS #
                                ############
                                # not only protein superimposed -> also ion and water
                                l_atom_complex = parsePDB.loadCoordSectionPDB(
                                    p_complex)
                                superposeStructure.applyMatrixProt(
                                    l_atom_complex, p_matrix)
                                p_file_cx = p_dir_result_ref + "CX_" + p_lig.split(
                                    "/")[-1]
                                # write CX
                                writePDBfile.coordinateSection(
                                    p_file_cx,
                                    l_atom_complex,
                                    recorder="ATOM",
                                    header=p_lig.split("/")[-1],
                                    connect_matrix=0)

                                # search atom in BS
                                l_atom_binding_site = []
                                for atom_complex in l_atom_complex:
                                    for atom_substruct in lig_parsed:
                                        if parsePDB.distanceTwoatoms(
                                                atom_substruct,
                                                atom_complex) <= thresold_BS:
                                            if not atom_complex in l_atom_binding_site:
                                                l_atom_binding_site.append(
                                                    deepcopy(atom_complex))

                                # 3. retrieve complet residue
                                l_atom_BS_res = parsePDB.getResidues(
                                    l_atom_binding_site, l_atom_complex)

                                # 4. write binding site
                                p_binding = p_dir_result_ref + "BS_" + p_lig.split(
                                    "/")[-1]
                                writePDBfile.coordinateSection(
                                    p_binding,
                                    l_atom_BS_res,
                                    "ATOM",
                                    p_binding,
                                    connect_matrix=0)

                                # smile code substituate analysis
                                # Step smile -> not conversion if shaep not validate
                                smile_find = runOtherSoft.babelConvertPDBtoSMILE(
                                    p_substituate_pdb)
                                if not struct_type in d_smile.keys():
                                    d_smile[struct_type] = {}
                                    d_smile[struct_type][smile_find] = {}
                                    d_smile[struct_type][smile_find][
                                        "count"] = 1
                                    d_smile[struct_type][smile_find]["PDB"] = [
                                        pdbfile.split("_")[1]
                                    ]
                                    d_smile[struct_type][smile_find][
                                        "ligand"] = [pdbfile.split("_")[0]]
                                    d_smile[struct_type][smile_find]["ref"] = [
                                        ref_folder
                                    ]
                                else:
                                    if not smile_find in d_smile[
                                            struct_type].keys():
                                        d_smile[struct_type][smile_find] = {}
                                        d_smile[struct_type][smile_find][
                                            "count"] = 1
                                        d_smile[struct_type][smile_find][
                                            "PDB"] = [pdbfile.split("_")[1]]
                                        d_smile[struct_type][smile_find][
                                            "ligand"] = [
                                                pdbfile.split("_")[0]
                                            ]
                                        d_smile[struct_type][smile_find][
                                            "ref"] = [ref_folder]
                                    else:
                                        d_smile[struct_type][smile_find][
                                            "count"] = d_smile[struct_type][
                                                smile_find]["count"] + 1
                                        d_smile[struct_type][smile_find][
                                            "PDB"].append(
                                                pdbfile.split("_")[1])
                                        d_smile[struct_type][smile_find][
                                            "ligand"].append(
                                                pdbfile.split("_")[0])
                                        d_smile[struct_type][smile_find][
                                            "ref"].append(ref_folder)

                            else:
                                if not struct_type in d_control[
                                        "out sheap"].keys():
                                    d_control["out sheap"][struct_type] = 1
                                else:
                                    d_control["out sheap"][
                                        struct_type] = d_control["out sheap"][
                                            struct_type] + 1

        tool.closeDicoFile(d_filout_superimposed)

    # sheap control
    tool.closeDicoFile(d_filout_sheap)
    for p_file_sheap in d_filout_sheap["list"]:
        runOtherSoft.RhistogramMultiple(p_file_sheap)

    # write list of smile
    for substruct in d_smile.keys():
        p_list_smile = pathManage.result(
            name_lig) + "list_" + substruct + "_" + str(
                thresold_shaep) + "_smile.txt"
        filout_smile = open(p_list_smile, "w")
        for smile_code in d_smile[substruct].keys():
            l_lig = d_smile[substruct][smile_code]["ligand"]
            l_PDB = d_smile[substruct][smile_code]["PDB"]
            l_ref = d_smile[substruct][smile_code]["ref"]
            filout_smile.write(
                str(smile_code) + "\t" +
                str(d_smile[substruct][smile_code]["count"]) + "\t" +
                " ".join(l_PDB) + "\t" + " ".join(l_ref) + "\t" +
                " ".join(l_lig) + "\n")
        filout_smile.close()
    p_log.close()

    # control
    filout_control.write("NB ref: " + str(d_control["pr ref"]) + "\n")
    filout_control.write("Ligand query: " + str(d_control["lig query"]) + "\n")
    for k in d_control["subref"].keys():
        filout_control.write("LSR " + str(k) + ": " +
                             str(d_control["subref"][k]) + "\n")
    for k in d_control["subref empty"].keys():
        filout_control.write("NB LSR empty " + str(k) + ": " +
                             str(d_control["subref empty"][k]) + "\n")
    for k in d_control["out sheap"].keys():
        filout_control.write("LSR out by sheap " + str(k) + ": " +
                             str(d_control["out sheap"][k]) + "\n")

    filout_control.write("**********************\n\n")
    for k in d_control["subref"].keys():
        filout_control.write("LSR keep" + str(k) + ": " +
                             str(d_control["subref"][k] -
                                 d_control["out sheap"][k]) + "\n")

    filout_control.close()

    return 1

Example #4

Show file

File: ligandSimilarity.py Project: ABorrel/LSRs

def extractLGDfile(prclassif, prresult):
    """Copy the LGD files of a classification and list the LSR SMILES.

    The classification folder is walked as ``<prclassif>/<group>/<ref>/`` and
    ``<prclassif>/cycle/<subtype>/<ref>/``; every ``<ref>`` folder is expected
    to hold an ``LGD/`` and an ``LSR/`` sub-folder.

    For each reference:
      * the LGD files are copied to ``<prresult><ref>/`` and renamed
        ``<LSR>_<ligand>_<pdb><ext>``;
      * the LSR PDB files (name starting with ``LSR`` and containing ``pdb``,
        the ``REF`` one excepted) are converted to SMILES and gathered in
        ``<prresult><ref>/listLSRsmiles`` (one line per ``<ligand>-<pdb>``).

    Args:
        prclassif: path of the classification folder.
        prresult: path of the result folder; names are appended by plain
            concatenation, so it has to end with a path separator.

    Returns:
        ``prresult``, unchanged.
    """

    # result folder already holds more than one entry -> do not extract again
    if len(listdir(prresult)) > 1:
        return prresult

    # list every reference folder of the classification
    lprref = []
    for foldergroup in listdir(prclassif):
        if foldergroup == "cycle":
            # "cycle" has one more level (sub-type) than the other groups
            for subtype in listdir(prclassif + "/cycle/"):
                for refprot in listdir(prclassif + "/cycle/" + subtype):
                    lprref.append(prclassif + "/cycle/" + subtype + "/" + refprot)
        else:
            for refprot in listdir(prclassif + "/" + foldergroup + "/"):
                lprref.append(prclassif + "/" + foldergroup + "/" + refprot)

    ltypeLSR = ["pi1", "pi2", "pi3"]

    # SMILES are accumulated per reference over the whole loop: the same
    # reference can appear in several groups, and writing the list file inside
    # the loop would overwrite what the previous groups produced.
    dLSR = {}
    for prefprot in lprref:
        refprot = prefprot.split("/")[-1]
        if refprot not in dLSR:
            pathManage.generatePath(prresult + refprot)
            dLSR[refprot] = {}

        # copy file LGD
        for fileLGD in listdir(prefprot + "/LGD/"):
            lelemLGD = fileLGD.split("_")
            ligid = lelemLGD[1]
            if ligid == "REF":
                # reference ligand: PDB id is read from the folder name
                ligid = lelemLGD[2]
                pdbid = refprot.split("_")[-1]
                LSR = "REF"
            else:
                pdbid = lelemLGD[2]
                # LSR name = group folder (or sub-type folder under "cycle")
                LSR = prefprot.split("/")[-2].replace("_", "")
                if prefprot.split("/")[-3] == "cycle":
                    LSR = "cycle-" + str(LSR)
            # fileLGD[-4:] keeps the original extension
            nameout = str(LSR) + "_" + str(ligid) + "_" + str(pdbid) + str(fileLGD[-4:])
            copyfile(prefprot + "/LGD/" + fileLGD, prresult + refprot + "/" + nameout)

        # extract SMILES LSR
        prLSRin = prefprot + "/LSR/"
        for fileLSR in listdir(prLSRin):
            if not (search("^LSR", fileLSR) and search("pdb", fileLSR)):
                continue
            lelemname = fileLSR.split("_")
            nameLSR = lelemname[1]
            if nameLSR == "REF":
                continue
            kin = str(lelemname[2]) + "-" + lelemname[3]
            smiles = runOtherSoft.babelConvertPDBtoSMILE(prLSRin + fileLSR, rm_smi=1)
            if kin not in dLSR[refprot]:
                # "-" marks a LSR type without SMILES for this ligand
                dLSR[refprot][kin] = dict.fromkeys(ltypeLSR, "-")
            dLSR[refprot][kin][nameLSR] = smiles

    # write one list file per reference; "with" closes it even on error
    for refprot in dLSR:
        with open(prresult + refprot + "/listLSRsmiles", "w") as filoutLSR:
            # header
            filoutLSR.write("\t".join(ltypeLSR) + "\n")
            for kin in dLSR[refprot]:
                lsmiles = [dLSR[refprot][kin][typeLSR] for typeLSR in ltypeLSR]
                filoutLSR.write(kin + "\t" + "\t".join(lsmiles) + "\n")
    return prresult

Example #5

Show file

File: ligandSimilarity.py Project: papoku/LSRs

def extractLGDfile(prclassif, prresult):
    """Copy the LGD files of a classification and list the LSR SMILES.

    The classification folder is walked as ``<prclassif>/<group>/<ref>/`` and
    ``<prclassif>/cycle/<subtype>/<ref>/``; every ``<ref>`` folder is expected
    to hold an ``LGD/`` and an ``LSR/`` sub-folder.

    For each reference:
      * the LGD files are copied to ``<prresult><ref>/`` and renamed
        ``<LSR>_<ligand>_<pdb><ext>``;
      * the LSR PDB files (name starting with ``LSR`` and containing ``pdb``,
        the ``REF`` one excepted) are converted to SMILES and gathered, over
        all the groups, in ``<prresult><ref>/listLSRsmiles``.

    The extraction is always run; when ``listLSRsmiles`` already exists the
    new lines are appended to it, otherwise it is created with a header.

    Args:
        prclassif: path of the classification folder.
        prresult: path of the result folder; names are appended by plain
            concatenation, so it has to end with a path separator.

    Returns:
        ``prresult``, unchanged.
    """

    # list every reference folder of the classification
    lprref = []
    for foldergroup in listdir(prclassif):
        if foldergroup == "cycle":
            # "cycle" has one more level (sub-type) than the other groups
            for subtype in listdir(prclassif + "/cycle/"):
                for refprot in listdir(prclassif + "/cycle/" + subtype):
                    lprref.append(prclassif + "/cycle/" + subtype + "/" + refprot)
        else:
            for refprot in listdir(prclassif + "/" + foldergroup + "/"):
                lprref.append(prclassif + "/" + foldergroup + "/" + refprot)

    ltypeLSR = ["pi1", "pi2", "pi3"]

    # result folder of a reference -> {"<ligand>-<pdb>": {LSR type: SMILES}}
    dLSR = {}
    for prefprot in lprref:
        refprot = prefprot.split("/")[-1]
        folderresult = prresult + refprot + "/"
        if folderresult not in dLSR:
            # first time this reference is met -> create its result folder
            pathManage.generatePath(prresult + refprot)
            dLSR[folderresult] = {}

        # copy file LGD
        for fileLGD in listdir(prefprot + "/LGD/"):
            lelemLGD = fileLGD.split("_")
            ligid = lelemLGD[1]
            if ligid == "REF":
                # reference ligand: PDB id is read from the folder name
                ligid = lelemLGD[2]
                pdbid = refprot.split("_")[-1]
                LSR = "REF"
            else:
                pdbid = lelemLGD[2]
                # LSR name = group folder (or sub-type folder under "cycle")
                LSR = prefprot.split("/")[-2].replace("_", "")
                if prefprot.split("/")[-3] == "cycle":
                    LSR = "cycle-" + str(LSR)
            # fileLGD[-4:] keeps the original extension
            nameout = str(LSR) + "_" + str(ligid) + "_" + str(pdbid) + str(fileLGD[-4:])
            copyfile(prefprot + "/LGD/" + fileLGD, folderresult + nameout)

        # extract SMILES LSR
        prLSRin = prefprot + "/LSR/"
        for fileLSR in listdir(prLSRin):
            if not (search("^LSR", fileLSR) and search("pdb", fileLSR)):
                continue
            lelemname = fileLSR.split("_")
            nameLSR = lelemname[1]
            if nameLSR == "REF":
                continue
            kin = str(lelemname[2]) + "-" + lelemname[3]
            smiles = runOtherSoft.babelConvertPDBtoSMILE(prLSRin + fileLSR, rm_smi=1)
            if kin not in dLSR[folderresult]:
                # "-" marks a LSR type without SMILES for this ligand
                dLSR[folderresult][kin] = dict.fromkeys(ltypeLSR, "-")
            dLSR[folderresult][kin][nameLSR] = smiles

    # write filout
    for folderresult in dLSR:
        pfileLSR = folderresult + "listLSRsmiles"
        # the header is only written when the file is created by this call
        newfile = not path.exists(pfileLSR)

        # append mode creates the file if needed; "with" closes it on error
        with open(pfileLSR, "a") as filoutLSR:
            if newfile:
                filoutLSR.write("\t".join(ltypeLSR) + "\n")

            for kin in dLSR[folderresult]:
                lsmiles = [dLSR[folderresult][kin][i] for i in ltypeLSR]
                # debug trace, single-argument form: same text as the former
                # print statement and valid on both Python 2 and Python 3
                print(str(lsmiles) + " l.122 ligandSimilarity.py")
                filoutLSR.write(kin + "\t" + "\t".join(lsmiles) + "\n")
    return prresult