Example #1
0
def searchNeighborAtom(substruct_parsed,
                       lig_query_parsed,
                       struct_type,
                       log_file,
                       thresold_superimposed_ribose=2.5,
                       thresold_superimposed_pi=3):
    """Collect the query-ligand atoms that lie close to a reference substructure.

    Every reference atom is compared with every atom of the query ligand
    using ``parsePDB.distanceTwoatoms``; a shallow copy of each query atom
    found within the cutoff is kept, without duplicates, in the order in
    which it is first encountered.

    Args:
        substruct_parsed: atoms of the reference substructure. The first
            record is read with ``["resName"]`` for the log message.
        lig_query_parsed: atoms of the query ligand.
        struct_type: substructure label. ``"ribose"`` compares against all
            atoms of ``substruct_parsed`` with the ribose cutoff; any other
            value compares against the atoms returned by
            ``retrievePi(substruct_parsed)`` with the pi cutoff.
        log_file: writable file-like object receiving a
            "[Not substituate]" line when no atom is found.
        thresold_superimposed_ribose: distance cutoff used for ``"ribose"``.
        thresold_superimposed_pi: distance cutoff used for every other type.

    Returns:
        The list of copied query atoms; an empty list when none is found.
    """
    # Both cases run the same search; only the reference atoms and the
    # cutoff differ.
    if struct_type == "ribose":
        l_atom_ref = substruct_parsed
        thresold = thresold_superimposed_ribose
    else:
        l_atom_ref = retrievePi(substruct_parsed)
        thresold = thresold_superimposed_pi

    l_atom_substituate = []
    for atom_ref in l_atom_ref:
        for atom_query in lig_query_parsed:
            if parsePDB.distanceTwoatoms(atom_ref, atom_query) <= thresold:
                out = copy(atom_query)
                if out not in l_atom_substituate:
                    l_atom_substituate.append(out)

    # control out empty
    if not l_atom_substituate:
        # An empty substructure has no residue name to report; indexing it
        # would raise IndexError instead of logging the miss.
        res_name = substruct_parsed[0]["resName"] if substruct_parsed else ""
        log_file.write("[Not substituate] -> " + res_name + struct_type + "\n")

    return l_atom_substituate
Example #2
0
def searchNeighborAtom(substruct_parsed, lig_query_parsed, struct_type, log_file, thresold_superimposed_ribose = 2.5, thresold_superimposed_pi = 3 ) :
    """Return copies of the query atoms found near a reference substructure.

    Args:
        substruct_parsed: atoms of the reference substructure; the first
            record is read with ``["resName"]`` when the miss is logged.
        lig_query_parsed: atoms of the query ligand.
        struct_type: ``"ribose"`` searches around every atom of
            ``substruct_parsed`` with ``thresold_superimposed_ribose``; any
            other value searches around ``retrievePi(substruct_parsed)``
            with ``thresold_superimposed_pi``.
        log_file: writable file-like object; receives one
            "[Not substituate]" line when the result is empty.
        thresold_superimposed_ribose: distance cutoff for the ribose case.
        thresold_superimposed_pi: distance cutoff for the other cases.

    Returns:
        List of shallow copies of the selected query atoms, without
        duplicates and in first-hit order; ``[]`` when nothing is selected.
    """
    # select the atoms of interest and the cutoff, then run a single search
    if struct_type == "ribose" :
        l_atom_interest = substruct_parsed
        cutoff = thresold_superimposed_ribose
    else :
        l_atom_interest = retrievePi (substruct_parsed)
        cutoff = thresold_superimposed_pi

    l_atom_substituate = []
    for atom_interest in l_atom_interest :
        for atom_query in lig_query_parsed :
            if parsePDB.distanceTwoatoms(atom_interest, atom_query) > cutoff :
                continue
            out = copy(atom_query)
            if out not in l_atom_substituate :
                l_atom_substituate.append (out)

    # control out empty
    if not l_atom_substituate :
        # guard the residue-name lookup: an empty substructure previously
        # raised IndexError here instead of being logged
        if substruct_parsed :
            res_name = substruct_parsed[0]["resName"]
        else :
            res_name = ""
        log_file.write ("[Not substituate] -> " + res_name + struct_type + "\n")
        return []

    return l_atom_substituate
Example #3
0
def RMSDTwoList (l_atom1, l_atom2) :
    """Compare two atom lists position by position and summarise the deviation.

    Atoms are paired by index and their distance is obtained from
    ``parsePDB.distanceTwoatoms``.

    Args:
        l_atom1: first list of atom records (read with ``["name"]`` and
            ``["resName"]``).
        l_atom2: second list of atom records, same length as ``l_atom1``.

    Returns:
        ``[]`` when the lists are empty, have different lengths, or a pair
        of atoms differs in both name and residue name (an error message is
        printed in each case). Otherwise a list
        ``[value_all, value_ca, d_max, nb_atom]`` where ``value_all`` is the
        square root of the mean pair distance over all atoms, ``value_ca``
        the same over atoms named "CA" (NaN when there is none), ``d_max``
        the largest pair distance and ``nb_atom`` the number of pairs.
    """
    nb_atom = len (l_atom1)
    if nb_atom != len (l_atom2) or nb_atom == 0 :
        print("ERROR - RMSD: list length different or null")
        return []

    nb_ca = 0
    d_max = 0.0
    diff_position_all = 0.0
    diff_position_ca = 0.0

    for atom1, atom2 in zip (l_atom1, l_atom2) :
        # NOTE(review): a pair is rejected only when BOTH the atom name and
        # the residue name differ; "or" may have been intended - confirm
        # against the callers before tightening.
        if atom1["name"] != atom2["name"] and atom1["resName"] != atom2["resName"] :
            print("%s %s" % (atom1["name"], atom2["name"]))
            print("ERROR")
            return []

        # NOTE(review): the raw value returned by distanceTwoatoms is summed;
        # if that helper returns a plain (non-squared) distance the result is
        # not a true root-mean-square deviation - confirm.
        d_atom = parsePDB.distanceTwoatoms(atom1, atom2)
        diff_position_all = diff_position_all + d_atom

        if atom1["name"] == "CA" :
            diff_position_ca = diff_position_ca + d_atom
            nb_ca = nb_ca + 1

        if d_atom > d_max :
            d_max = d_atom

    # Without any CA atom the CA value is undefined; dividing by the zero
    # count used to raise ZeroDivisionError.
    if nb_ca :
        value_ca = sqrt (diff_position_ca / nb_ca)
    else :
        value_ca = float ("nan")

    return [sqrt (diff_position_all / nb_atom), value_ca, d_max, nb_atom]
Example #4
0
File: main.py Project: ABorrel/LSRs
def retrieveSubstructSuperimposed (name_lig, thresold_BS = 4.5, thresold_superimposed_ribose = 2.5, thresold_superimposed_pi = 3, thresold_shaep = 0.4):
    """Extract, score and export the substituents superimposed on each reference ligand.

    For every reference folder (four-character name) of the ``name_lig``
    dataset, each query ligand file ``<LIG>_<PDB>...pdb`` is loaded, the
    transformation found by ``pathManage.findMatrix`` is applied to it, and
    for each reference substructure the neighbouring query atoms are
    selected with ``neighborSearch.searchNeighborAtom``. The selected atoms
    are written as a "substituent" PDB file and compared with the
    substructure through ``runOtherSoft.runShaep``. When the best ShaEP
    similarity reaches ``thresold_shaep`` the transformed complex, the
    binding site around the query ligand and the SMILES code of the
    substituent are exported as well.

    Files written in the result folder include ``log_superimposed.txt``,
    ``quality_extraction.txt``, ``shaep_global*.txt`` and one
    ``list_<type>_<thresold>_smile.txt`` per substructure type.

    Args:
        name_lig: name of the reference ligand (also selects the dataset).
        thresold_BS: distance cutoff between a complex atom and a query
            ligand atom for the binding site selection.
        thresold_superimposed_ribose: cutoff forwarded to
            ``searchNeighborAtom``.
        thresold_superimposed_pi: cutoff forwarded to ``searchNeighborAtom``.
        thresold_shaep: minimal ``best_similarity`` for a substituent to be
            kept.

    Returns:
        Always 1.
    """

    # output
    p_dir_dataset = pathManage.dataset(name_lig)
    p_dir_result = pathManage.result(name_lig)
    l_folder_ref = listdir(p_dir_dataset)

    # log control
    p_log = open(p_dir_result + "log_superimposed.txt", "w")

    # control extraction -> counters reported in quality_extraction.txt
    d_control = {}
    d_control["pr ref"] = 0
    d_control["lig query"] = 0
    d_control["subref"] = {}
    d_control["subref empty"] = {}
    d_control["out sheap"] = {}
    filout_control = open(p_dir_result + "quality_extraction.txt", "w")

    # stock smile code
    d_smile = {}

    # sheap control
    header_shaep = "name\tbest_similarity\tshape_similarity\tESP_similarity\n"
    d_filout_sheap = {}
    d_filout_sheap["list"] = [p_dir_result + "shaep_global.txt"]
    d_filout_sheap["global"] = open(p_dir_result + "shaep_global.txt", "w")
    d_filout_sheap["global"].write(header_shaep)

    for ref_folder in l_folder_ref:
        # control folder reference name
        if len(ref_folder) != 4:
            p_log.write("[ERROR folder] -> " + ref_folder + "\n")
            continue

        # reference
        p_lig_ref = pathManage.findligandRef(p_dir_dataset + ref_folder + "/", name_lig)
        try:
            lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
        except Exception:
            p_log.write("[ERROR ligand ref] -> " + str(p_lig_ref) + "\n")
            continue

        # control
        d_control["pr ref"] = d_control["pr ref"] + 1

        # output by reference
        p_dir_result_ref = pathManage.result(name_lig + "/" + ref_folder)
        d_filout_superimposed = {}
        d_filout_superimposed["global"] = open(p_dir_result_ref + "all_ligand_aligned.pdb", "w")
        d_filout_superimposed["sheap"] = open(p_dir_result_ref + "all_ligand_aligned_" + str(thresold_shaep) + ".pdb", "w")

        # write lig ref -> connect matrix correct in all reference and all sheap
        writePDBfile.coordinateSection(d_filout_superimposed["global"], lig_ref_parsed, "HETATM", connect_matrix = 1)
        writePDBfile.coordinateSection(d_filout_superimposed["sheap"], lig_ref_parsed, "HETATM", connect_matrix = 1)

        # inspect folder dataset
        l_pdbfile = listdir(p_dir_dataset + ref_folder + "/")
        for pdbfile in l_pdbfile:
            pdbfile = pdbfile[:-4]  # remove extension

            # no ligand file: the name must hold at least <LIG>_<PDB>; checking
            # after the extension is removed also covers names whose only
            # underscore sat in the last four characters
            l_elem_name = pdbfile.split("_")
            if len(l_elem_name) < 2:
                continue
            name_lig_query = l_elem_name[0]
            pdb_query = l_elem_name[1]

            if len(name_lig_query) != 3 or len(pdb_query) != 4 or pdb_query == ref_folder:
                continue

            p_lig = p_dir_dataset + ref_folder + "/" + pdbfile + ".pdb"
            if p_lig_ref == p_lig:
                continue

            # pass case where ligand replace same ligand -> does not need run
            if name_lig_query == name_lig:
                p_log.write("[REMOVE] -> same ligand substituate\n")
                continue

            # parsed ligand query
            lig_parsed = parsePDB.loadCoordSectionPDB(p_lig, "HETATM")

            # find matrix of rotation
            p_matrix = pathManage.findMatrix(p_lig_ref, p_lig, name_lig)
            # control file matrix exist
            if not path.exists(p_matrix):
                p_log.write("[ERROR] -> Matrix transloc " + p_lig_ref + " " + p_lig + " " + name_lig + "\n")
                continue

            # control
            d_control["lig query"] = d_control["lig query"] + 1

            # find the path of complex used (ligand file name without its 4 first characters)
            name_file_lig = p_lig.split("/")[-1]
            p_complex = p_dir_dataset + ref_folder + "/" + name_file_lig[4:]

            # ligand rotated -> change the referentiel
            superposeStructure.applyMatrixLigand(lig_parsed, p_matrix)

            # use substruct
            l_p_substruct_ref = pathManage.findSubstructRef(pathManage.dataset(name_lig) + ref_folder + "/", name_lig)
            for p_substruct_ref in l_p_substruct_ref:
                # ribose or phosphate
                struct_type = p_substruct_ref.split("_")[-2]
                substruct_parsed = parsePDB.loadCoordSectionPDB(p_substruct_ref, "HETATM")

                l_atom_substituate = neighborSearch.searchNeighborAtom(substruct_parsed, lig_parsed, struct_type, p_log, thresold_superimposed_ribose = thresold_superimposed_ribose, thresold_superimposed_pi = thresold_superimposed_pi)

                # control find
                if len(l_atom_substituate) == 0:
                    d_control["subref empty"][struct_type] = d_control["subref empty"].get(struct_type, 0) + 1
                    continue
                d_control["subref"][struct_type] = d_control["subref"].get(struct_type, 0) + 1

                # write PDB file, convert smile
                p_substituate_pdb = p_dir_result_ref + "substituent_" + name_lig_query + "_" + pdb_query + "_" + struct_type + ".pdb"
                writePDBfile.coordinateSection(p_substituate_pdb, l_atom_substituate, recorder="HETATM", header=0, connect_matrix = 1)

                # sheap reference on part of ligand
                p_sheap = runOtherSoft.runShaep(p_substruct_ref, p_substituate_pdb, p_substituate_pdb[0:-4] + ".hit", clean = 0)
                val_sheap = parseShaep.parseOutputShaep(p_sheap)
                if val_sheap == {}:
                    p_log.write("[ERROR] -> ShaEP " + p_substituate_pdb + " " + p_substruct_ref + "\n")
                    d_control["out sheap"][struct_type] = d_control["out sheap"].get(struct_type, 0) + 1
                    continue

                # one ShaEP summary file by type of substructure, opened on first use
                if struct_type not in d_filout_sheap:
                    p_filout_type = p_dir_result + "shaep_global_" + struct_type + ".txt"
                    d_filout_sheap[struct_type] = open(p_filout_type, "w")
                    d_filout_sheap[struct_type].write(header_shaep)
                    d_filout_sheap["list"].append(p_filout_type)

                # write value in ShaEP control
                best_similarity = str(val_sheap["best_similarity"])
                line_shaep = ref_folder + "_" + str(pdb_query) + "_" + struct_type + "_" + str(name_lig_query) + "\t" + best_similarity + "\t" + str(val_sheap["shape_similarity"]) + "\t" + str(val_sheap["ESP_similarity"]) + "\n"
                d_filout_sheap[struct_type].write(line_shaep)
                d_filout_sheap["global"].write(line_shaep)

                # rename file substituent with shaEP value
                p_substituate_scored = p_substituate_pdb[:-4] + "_" + best_similarity + ".pdb"
                rename(p_substituate_pdb, p_substituate_scored)
                p_substituate_pdb = p_substituate_scored

                # write all substruct in global file
                header_lig = str(name_file_lig) + "_" + best_similarity
                writePDBfile.coordinateSection(d_filout_superimposed["global"], lig_parsed, recorder= "HETATM", header = header_lig, connect_matrix = 1)

                # control sheap thresold
                if not float(val_sheap["best_similarity"]) >= thresold_shaep:
                    d_control["out sheap"][struct_type] = d_control["out sheap"].get(struct_type, 0) + 1
                    continue

                # write subligand superimposed selected in global files
                writePDBfile.coordinateSection(d_filout_superimposed["sheap"], lig_parsed, recorder= "HETATM", header = header_lig, connect_matrix = 1)

                ############
                # write BS #
                ############
                # not only protein superimposed -> also ion and water
                l_atom_complex = parsePDB.loadCoordSectionPDB(p_complex)
                superposeStructure.applyMatrixProt(l_atom_complex, p_matrix)
                p_file_cx = p_dir_result_ref + "CX_" + name_file_lig
                # write CX
                writePDBfile.coordinateSection(p_file_cx, l_atom_complex, recorder="ATOM", header= name_file_lig, connect_matrix = 0)

                # search atom in BS
                l_atom_binding_site = []
                for atom_complex in l_atom_complex:
                    for atom_substruct in lig_parsed:
                        if parsePDB.distanceTwoatoms(atom_substruct, atom_complex) <= thresold_BS:
                            if atom_complex not in l_atom_binding_site:
                                l_atom_binding_site.append(deepcopy(atom_complex))
                            # one ligand atom in range is enough, the other
                            # ligand atoms cannot change the selection
                            break

                # retrieve complete residue
                l_atom_BS_res = parsePDB.getResidues(l_atom_binding_site, l_atom_complex)

                # write binding site
                p_binding = p_dir_result_ref + "BS_" + name_file_lig
                writePDBfile.coordinateSection(p_binding, l_atom_BS_res, "ATOM", p_binding, connect_matrix = 0)

                # smile code substituate analysis
                # Step smile -> not conversion if shaep not validate
                smile_find = runOtherSoft.babelConvertPDBtoSMILE(p_substituate_pdb)
                d_smile_type = d_smile.setdefault(struct_type, {})
                if smile_find not in d_smile_type:
                    d_smile_type[smile_find] = {}
                    d_smile_type[smile_find]["count"] = 1
                    d_smile_type[smile_find]["PDB"] = [pdb_query]
                    d_smile_type[smile_find]["ligand"] = [name_lig_query]
                    d_smile_type[smile_find]["ref"] = [ref_folder]
                else:
                    d_smile_type[smile_find]["count"] = d_smile_type[smile_find]["count"] + 1
                    d_smile_type[smile_find]["PDB"].append(pdb_query)
                    d_smile_type[smile_find]["ligand"].append(name_lig_query)
                    d_smile_type[smile_find]["ref"].append(ref_folder)

        tool.closeDicoFile(d_filout_superimposed)

    # sheap control
    tool.closeDicoFile(d_filout_sheap)
    for p_file_sheap in d_filout_sheap["list"]:
        runOtherSoft.RhistogramMultiple(p_file_sheap)

    # write list of smile
    for substruct in d_smile:
        p_list_smile = pathManage.result(name_lig) + "list_" + substruct + "_" + str(thresold_shaep) + "_smile.txt"
        filout_smile = open(p_list_smile, "w")
        for smile_code in d_smile[substruct]:
            d_code = d_smile[substruct][smile_code]
            filout_smile.write(str(smile_code) + "\t" + str(d_code["count"]) + "\t" + " ".join(d_code["PDB"]) + "\t" + " ".join(d_code["ref"]) + "\t" + " ".join(d_code["ligand"]) + "\n")
        filout_smile.close()
    p_log.close()

    # control
    filout_control.write("NB ref: " + str(d_control["pr ref"]) + "\n")
    filout_control.write("Ligand query: " + str(d_control["lig query"]) + "\n")
    for k in d_control["subref"]:
        filout_control.write("LSR " + str(k) + ": " + str(d_control["subref"][k]) + "\n")
    for k in d_control["subref empty"]:
        filout_control.write("NB LSR empty " + str(k) + ": " + str(d_control["subref empty"][k]) + "\n")
    for k in d_control["out sheap"]:
        filout_control.write("LSR out by sheap " + str(k) + ": " + str(d_control["out sheap"][k]) + "\n")

    filout_control.write("**********************\n\n")
    for k in d_control["subref"]:
        # a type with no rejected substituent has no "out sheap" entry
        nb_keep = d_control["subref"][k] - d_control["out sheap"].get(k, 0)
        filout_control.write("LSR keep" + str(k) + ": " + str(nb_keep) + "\n")

    filout_control.close()

    return 1
Example #5
0
def _writeEnantiomerDistances (ligand, l_atom_ligand, label, d_filout_ligand) :
    """Write the O4'-O5', O3-phosphate and phosphate-phosphate distances of one ligand."""

    # index atoms by name; as in a sequential scan, the last atom with a
    # given name is the one kept
    d_atom = {}
    for atom_ligand in l_atom_ligand :
        d_atom[atom_ligand["name"]] = atom_ligand

    # d O4 - O5 -> both atoms are required, otherwise the ligand is skipped
    if "O4'" not in d_atom or "O5'" not in d_atom :
        return
    d_O4O5 = parsePDB.distanceTwoatoms(d_atom["O4'"], d_atom["O5'"])
    d_filout_ligand["O4O5"].write (label + "\t" + str (d_O4O5) + "\n")

    # d O3 - OP -> closest phosphate oxygen
    if ligand == "AMP" :
        atom_anchor = d_atom.get ("O3'")
        l_name_OP = ("O1P", "O2P", "O3P")
    else :
        # NOTE(review): the distance is measured from O4' here although the
        # output is labelled O3OP and the AMP case uses O3' - kept as found,
        # confirm whether O3' was intended.
        atom_anchor = d_atom["O4'"]
        l_name_OP = ("O1A", "O2A", "O3A")

    if atom_anchor is not None :
        d_minO3OP = None
        name_minO3OP = None
        for atom_ligand in l_atom_ligand :
            if atom_ligand["name"] in l_name_OP :
                d_tempO3OP = parsePDB.distanceTwoatoms(atom_anchor, atom_ligand)
                if d_minO3OP is None or d_tempO3OP < d_minO3OP :
                    d_minO3OP = d_tempO3OP
                    name_minO3OP = atom_ligand["name"]
        # nothing is written when the ligand has no such phosphate oxygen
        if name_minO3OP is not None :
            d_filout_ligand["O3OP"].write (label + "_" + str(name_minO3OP) + "\t" + str (d_minO3OP) + "\n")

    # d OP OP -> closest pair between the A and B phosphate oxygens
    if ligand == "ATP" or ligand == "ADP" :
        l_pair = (("O1A", "O1B"), ("O1A", "O2B"), ("O2A", "O1B"), ("O2A", "O2B"))
        if "O1A" in d_atom and "O2A" in d_atom and "O1B" in d_atom and "O2B" in d_atom :
            l_dist = []
            for name_A, name_B in l_pair :
                l_dist.append ((parsePDB.distanceTwoatoms(d_atom[name_A], d_atom[name_B]), name_A + name_B))
            d_minOPOP, k_min = min (l_dist, key = lambda elem : elem[0])
            d_filout_ligand["OPOP"].write (label + "_" + str(k_min) + "\t" + str (d_minOPOP) + "\n")


def enantiomer(l_ligand, name_folder_final, debug = 1) :
    """Write intra-ligand oxygen distances for the reference ligands of a final folder.

    The folder ``pathManage.result("final_" + name_folder_final)`` is walked
    as ``<type>/<sub>/<ref>/LGD/`` (entries of a ``cycle`` sub-folder are
    walked as ``cycle/<entry>``). For each file matching "LGD_REF_A" and
    ".pdb", whose PDB code (4 first characters of the 4th "_"-separated
    field) has not been seen yet, the HETATM atoms are loaded and the
    distances are appended to the ``<ligand>_O4O5``, ``<ligand>_O3OP`` and
    ``<ligand>_OPOP`` files of the ``enantiomer/`` sub-folder. Each output
    file is finally passed to ``runOtherSoft.Rhistogram``.

    Distances are computed from the atoms of the current file only: when a
    required atom is missing the corresponding line is not written.

    Args:
        l_ligand: ligand names for which output files are created; files of
            other ligands are ignored.
        name_folder_final: suffix of the "final_" result folder.
        debug: when true, print the folders and files being visited.
    """

    pr_final = pathManage.result("final_" + name_folder_final)
    pr_enantiomer = pathManage.generatePath(pr_final + "enantiomer/")

    l_ref = []

    d_filout = {}
    for ligand in l_ligand :
        d_filout[ligand] = {}
        for type_dist in ("O3OP", "O4O5", "OPOP") :
            d_filout[ligand][type_dist] = open (pr_enantiomer + ligand + "_" + type_dist, "w")

    try :
        for pr_type_ref in listdir(pr_final) :
            if debug : print("1 " + pr_type_ref)
            # case where pr_type_ref is a file not a folder
            try : l_pr_sub = listdir(pr_final + pr_type_ref + "/")
            except OSError : continue

            if debug :
                for pr_sub in l_pr_sub :
                    print("2 " + pr_sub)

            # case cycle -> replaced in the list by its own sub-folders
            if "cycle" in l_pr_sub :
                l_pr_sub.remove ("cycle")
                for pr_sub_cycle in listdir (pr_final + pr_type_ref + "/cycle") :
                    l_pr_sub.append ("cycle/" + pr_sub_cycle)

            for pr_sub in l_pr_sub :
                if debug : print("3 " + pr_sub)
                # a plain file cannot be listed: skip it instead of reusing
                # the listing of the previous folder
                try : l_pr_ref = listdir (pr_final + pr_type_ref + "/" + pr_sub)
                except OSError : continue

                for pr_ref in l_pr_ref :
                    if debug : print("4 " + pr_ref)
                    pr_LGD = pr_final + pr_type_ref + "/" + pr_sub + "/" + pr_ref + "/LGD/"
                    # case no folder
                    try : l_file = listdir(pr_LGD)
                    except OSError : continue

                    for name_file in l_file :
                        if not (search("LGD_REF_A", name_file) and search(".pdb", name_file)) :
                            continue

                        # each reference PDB is analysed only once
                        PDB_ref = name_file.split ("_")[3][:4]
                        if PDB_ref in l_ref :
                            if debug : print("!!!!! IN")
                            break
                        l_ref.append (PDB_ref)

                        ligand = name_file.split ("_")[2]
                        # no output file was opened for a ligand that was not requested
                        if ligand not in d_filout :
                            continue

                        l_atom_ligand = parsePDB.loadCoordSectionPDB(pr_LGD + name_file, "HETATM")
                        _writeEnantiomerDistances (ligand, l_atom_ligand, pr_ref + "_" + pr_type_ref, d_filout[ligand])
    finally :
        # close files, also when the analysis stops on an error
        for lig in l_ligand :
            for type_dist in d_filout[lig] :
                d_filout[lig][type_dist].close ()

    # histograms
    for lig in l_ligand :
        for type_dist in d_filout[lig] :
            runOtherSoft.Rhistogram(d_filout[lig][type_dist].name, type_dist, brk = 20)
Example #6
0
def retrieveSubstructSuperimposed(name_lig,
                                  thresold_BS=4.5,
                                  thresold_superimposed_ribose=2.5,
                                  thresold_superimposed_pi=3,
                                  thresold_shaep=0.4):

    # ouput
    p_dir_dataset = pathManage.dataset(name_lig)
    p_dir_result = pathManage.result(name_lig)
    l_folder_ref = listdir(p_dir_dataset)

    # log control
    p_log = open(p_dir_result + "log_superimposed.txt", "w")

    # control extraction
    d_control = {}
    d_control["pr ref"] = 0
    d_control["lig query"] = 0
    d_control["subref"] = {}
    d_control["subref empty"] = {}
    d_control["out sheap"] = {}
    filout_control = open(p_dir_result + "quality_extraction.txt", "w")

    # stock smile code
    d_smile = {}

    # sheap control
    d_filout_sheap = {}
    d_filout_sheap["list"] = [p_dir_result + "shaep_global.txt"]
    d_filout_sheap["global"] = open(p_dir_result + "shaep_global.txt", "w")
    d_filout_sheap["global"].write(
        "name\tbest_similarity\tshape_similarity\tESP_similarity\n")

    for ref_folder in l_folder_ref:
        # control folder reference name
        if len(ref_folder) != 4:
            p_log.write("[ERROR folder] -> " + ref_folder + "\n")
            continue

        # reference
        p_lig_ref = pathManage.findligandRef(p_dir_dataset + ref_folder + "/",
                                             name_lig)
        try:
            lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
#             print len (lig_ref_parsed)
        except:
            p_log.write("[ERROR ligand ref] -> " + p_lig_ref + "\n")
            continue

        #control
        d_control["pr ref"] = d_control["pr ref"] + 1

        # output by reference
        p_dir_result_ref = pathManage.result(name_lig + "/" + ref_folder)
        d_filout_superimposed = {}
        d_filout_superimposed["global"] = open(
            p_dir_result_ref + "all_ligand_aligned.pdb", "w")
        d_filout_superimposed["sheap"] = open(
            p_dir_result_ref + "all_ligand_aligned_" + str(thresold_shaep) +
            ".pdb", "w")

        # write lig ref -> connect matrix corrrect in all reference and all sheap
        writePDBfile.coordinateSection(d_filout_superimposed["global"],
                                       lig_ref_parsed,
                                       "HETATM",
                                       connect_matrix=1)
        writePDBfile.coordinateSection(d_filout_superimposed["sheap"],
                                       lig_ref_parsed,
                                       "HETATM",
                                       connect_matrix=1)

        # inspect folder dataset
        l_pdbfile = listdir(p_dir_dataset + ref_folder + "/")
        for pdbfile in l_pdbfile:
            # no ligand file
            if len(pdbfile.split("_")) == 1:
                continue
            pdbfile = pdbfile[:-4]  # remove extention

            if len(pdbfile.split("_")[0]) == 3 and len(pdbfile.split(
                    "_")[1]) == 4 and pdbfile.split("_")[1] != ref_folder:
                p_lig = p_dir_dataset + ref_folder + "/" + pdbfile + ".pdb"
                if p_lig_ref != p_lig:
                    # pass case where ligand replace same ligand -> does not need run
                    if pdbfile.split("_")[0] == name_lig:
                        p_log.write("[REMOVE] -> same ligand substituate")
                        continue

                    # parsed ligand query
                    lig_parsed = parsePDB.loadCoordSectionPDB(p_lig, "HETATM")

                    # find matrix of rotation
                    p_matrix = pathManage.findMatrix(p_lig_ref, p_lig,
                                                     name_lig)
                    # control file matrix exist
                    if not path.exists(p_matrix):
                        p_log.write("[ERROR] -> Matrix transloc " + p_lig_ref +
                                    " " + p_lig + " " + name_lig + "\n")
                        continue

                    # control
                    d_control["lig query"] = d_control["lig query"] + 1

                    # find the path of complex used
                    p_complex = p_dir_dataset + ref_folder + "/" + p_lig.split(
                        "/")[-1][4:]

                    # ligand rotated -> change the referentiel
                    superposeStructure.applyMatrixLigand(lig_parsed, p_matrix)

                    # use substruct
                    l_p_substruct_ref = pathManage.findSubstructRef(
                        pathManage.dataset(name_lig) + ref_folder + "/",
                        name_lig)
                    for p_substruct_ref in l_p_substruct_ref:
                        # ribose or phosphate
                        struct_type = p_substruct_ref.split("_")[-2]
                        substruct_parsed = parsePDB.loadCoordSectionPDB(
                            p_substruct_ref, "HETATM")

                        l_atom_substituate = neighborSearch.searchNeighborAtom(
                            substruct_parsed,
                            lig_parsed,
                            struct_type,
                            p_log,
                            thresold_superimposed_ribose=
                            thresold_superimposed_ribose,
                            thresold_superimposed_pi=thresold_superimposed_pi)
                        # control find
                        if len(l_atom_substituate) == 0:
                            if not struct_type in d_control[
                                    "subref empty"].keys():
                                d_control["subref empty"][struct_type] = 1
                            else:
                                d_control["subref empty"][
                                    struct_type] = d_control["subref empty"][
                                        struct_type] + 1
                            continue

                        else:
                            if not struct_type in d_control["subref"].keys():
                                d_control["subref"][struct_type] = 1
                            else:
                                d_control["subref"][struct_type] = d_control[
                                    "subref"][struct_type] + 1

                            # write PDB file, convert smile
                            p_substituate_pdb = p_dir_result_ref + "substituent_" + pdbfile.split(
                                "_")[0] + "_" + pdbfile.split(
                                    "_")[1] + "_" + struct_type + ".pdb"
                            writePDBfile.coordinateSection(p_substituate_pdb,
                                                           l_atom_substituate,
                                                           recorder="HETATM",
                                                           header=0,
                                                           connect_matrix=1)

                            # sheap reference on part of ligand
                            p_sheap = runOtherSoft.runShaep(
                                p_substruct_ref,
                                p_substituate_pdb,
                                p_substituate_pdb[0:-4] + ".hit",
                                clean=0)
                            val_sheap = parseShaep.parseOutputShaep(p_sheap)
                            if val_sheap == {}:
                                p_log.write("[ERROR] -> ShaEP " +
                                            p_substituate_pdb + " " +
                                            p_substruct_ref + "\n")

                                if not struct_type in d_control[
                                        "out sheap"].keys():
                                    d_control["out sheap"][struct_type] = 1
                                else:
                                    d_control["out sheap"][
                                        struct_type] = d_control["out sheap"][
                                            struct_type] + 1
                                continue

                            # control thresold sheap
                            if not struct_type in d_filout_sheap.keys():
                                d_filout_sheap[struct_type] = {}
                                d_filout_sheap[struct_type] = open(
                                    p_dir_result + "shaep_global_" +
                                    struct_type + ".txt", "w")
                                d_filout_sheap[struct_type].write(
                                    "name\tbest_similarity\tshape_similarity\tESP_similarity\n"
                                )
                                d_filout_sheap["list"].append(
                                    p_dir_result + "shaep_global_" +
                                    struct_type +
                                    ".txt")  # to improve with python function

                            # write value in ShaEP control
                            d_filout_sheap[struct_type].write(
                                ref_folder + "_" + str(pdbfile.split("_")[1]) +
                                "_" + struct_type + "_" +
                                str(pdbfile.split("_")[0]) + "\t" +
                                str(val_sheap["best_similarity"]) + "\t" +
                                str(val_sheap["shape_similarity"]) + "\t" +
                                str(val_sheap["ESP_similarity"]) + "\n")
                            d_filout_sheap["global"].write(
                                ref_folder + "_" + str(pdbfile.split("_")[1]) +
                                "_" + struct_type + "_" +
                                str(pdbfile.split("_")[0]) + "\t" +
                                str(val_sheap["best_similarity"]) + "\t" +
                                str(val_sheap["shape_similarity"]) + "\t" +
                                str(val_sheap["ESP_similarity"]) + "\n")

                            # rename file substituent with shaEP value
                            rename(
                                p_substituate_pdb,
                                p_substituate_pdb[:-4] + "_" +
                                str(val_sheap["best_similarity"]) + ".pdb")
                            # rename and change the file name
                            p_substituate_pdb = p_substituate_pdb[:-4] + "_" + str(
                                val_sheap["best_similarity"]) + ".pdb"

                            # write all substruct in global file
                            writePDBfile.coordinateSection(
                                d_filout_superimposed["global"],
                                lig_parsed,
                                recorder="HETATM",
                                header=str(p_lig.split("/")[-1]) + "_" +
                                str(val_sheap["best_similarity"]),
                                connect_matrix=1)

                            # control sheap thresold
                            if float(val_sheap["best_similarity"]
                                     ) >= thresold_shaep:

                                # write subligand superimposed selected in global files
                                writePDBfile.coordinateSection(
                                    d_filout_superimposed["sheap"],
                                    lig_parsed,
                                    recorder="HETATM",
                                    header=str(p_lig.split("/")[-1]) + "_" +
                                    str(val_sheap["best_similarity"]),
                                    connect_matrix=1)

                                ############
                                # write BS #
                                ############
                                # not only protein superimposed -> also ion and water
                                l_atom_complex = parsePDB.loadCoordSectionPDB(
                                    p_complex)
                                superposeStructure.applyMatrixProt(
                                    l_atom_complex, p_matrix)
                                p_file_cx = p_dir_result_ref + "CX_" + p_lig.split(
                                    "/")[-1]
                                # write CX
                                writePDBfile.coordinateSection(
                                    p_file_cx,
                                    l_atom_complex,
                                    recorder="ATOM",
                                    header=p_lig.split("/")[-1],
                                    connect_matrix=0)

                                # search atom in BS
                                l_atom_binding_site = []
                                for atom_complex in l_atom_complex:
                                    for atom_substruct in lig_parsed:
                                        if parsePDB.distanceTwoatoms(
                                                atom_substruct,
                                                atom_complex) <= thresold_BS:
                                            if not atom_complex in l_atom_binding_site:
                                                l_atom_binding_site.append(
                                                    deepcopy(atom_complex))

                                # 3. retrieve complet residue
                                l_atom_BS_res = parsePDB.getResidues(
                                    l_atom_binding_site, l_atom_complex)

                                # 4. write binding site
                                p_binding = p_dir_result_ref + "BS_" + p_lig.split(
                                    "/")[-1]
                                writePDBfile.coordinateSection(
                                    p_binding,
                                    l_atom_BS_res,
                                    "ATOM",
                                    p_binding,
                                    connect_matrix=0)

                                # smile code substituate analysis
                                # Step smile -> not conversion if shaep not validate
                                smile_find = runOtherSoft.babelConvertPDBtoSMILE(
                                    p_substituate_pdb)
                                if not struct_type in d_smile.keys():
                                    d_smile[struct_type] = {}
                                    d_smile[struct_type][smile_find] = {}
                                    d_smile[struct_type][smile_find][
                                        "count"] = 1
                                    d_smile[struct_type][smile_find]["PDB"] = [
                                        pdbfile.split("_")[1]
                                    ]
                                    d_smile[struct_type][smile_find][
                                        "ligand"] = [pdbfile.split("_")[0]]
                                    d_smile[struct_type][smile_find]["ref"] = [
                                        ref_folder
                                    ]
                                else:
                                    if not smile_find in d_smile[
                                            struct_type].keys():
                                        d_smile[struct_type][smile_find] = {}
                                        d_smile[struct_type][smile_find][
                                            "count"] = 1
                                        d_smile[struct_type][smile_find][
                                            "PDB"] = [pdbfile.split("_")[1]]
                                        d_smile[struct_type][smile_find][
                                            "ligand"] = [
                                                pdbfile.split("_")[0]
                                            ]
                                        d_smile[struct_type][smile_find][
                                            "ref"] = [ref_folder]
                                    else:
                                        d_smile[struct_type][smile_find][
                                            "count"] = d_smile[struct_type][
                                                smile_find]["count"] + 1
                                        d_smile[struct_type][smile_find][
                                            "PDB"].append(
                                                pdbfile.split("_")[1])
                                        d_smile[struct_type][smile_find][
                                            "ligand"].append(
                                                pdbfile.split("_")[0])
                                        d_smile[struct_type][smile_find][
                                            "ref"].append(ref_folder)

                            else:
                                if not struct_type in d_control[
                                        "out sheap"].keys():
                                    d_control["out sheap"][struct_type] = 1
                                else:
                                    d_control["out sheap"][
                                        struct_type] = d_control["out sheap"][
                                            struct_type] + 1

        tool.closeDicoFile(d_filout_superimposed)

    # sheap control
    tool.closeDicoFile(d_filout_sheap)
    for p_file_sheap in d_filout_sheap["list"]:
        runOtherSoft.RhistogramMultiple(p_file_sheap)

    # write list of smile
    for substruct in d_smile.keys():
        p_list_smile = pathManage.result(
            name_lig) + "list_" + substruct + "_" + str(
                thresold_shaep) + "_smile.txt"
        filout_smile = open(p_list_smile, "w")
        for smile_code in d_smile[substruct].keys():
            l_lig = d_smile[substruct][smile_code]["ligand"]
            l_PDB = d_smile[substruct][smile_code]["PDB"]
            l_ref = d_smile[substruct][smile_code]["ref"]
            filout_smile.write(
                str(smile_code) + "\t" +
                str(d_smile[substruct][smile_code]["count"]) + "\t" +
                " ".join(l_PDB) + "\t" + " ".join(l_ref) + "\t" +
                " ".join(l_lig) + "\n")
        filout_smile.close()
    p_log.close()

    # control
    filout_control.write("NB ref: " + str(d_control["pr ref"]) + "\n")
    filout_control.write("Ligand query: " + str(d_control["lig query"]) + "\n")
    for k in d_control["subref"].keys():
        filout_control.write("LSR " + str(k) + ": " +
                             str(d_control["subref"][k]) + "\n")
    for k in d_control["subref empty"].keys():
        filout_control.write("NB LSR empty " + str(k) + ": " +
                             str(d_control["subref empty"][k]) + "\n")
    for k in d_control["out sheap"].keys():
        filout_control.write("LSR out by sheap " + str(k) + ": " +
                             str(d_control["out sheap"][k]) + "\n")

    filout_control.write("**********************\n\n")
    for k in d_control["subref"].keys():
        filout_control.write("LSR keep" + str(k) + ": " +
                             str(d_control["subref"][k] -
                                 d_control["out sheap"][k]) + "\n")

    filout_control.close()

    return 1
Example #7
0
def analyseIons(pr_dataset, name_ligand, p_filout, thresold_max_interaction=4.0):
    """Tabulate ion / ligand-atom distances and angles over a dataset.

    Every 4-character entry of ``pr_dataset`` is handled as one reference
    folder.  For each HETATM record of the complex whose ``resName`` is in
    the module-level ``l_ions``, the distances to the atoms returned by
    ``retrieveTwoAtomForAngle`` and the angle(s) centred on the ion are
    computed.  Ions whose first two distances are both below 10 are written
    as one tab-separated row.

    Files written:
      * ``p_filout``: one row per retained ion (one extra distance, angle
        and atom serial when ``name_ligand`` is "ATP");
      * ``p_filout[0:-4] + "count.txt"``: the summary counters;
      * ``p_filout[0:-4] + "byIons_" + name_ligand``: for each ion name,
        the number of processed reference folders in which it appears.
        This file is then passed to ``runOtherSoft.barplot``.

    Args:
        pr_dataset: dataset directory; it is concatenated directly with the
            folder name, so it must end with a path separator.
        name_ligand: ligand name; "ATP" switches on the three-atom output.
        p_filout: path of the main table; its last four characters are
            stripped to build the two companion file names.
        thresold_max_interaction: distance below which an ion counts as
            sitting between two of the selected ligand atoms.

    Returns:
        None.
    """
    is_atp = name_ligand == "ATP"
    l_folder_ref = listdir(pr_dataset)

    # global counters, reported in the "count.txt" companion file
    d_count = {}
    for counter_name in ("CX", "CX + ions", "BS + ions", "BS + 1-ion",
                         "BS + 2-ions", "BS + more-ions", "Interact-1",
                         "Interact-2"):
        d_count[counter_name] = 0

    # ion name -> number of reference folders in which it was seen
    d_ions = {}

    # context manager: the table is closed even if parsing or geometry fails
    with open(p_filout, "w") as filout:
        if is_atp:
            filout.write("PDB\tIon\tD1\tD2\tD3\tAngle1\tAngle2\tAt1\tAt2\tA3\n")
        else:
            filout.write("PDB\tIon\tD1\tD2\tAngle\tAt1\tAt2\n")

        for ref_folder in l_folder_ref:
            if len(ref_folder) != 4:
                continue
            d_count["CX"] = d_count["CX"] + 1

            # path and complex
            p_lig_ref = pathManage.findligandRef(pr_dataset + ref_folder + "/", name_ligand)
            p_complex = pathManage.findPDBRef(pr_dataset + ref_folder + "/")

            # parsing
            lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
            l_het_parsed = parsePDB.loadCoordSectionPDB(p_complex, "HETATM")

            # atoms used for the distance / angle measures
            l_pi = retrieveTwoAtomForAngle(lig_ref_parsed, name_ligand)
            if l_pi == []:  # case ligand without phosphate
                continue

            seen_ions = set()    # ion names already counted for this folder
            only_one = 0         # only the first retained ion of a folder bumps its own counter
            flag_interact = 0
            flag_between_1 = 0
            flag_between_2 = 0

            for het_parsed in l_het_parsed:
                ion_name = het_parsed["resName"]
                if ion_name not in l_ions:
                    continue

                d_count["CX + ions"] = d_count["CX + ions"] + 1
                if ion_name not in d_ions:
                    d_ions[ion_name] = 0
                if ion_name not in seen_ions:
                    d_ions[ion_name] = d_ions[ion_name] + 1
                    seen_ions.add(ion_name)

                d1 = parsePDB.distanceTwoatoms(l_pi[0], het_parsed)
                d2 = parsePDB.distanceTwoatoms(l_pi[1], het_parsed)
                if is_atp:
                    d3 = parsePDB.distanceTwoatoms(l_pi[2], het_parsed)
                    angle_bis = parsePDB.angleVector(l_pi[1], het_parsed, l_pi[2])
                angle = parsePDB.angleVector(l_pi[0], het_parsed, l_pi[1])

                # coarse filter: only ions close to both first atoms are kept
                if not (d1 < 10 and d2 < 10):
                    continue

                if ion_name not in d_count:
                    d_count[ion_name] = 0
                if only_one == 0:
                    d_count[ion_name] = d_count[ion_name] + 1
                    only_one = 1
                d_count["BS + ions"] = d_count["BS + ions"] + 1
                flag_interact = flag_interact + 1
                if d1 < thresold_max_interaction and d2 < thresold_max_interaction:
                    flag_between_1 = flag_between_1 + 1

                if is_atp:
                    if d3 < thresold_max_interaction and d2 < thresold_max_interaction:
                        flag_between_2 = flag_between_2 + 1
                    l_field = [ref_folder, ion_name, d1, d2, d3, angle, angle_bis,
                               l_pi[0]["serial"], l_pi[1]["serial"], l_pi[2]["serial"]]
                else:
                    l_field = [ref_folder, ion_name, d1, d2, angle,
                               l_pi[0]["serial"], l_pi[1]["serial"]]
                filout.write("\t".join([str(field) for field in l_field]) + "\n")

            # classify the folder by its number of retained ions
            if flag_interact == 1:
                d_count["BS + 1-ion"] = d_count["BS + 1-ion"] + 1
            elif flag_interact == 2:
                d_count["BS + 2-ions"] = d_count["BS + 2-ions"] + 1
            elif flag_interact > 2:
                d_count["BS + more-ions"] = d_count["BS + more-ions"] + 1

            if flag_between_1 >= 1:
                d_count["Interact-1"] = d_count["Interact-1"] + flag_between_1
            if flag_between_2 >= 1:
                d_count["Interact-2"] = d_count["Interact-2"] + flag_between_2

    with open(p_filout[0:-4] + "count.txt", "w") as filout_count:
        filout_count.write("CX: " + str(d_count["CX"]) + "\n")
        filout_count.write("CX + ions: " + str(d_count["CX + ions"]) + "\n")
        filout_count.write("BS + ions: " + str(d_count["BS + ions"]) + "\n")
        filout_count.write("BS + 1-ion: " + str(d_count["BS + 1-ion"]) + "\n")
        filout_count.write("BS + 2-ions: " + str(d_count["BS + 2-ions"]) + "\n")
        filout_count.write("BS + more-ions: " + str(d_count["BS + more-ions"]) + "\n")
        filout_count.write("Interact Pi-alpha + Pi-beta: " + str(d_count["Interact-1"]) + "\n")
        filout_count.write("Interact Pi-beta + Pi-gama: " + str(d_count["Interact-2"]) + "\n")

    p_by_ion = p_filout[0:-4] + "byIons_" + name_ligand
    with open(p_by_ion, "w") as filout_by_ion:
        for ion_name in d_ions:
            filout_by_ion.write(str(ion_name.capitalize()) + "\t" + str(d_ions[ion_name]) + "\n")

    runOtherSoft.barplot(p_by_ion)
Example #8
0
def computeRMSDBS(p_ref, p_query, p_substruct, pr_result, thresold_BS=6):
    """Compute the RMSD between the reference binding site and the query.

    The binding site is made of the complete residues (via
    ``parsePDB.getResidues``) of every reference ATOM record lying within
    ``thresold_BS`` of an atom of the substructure.  Each binding-site atom
    is then paired with the closest query atom having the same ``resName``
    and ``name`` (strictly closer than 100.0), and both lists are handed to
    ``RMSDTwoList``.

    Args:
        p_ref: path of the reference PDB file (ATOM records are used).
        p_query: path of the query PDB file (ATOM records are used).
        p_substruct: path of the substructure PDB file.
        pr_result: result directory; currently not used by this function.
        thresold_BS: distance cut-off defining the binding site.

    Returns:
        ``[]`` when ``RMSDTwoList`` returns ``[]``; otherwise its result
        with ``flag_identic_crystal`` appended (1 when every paired atom
        has the same ``resSeq`` as its reference atom, 0 otherwise).
    """
    l_atom_query_parsed = parsePDB.loadCoordSectionPDB(p_query, "ATOM")
    l_atom_ref_parsed = parsePDB.loadCoordSectionPDB(p_ref, "ATOM")
    l_atom_substruct = parsePDB.loadCoordSectionPDB(p_substruct)

    # reference atoms close to the substructure
    l_BS_ref = []
    for atom_substruct in l_atom_substruct:
        for atom_ref in l_atom_ref_parsed:
            d_atom = parsePDB.distanceTwoatoms(atom_substruct, atom_ref)
            if d_atom <= thresold_BS:
                l_BS_ref.append(atom_ref)
    # retrieve residue full
    l_BS_ref = parsePDB.getResidues(l_BS_ref, l_atom_ref_parsed)

    l_BS_query = []
    flag_identic_crystal = 1
    for atomBS_ref in l_BS_ref:
        # Reset for every reference atom: the match of a previous atom must
        # never be reused when the current one has no counterpart.
        res_temp = None
        d_max = 100.0
        for atom_query in l_atom_query_parsed:
            if atom_query["resName"] == atomBS_ref["resName"] and atom_query["name"] == atomBS_ref["name"]:
                d = parsePDB.distanceTwoatoms(atom_query, atomBS_ref)
                if d < d_max:
                    d_max = d
                    res_temp = atom_query

        if res_temp is None:
            # No counterpart in the query: nothing is appended, so the two
            # lists end up with different lengths.
            # NOTE(review): presumably RMSDTwoList rejects such lists and
            # returns [] -- confirm against its implementation.
            continue

        l_BS_query.append(deepcopy(res_temp))
        # identic check number
        if res_temp["resSeq"] != atomBS_ref["resSeq"]:
            flag_identic_crystal = 0

    l_RMSD = RMSDTwoList(l_BS_query, l_BS_ref)

    if l_RMSD == []:
        return []
    return l_RMSD + [flag_identic_crystal]