예제 #1
0
def controlResult (l_name_ligand):
    """
    Count the ShaEP output files found for each ligand and write a summary
    in "sheap_control.txt" (in the folder returned by pathManage.result()).

    For each ligand, every entry of its result folder whose name has exactly
    4 characters is listed; among the files matching the pattern ".hit",
    three counters are reported:
      - count Shaep: number of matching files
      - count Shaep wrong: matching files for which path.getsize is below 100
      - count Shaep ribose: matching files whose name also matches "ribose"

    arg: -> list of ligand names
    return: NONE
    """

    # context manager -> the report is closed even if a folder listing
    # fails (the handle was previously never closed)
    with open(pathManage.result() + "sheap_control.txt", "w") as filout :
        for name_ligand in l_name_ligand :
            count_sheap = 0
            count_sheap_out = 0
            count_ribose = 0
            pr_result = pathManage.result(name_ligand)

            for ref_PDB in listdir(pr_result) :
                # only 4-character entries are inspected (presumably PDB IDs)
                if len(ref_PDB) != 4 :
                    continue
                # parenthesised form prints the same text with python 2 and 3
                print (ref_PDB)
                pr_ref = pr_result + ref_PDB
                for file_ref in listdir(pr_ref) :
                    if not search(".hit", file_ref) :
                        continue
                    count_sheap = count_sheap + 1

                    if path.getsize(pr_ref + "/" + file_ref) < 100 :
                        count_sheap_out = count_sheap_out + 1
                    if search("ribose", file_ref) :
                        count_ribose = count_ribose + 1

            filout.write (name_ligand + "\n")
            filout.write ("count Shaep:" + str (count_sheap) + "\n")
            filout.write ("count Shaep wrong:" + str (count_sheap_out) + "\n")
            filout.write ("count Shaep ribose:" + str (count_ribose) + "\n")
            filout.write ("******************\n")
예제 #2
0
파일: buildData.py 프로젝트: papoku/LSRs
def builtDatasetGlobal(p_list_ligand,
                       ligand_ID,
                       thresold_RX=2.5,
                       thresold_blast=1e-4,
                       verbose=1):
    """
    Run the dataset building pipeline for one ligand.

    Steps, in order: reference extraction, family file, reference filtering,
    global filtering, blast run, blast filtering and cleaning of the dataset
    folder. When verbose is true, toolViewStructDataset is called on the
    dataset after the family file and after each filtering / blast step.

    arg: -> p_list_ligand: given as is to extractReference
         -> ligand_ID: ligand name, used to build dataset and result folders
         -> thresold_RX: given to filterReferenceByOne and filterBlastResult
         -> thresold_blast: given to filterBlastResult
         -> verbose: switch for the dataset display
    return: NONE
    """

    def _view(d_current):
        # display the dataset only in verbose mode
        if verbose:
            toolViewStructDataset(d_current)

    # every result folder of this step is under the same sub folder
    name_building = ligand_ID + "/datasetBuilding"

    pr_dataset = pathManage.dataset(ligand_ID)
    pr_building = pathManage.result(name_building)

    # references + file with name and family
    d_dataset = extractReference(p_list_ligand, pr_dataset, pr_building,
                                 ligand_ID)
    analysis.familyPDBRef(d_dataset, pr_dataset + "family_PDB.txt")
    _view(d_dataset)

    # reference selection: RX and same chain
    pr_align = pathManage.result(name_building + "/aligmentRef")
    filterReferenceByOne(d_dataset, pr_align, ligand_ID,
                         thresold_RX=thresold_RX)
    _view(d_dataset)

    # only unique protein conserved
    filterGlobalDataset(d_dataset, pr_align)
    _view(d_dataset)

    # blast on conserved sequences
    pr_blast = pathManage.result(name_building + "/blast")
    RunBlast.globalRun(d_dataset, pr_blast)
    _view(d_dataset)

    # e-value and RX filter
    filterBlastResult(d_dataset, pr_dataset, ligand_ID,
                      thresold_RX=thresold_RX,
                      thresold_blast=thresold_blast)
    _view(d_dataset)

    # dataset folder cleaning
    cleanFolderDataset(d_dataset, pr_dataset)
예제 #3
0
def ionIdentification(name_ligand):
    """
    Step 4: ion analysis for one ligand.

    Calls ionSearch.analyseIons with the dataset folder of the ligand, the
    ligand name and the path of the report "ionsAnalysis.txt" located in the
    result folder of the ligand. According to the author's note, the analysis
    searches metals in the close environment and computes distances and
    angles (done inside ionSearch, not visible here).
    """

    pr_dataset = pathManage.dataset(name_ligand)
    pr_result = pathManage.result(name_ligand)
    p_report = pr_result + "ionsAnalysis.txt"

    ionSearch.analyseIons(pr_dataset, name_ligand, p_report)
예제 #4
0
def classifRefProtein(pr_dataset,
                      l_lig,
                      thresold_identity=30.0,
                      thresold_similarity=30.0,
                      p_fasta_global="/home/borrel/Yue_project/pdb_seqres.txt"):
    """
    Group the reference proteins of several ligands by sequence identity and
    by sequence similarity.

    For every ligand, each sub folder of its dataset folder is listed and the
    first file whose name starts with the folder name gives the PDB ID (file
    name without its last 4 characters, first 4 characters in lower case)
    used to import a fasta file. The fasta files are then aligned with
    applyNeedleList, both matrices are written, and the references are
    grouped with GroupRef and merged with MergeGroup.

    arg: -> pr_dataset: not used, kept so that existing calls still work
            (the dataset folder is retrieved for each ligand)
         -> l_lig: list of ligand names
         -> thresold_identity: threshold given to GroupRef for identity
         -> thresold_similarity: threshold given to GroupRef for similarity
         -> p_fasta_global: global fasta file given to
            downloadFile.importFasta (default: path previously hard-coded)
    return: NONE
    """

    pr_out = pathManage.result("clasifRef")

    # case fasta file
    pr_align_seq = pathManage.generatePath(pr_out + "alignSeq/")
    l_p_fasta = []
    for lig in l_lig:
        # separate local name -> the pr_dataset argument is not overwritten
        pr_dataset_lig = pathManage.dataset(lig)
        for name_ref in listdir(pr_dataset_lig):
            pr_ref_by_lig = pr_dataset_lig + name_ref
            PDB_folder = pr_ref_by_lig.split("/")[-1]

            try:
                l_file = listdir(pr_ref_by_lig)
            except OSError:
                # entry that cannot be listed (e.g. a file, not a folder)
                continue

            for file_ref in l_file:
                # literal prefix test: the folder name is not a regex
                if file_ref.startswith(PDB_folder):
                    PDB_ID = file_ref[0:-4]
                    PDB_ID = PDB_ID[0:4].lower() + PDB_ID[4:]
                    # PDB ID with chain associated
                    p_fasta = downloadFile.importFasta(
                        PDB_ID,
                        pr_align_seq,
                        dir_by_PDB=0,
                        debug=1,
                        fastaGlobal=p_fasta_global)
                    l_p_fasta.append(p_fasta)
                    # only the first matching file by reference folder
                    break

    d_outNeedle = applyNeedleList(l_p_fasta, pr_align_seq)

    # writeMatrix
    writeMatrixFromDico(d_outNeedle, pr_out + "matrixSimilarSeq", "similarity")
    writeMatrixFromDico(d_outNeedle, pr_out + "matrixIDSeq", "identity")

    # group reference
    p_group_id = GroupRef(
        d_outNeedle, "identity",
        pr_out + "groupIdentity" + "_" + str(thresold_identity) + ".txt",
        thresold_identity, l_lig)
    p_group_sim = GroupRef(
        d_outNeedle, "similarity",
        pr_out + "groupSimilarity" + "_" + str(thresold_similarity) + ".txt",
        thresold_similarity, l_lig)

    # merge not alone prot
    MergeGroup(p_group_id)
    MergeGroup(p_group_sim)
예제 #5
0
파일: main.py 프로젝트: ABorrel/LSRs
def ionIdentification (name_ligand):
    """
    Step 4: run the ion analysis of one ligand.

    The dataset folder of the ligand, the ligand name and the report path
    (result folder of the ligand + "ionsAnalysis.txt") are given to
    ionSearch.analyseIons. The author's note describes the analysis as a
    search of metals in the close environment with distances and angles
    (performed by ionSearch, not visible here).
    """

    ionSearch.analyseIons (
        pathManage.dataset(name_ligand),
        name_ligand,
        pathManage.result(name_ligand) + "ionsAnalysis.txt"
    )
예제 #6
0
파일: buildData.py 프로젝트: ABorrel/LSRs
def builtDatasetGlobal (p_list_ligand, ligand_ID, thresold_RX = 2.5, thresold_blast = 1e-4, verbose = 1 ):
    """
    Build the dataset of one ligand, step by step.

    Order of the steps: extraction of references, family file, filter of
    references, global filter, blast, filter of blast results, cleaning of
    the dataset folder. In verbose mode the dataset is shown with
    toolViewStructDataset after the family file and after each filter or
    blast step.

    arg: -> p_list_ligand: given as is to extractReference
         -> ligand_ID: ligand name, used for dataset and result folders
         -> thresold_RX: given to filterReferenceByOne and filterBlastResult
         -> thresold_blast: given to filterBlastResult
         -> verbose: switch for the dataset display
    return: NONE
    """

    def _show (d_in) :
        # display only when requested
        if verbose :
            toolViewStructDataset (d_in)

    sub_building = ligand_ID + "/datasetBuilding"

    # folders: dataset, then result of the building step
    pr_data = pathManage.dataset(ligand_ID)
    pr_res = pathManage.result(sub_building)

    # step 1 -> references and family file
    d_dataset = extractReference (p_list_ligand, pr_data, pr_res, ligand_ID)
    analysis.familyPDBRef (d_dataset, pr_data + "family_PDB.txt")
    _show (d_dataset)

    # step 2 -> remove RX and same chain
    pr_alignment = pathManage.result(sub_building + "/aligmentRef")
    filterReferenceByOne (d_dataset, pr_alignment, ligand_ID, thresold_RX = thresold_RX)
    _show (d_dataset)

    # step 3 -> unique protein only
    filterGlobalDataset (d_dataset, pr_alignment)
    _show (d_dataset)

    # step 4 -> blast by conserved sequence
    pr_blast_out = pathManage.result(sub_building + "/blast")
    RunBlast.globalRun (d_dataset, pr_blast_out)
    _show (d_dataset)

    # step 5 -> e-value and RX
    filterBlastResult (d_dataset, pr_data, ligand_ID, thresold_RX = thresold_RX, thresold_blast = thresold_blast)
    _show (d_dataset)

    # step 6 -> clean folder dataset
    cleanFolderDataset (d_dataset, pr_data)
예제 #7
0
def resolutionByStructure (name_dataset) :
    """
    Write one "water_<structure>.dat" file for each structure returned by
    structure.ListSub().

    For a structure, the atoms loaded from every summary file are merged
    without duplicate, then one line "PDB, rx, number" (tab separated) is
    written for each atom with the values returned by searchCountH2O.

    arg: -> name of the dataset, used for result folder and summary files
    return: list of the paths of the written files
    """

    l_path = []

    for strut in structure.ListSub() :
        # path built once, used for the returned list and the output file
        p_filout = pathManage.result(name_dataset) + "water_" + strut + ".dat"
        l_path.append (p_filout)

        # context manager -> file closed even if loading or counting fails
        with open (p_filout, "w") as filout :
            l_file_summary = pathManage.retrieveSummaryFile (strut, name_dataset)

            # membership test on a list: the type of the loaded atoms is not
            # visible here, they may not be hashable
            list_global = []
            for path_summary in l_file_summary :
                # parenthesised form prints the same text with python 2 and 3
                print (path_summary)
                for interest_atom in loadFile.loadSummary (path_summary) :
                    if interest_atom not in list_global :
                        list_global.append (interest_atom)

            for atom_interest in list_global :
                rx, i_nb_atom, s_PDB = searchCountH2O(atom_interest)
                filout.write ("%s\t%s\t%s\n"%(s_PDB, rx, i_nb_atom))
    return l_path
예제 #8
0
def findFamilyAndGroup (PDB_in, Identity = "30.0") :
    """
    Search the group and the family of a PDB in the filtered identity group
    file ("groupIdentity_<Identity>.txt.filter" in the "clasifRef" result
    folder).

    The first line of the file is skipped; the other lines are read as tab
    separated columns: PDB ID, group, family.

    arg: -> PDB_in: PDB ID searched (compared to the first column)
         -> Identity: value used in the name of the group file
    return: (group, family) with group as string and family converted by
            tool.NameFamily, or None when the PDB is not in the file
    """

    p_family_group = pathManage.result ("clasifRef") + "groupIdentity_"  +  str (Identity) + ".txt.filter"
    # context manager -> file closed even if the reading fails
    with open (p_family_group, "r") as filin :
        l_line_filin = filin.readlines ()

    for line_filin in l_line_filin [1:]:
        l_el = line_filin.strip ().split ("\t")
        # blank or incomplete line -> skipped instead of an IndexError
        if len (l_el) < 3 :
            continue
        if PDB_in == l_el [0] :
            # family name converted only for the line found
            return str (l_el[1]), tool.NameFamily (l_el[2])

    # PDB not found (same value as before, now explicit)
    return None
예제 #9
0
파일: main.py 프로젝트: ABorrel/LSRs
def manageResult (l_ligand, name_final, l_out = None):
    """
    Arrange the smile files of each ligand in the final result folder
    "final_<name_final>".

    Every path returned by pathManage.findListSmileFile that matches both
    "smile" and ".txt" is given to arrangeResult.globalArrangement with the
    family file of the ligand.

    arg: -> l_ligand: list of ligand names
         -> name_final: suffix of the final result folder
         -> l_out: list given as is to arrangeResult.globalArrangement
            (default: new empty list for each call)
    return: 1
    """

    # no mutable default -> a list shared between calls could keep the
    # content added by a previous call
    if l_out is None :
        l_out = []

    pr_result = pathManage.result("final_" + name_final)

    for name_lig in l_ligand :
        l_p_smile = pathManage.findListSmileFile(name_lig)
        p_file_famile = pathManage.findFamilyFile (name_lig)
        for p_smile in l_p_smile :
            # the "ribose" case did the same call as the general case,
            # only one test is needed
            if search("smile", p_smile) and search(".txt", p_smile) :
                arrangeResult.globalArrangement(pr_result, p_smile, p_file_famile, name_lig, l_out)

    return 1
예제 #10
0
def manageResult(l_ligand, name_final, l_out=None):
    """
    Arrange the smile files of each ligand in the final result folder
    "final_<name_final>".

    Every path returned by pathManage.findListSmileFile that matches both
    "smile" and ".txt" is given to arrangeResult.globalArrangement with the
    family file of the ligand.

    arg: -> l_ligand: list of ligand names
         -> name_final: suffix of the final result folder
         -> l_out: list given as is to arrangeResult.globalArrangement
            (default: new empty list for each call)
    return: 1
    """

    # no mutable default -> a list shared between calls could keep the
    # content added by a previous call
    if l_out is None:
        l_out = []

    pr_result = pathManage.result("final_" + name_final)

    for name_lig in l_ligand:
        l_p_smile = pathManage.findListSmileFile(name_lig)
        p_file_famile = pathManage.findFamilyFile(name_lig)
        for p_smile in l_p_smile:
            # the "ribose" case did the same call as the general case,
            # only one test is needed
            if search("smile", p_smile) and search(".txt", p_smile):
                arrangeResult.globalArrangement(pr_result, p_smile,
                                                p_file_famile, name_lig, l_out)

    return 1
예제 #11
0
def analyseLGDProximity(prclassif):
    """
    Run the ligand similarity analysis on one classification folder.

    The text after the last "/" of prclassif names the result folder
    "<name>_LGDsimilarity". The binding database (PBINDINGDB) is filtered on
    five columns, the LGD files are extracted, the similarity matrix is built
    with the filtered affinities (MCS=1, Sheap=0) and the MMP are extracted.

    arg: -> prclassif: path of the classification folder
    return: NONE
    """

    print(prclassif)
    name_ref = prclassif.rsplit("/", 1)[-1]
    print(name_ref)

    pr_similarity = pathManage.result(name_ref + "_LGDsimilarity")
    print(pr_similarity)

    # affinity columns kept from the binding database
    l_column = [
        "PDB ID(s) for Ligand-Target Complex",
        "Ligand HET ID in PDB",
        "Kd (nM)",
        "Ki (nM)",
        "IC50 (nM)",
    ]
    p_affinity = pr_similarity + "bindingDBfiltered.txt"
    parseTSV.TSVFiltered(PBINDINGDB, l_column, pfilout=p_affinity)

    # LGD by reference, then similarity matrix
    extractLGDfile(prclassif, pr_similarity)
    buildMatrixSimilarity(pr_similarity,
                          pfileaffinity=p_affinity,
                          MCS=1,
                          Sheap=0)

    # MMP extraction
    extractMMP(pr_similarity)
예제 #12
0
def analyseLGDProximity(prclassif):
    """
    Ligand similarity analysis for one classification folder.

    Result folder: "<last element of prclassif>_LGDsimilarity". Steps: filter
    of the binding database (PBINDINGDB) on five columns, extraction of the
    LGD files, similarity matrix with the filtered affinities (MCS=1,
    Sheap=0), extraction of the MMP.

    arg: -> prclassif: path of the classification folder
    return: NONE
    """

    print(prclassif)
    name_classif = prclassif.split("/").pop()
    print(name_classif)

    pr_out = pathManage.result(name_classif + "_LGDsimilarity")
    print(pr_out)

    # step 1 -> affinity by PDB and ligand
    p_filtered = pr_out + "bindingDBfiltered.txt"
    l_header = ["PDB ID(s) for Ligand-Target Complex", "Ligand HET ID in PDB"]
    l_header = l_header + ["Kd (nM)", "Ki (nM)", "IC50 (nM)"]
    parseTSV.TSVFiltered(PBINDINGDB, l_header, pfilout=p_filtered)

    # step 2 -> LGD for each reference
    extractLGDfile(prclassif, pr_out)

    # step 3 -> matrix of similarity
    buildMatrixSimilarity(pr_out, pfileaffinity=p_filtered, MCS=1, Sheap=0)

    # step 4 -> MMP
    extractMMP(pr_out)
예제 #13
0
def globalShaepStat (substruct):
    """
    Gather the ShaEP scores of every reference folder of a substructure in
    "shaep_global.txt" and draw the histograms.

    Every file matching ".hit" found in a sub folder of the result folder is
    parsed with parseShaep.parseOutputShaep; when the parsing is not empty,
    one line "name, best_similarity, shape_similarity, ESP_similarity" is
    written. The file is then given to runOtherSoft.RhistogramMultiple.

    NOTE: the header line has 3 columns and the data lines 4 (name first);
    kept as is, the R script presumably reads the first column as row names.

    arg: -> substruct: name used to retrieve the result folder
    return: NONE
    """

    pr_result = pathManage.result(substruct)
    p_filout = pr_result + "shaep_global.txt"

    # context manager -> file closed even if a parsing fails
    with open (p_filout, "w") as filout :
        filout.write ("best_similarity\tshape_similarity\tESP_similarity\n")

        for ref_folder in listdir(pr_result) :
            pr_ref = pr_result + ref_folder + "/"
            if not path.isdir(pr_ref) :
                continue

            for file_result in listdir(pr_ref) :
                if not search(".hit", file_result) :
                    continue
                d_shaep_parsed = parseShaep.parseOutputShaep(pr_ref + file_result)
                # empty parsing -> no score available for this file
                if d_shaep_parsed == {} :
                    continue
                filout.write (ref_folder + "_" + file_result[10:-4] + "\t" + str(d_shaep_parsed["best_similarity"]) + "\t" + str(d_shaep_parsed["shape_similarity"]) + "\t" + str(d_shaep_parsed["ESP_similarity"]) + "\n")

    # histograms drawn once the file is closed
    runOtherSoft.RhistogramMultiple (p_filout, "Shaep_score")
예제 #14
0
def classifRefProtein (pr_dataset, l_lig, thresold_identity = 30.0, thresold_similarity = 30.0, p_fasta_global = "/home/borrel/Yue_project/pdb_seqres.txt"):
    """
    Group the reference proteins of several ligands by sequence identity and
    by sequence similarity.

    For every ligand, each sub folder of its dataset folder is listed and the
    first file whose name starts with the folder name gives the PDB ID (file
    name without its last 4 characters, first 4 characters in lower case)
    used to import a fasta file. The fasta files are then aligned with
    applyNeedleList, both matrices are written, and the references are
    grouped with GroupRef and merged with MergeGroup.

    arg: -> pr_dataset: not used, kept so that existing calls still work
            (the dataset folder is retrieved for each ligand)
         -> l_lig: list of ligand names
         -> thresold_identity: threshold given to GroupRef for identity
         -> thresold_similarity: threshold given to GroupRef for similarity
         -> p_fasta_global: global fasta file given to
            downloadFile.importFasta (default: path previously hard-coded)
    return: NONE
    """

    pr_out = pathManage.result("clasifRef")

    # case fasta file
    pr_align_seq = pathManage.generatePath(pr_out + "alignSeq/")
    l_p_fasta = []
    for lig in l_lig :
        # separate local name -> the pr_dataset argument is not overwritten
        pr_dataset_lig = pathManage.dataset(lig)
        for name_ref in listdir(pr_dataset_lig) :
            pr_ref_by_lig = pr_dataset_lig + name_ref
            PDB_folder = pr_ref_by_lig.split ("/")[-1]

            try :
                l_file = listdir(pr_ref_by_lig)
            except OSError :
                # entry that cannot be listed (e.g. a file, not a folder)
                continue

            for file_ref in l_file :
                # literal prefix test: the folder name is not a regex
                if file_ref.startswith (PDB_folder) :
                    PDB_ID = file_ref[0:-4]
                    PDB_ID = PDB_ID[0:4].lower () + PDB_ID[4:]
                    # PDB ID with chain associated
                    p_fasta = downloadFile.importFasta(PDB_ID, pr_align_seq, dir_by_PDB = 0, debug = 1, fastaGlobal = p_fasta_global)
                    l_p_fasta.append (p_fasta)
                    # only the first matching file by reference folder
                    break

    d_outNeedle = applyNeedleList (l_p_fasta, pr_align_seq)

    # writeMatrix
    writeMatrixFromDico (d_outNeedle, pr_out + "matrixSimilarSeq", "similarity" )
    writeMatrixFromDico (d_outNeedle, pr_out + "matrixIDSeq", "identity" )

    # group reference
    p_group_id = GroupRef (d_outNeedle, "identity", pr_out + "groupIdentity" +"_" + str (thresold_identity) + ".txt", thresold_identity, l_lig)
    p_group_sim = GroupRef (d_outNeedle, "similarity", pr_out + "groupSimilarity" +"_" + str (thresold_similarity) + ".txt", thresold_similarity, l_lig)

    # merge not alone prot
    MergeGroup (p_group_id)
    MergeGroup (p_group_sim)
예제 #15
0
def waterGlobal (name_database, limit_acc = 00.0):
    """
    Water molecules in the PDB files of a database, plotted by resolution
    (per the names of the functions called, not visible here).

    arg: -> name_database: name of the database, used for the result folder
            "<name_database>/water" and to retrieve the list of PDB
         -> limit_acc: when different from 0.0, NACCESS is first run on
            every PDB file; the value is then given to
            waterAnalysis.resolutionWater
    return: NONE
    """

    pr_water = pathManage.result (name_database + "/water")
    l_name_PDB = managePDB.retriveListPDB(name_database)

    # accessibility computed only when a limit is requested
    if limit_acc != 0.0 :
        for name_PDB in l_name_PDB :
            p_file_PDB = pathManage.pathDitrectoryPDB () + name_PDB + ".pdb"
            pr_PDB = pathManage.pathDitrectoryPDB ()
            runOtherSoft.runNACESS(p_file_PDB, pr_PDB, multi_run = 0)

    # resolution file, directly given to the R plot
    runScriptR.waterPlotResolution (waterAnalysis.resolutionWater(l_name_PDB, pr_water, limit_acc))
예제 #16
0
파일: bondLength.py 프로젝트: ABorrel/ionic
def GlobalBondLength (name_database, RX_thresold = 1.5):
    """
    Measure C-N, C-O, C-C bond lengths and coplanarity on the ligands of a
    database and draw the histograms of the values.

    For each ligand of the file "resultLigandInPDB", the first PDB whose
    quality value (parsing.Quality(PDB)[0]) is lower or equal to the
    threshold is used, and only this one. Values are written in
    "distanceCN", "distanceCO", "distanceCC" and "distanceCoplar", then
    given to runScriptR.histDistance.

    Fixed here: the previous loop shared one counter between ligands and
    PDB, so ligands were skipped after a ligand with several PDB, every PDB
    of a ligand was used, and a ligand without PDB never ended the loop.
    The ligand file is now checked before the output files are opened, so
    the error case does not leave open (and empty) files.

    arg: -> name_database: name used to retrieve the result folders
         -> RX_thresold: maximal quality value accepted for a PDB
    return: NONE
    """

    # directory
    pr_result = pathManage.result(name_database + "/CXbound" + str (RX_thresold))
    pr_database = pathManage.result(name_database)

    # filout with distance
    p_CN = pr_result + "distanceCN"
    p_CO = pr_result + "distanceCO"
    p_CC = pr_result + "distanceCC"
    p_coplar = pr_result + "distanceCoplar"

    # load PDB with ligand
    p_lig_in_PDB = pr_database + "resultLigandInPDB"
    if not path.exists(p_lig_in_PDB) :
        print ("ERROR => file with ligand and PDB does not exist")
        return
    d_lig_PDB = loadFile.LigandInPDB(p_lig_in_PDB)

    # parenthesised form prints the same list with python 2 and 3
    print (list (d_lig_PDB.keys ()))

    filout_CN = open (p_CN, "w")
    filout_CO = open (p_CO, "w")
    filout_CC = open (p_CC, "w")
    filout_coplar = open (p_coplar, "w")

    try :
        for name_lig in d_lig_PDB :
            for PDB in d_lig_PDB[name_lig] :
                # control RX
                RX = parsing.Quality(PDB)[0]

                if RX <= RX_thresold :
                    l_atom_lig = loadFile.ligandInPDBConnectMatrixLigand(PDB, name_lig)

                    l_measure = (
                        (filout_CN, BondLengthCandX (l_atom_lig, "N")),
                        (filout_CO, BondLengthCandX (l_atom_lig, "O")),
                        (filout_CC, BondLengthCandX (l_atom_lig, "C")),
                        (filout_coplar, CoplanarityIII(l_atom_lig)),
                    )
                    for filout, l_value in l_measure :
                        if l_value != [] :
                            filout.write ("\n".join (l_value) + "\n")

                    # take only one PDB by ligand not more
                    break
    finally :
        # files closed also when a parsing or a measure fails
        filout_CO.close ()
        filout_CN.close ()
        filout_CC.close ()
        filout_coplar.close ()

    runScriptR.histDistance(p_CN, "CN")
    runScriptR.histDistance(p_CO, "CO")
    runScriptR.histDistance(p_CC, "CC")
    runScriptR.histDistance(p_coplar, "coplar")
예제 #17
0
파일: main.py 프로젝트: ABorrel/LSRs
#######################
# CLASSIFICATION LSRs #
#######################

# suffix of the final result folder: the folder name used below is
# "final_" + name_folder_final
name_folder_final = "withoutLig"
# the three steps below are disabled (left commented by the author);
# l_ligand_out and thresold_shaep are not defined in this part of the script
# manageResult (["AMP", "ADP", "POP", "ATP"], name_folder_final, l_ligand_out)
# arrangeResult.qualityExtraction (["AMP", "ADP", "POP", "ATP"], name_folder_final, p_list_ligand = "/home/borrel/Yue_project/resultLigandInPDB", thresold_sheap = thresold_shaep)
#arrangeResult.countingSubstituent(name_folder_final)


###################################################
# AFFINITY AND INTERACTIONS BY PROTEIN REFERENCE  #
###################################################

# folder final
# path built as: result folder of "final_<name_folder_final>" + "Pi_LSR"
# (no "/" added at the end; analyseLGDProximity is run on this path)
pr_classif = pathManage.result("final_" + name_folder_final) + "Pi_LSR"
ligandSimilarity.analyseLGDProximity(pr_classif)


#########################################
# ANALYSE CLASSIFICATION BASED ON SHEAP #
#########################################

# disabled step, uses the same classification folder
#classifResults.SheapScoreToClass(pr_classif)



######################
# ANALYSE REFERENCE  #
######################
예제 #18
0
파일: main.py 프로젝트: ABorrel/LSRs
def retrieveSubstructSuperimposed (name_lig, thresold_BS = 4.5, thresold_superimposed_ribose = 2.5, thresold_superimposed_pi = 3, thresold_shaep = 0.4):
    """
    For each reference folder of a ligand dataset, extract from every query
    ligand the atoms superimposed on the reference substructures, score
    them with ShaEP and write the selected results.

    By reference folder (name of 4 characters):
      - the reference ligand is written in "all_ligand_aligned.pdb" and in
        "all_ligand_aligned_<thresold_shaep>.pdb"
      - each query ligand file "<3 char>_<4 char>.pdb" (other PDB, other
        ligand than name_lig) is loaded and transformed with its matrix
      - for each reference substructure, the neighbor atoms are written in a
        "substituent_..." PDB file, scored with ShaEP, and the file is
        renamed with the best similarity
      - when the best similarity reaches thresold_shaep, the complex ("CX_")
        and the binding site ("BS_") are written and the smile code of the
        substituent is stored
    Global files: "log_superimposed.txt", "quality_extraction.txt",
    "shaep_global.txt", one "shaep_global_<type>.txt" by substructure type
    and one "list_<type>_<thresold_shaep>_smile.txt" by substructure type.

    Fixed here: the "LSR keep" count raised a KeyError when no substituent
    of a substructure type was rejected; the "[REMOVE]" log message had no
    end of line; the reference loading error is logged even when the
    reference path is not a string.

    arg: -> name_lig: ligand name of the dataset
         -> thresold_BS: maximal distance (parsePDB.distanceTwoatoms)
            between a ligand atom and a complex atom of the binding site
         -> thresold_superimposed_ribose: given to
            neighborSearch.searchNeighborAtom
         -> thresold_superimposed_pi: given to
            neighborSearch.searchNeighborAtom
         -> thresold_shaep: minimal ShaEP best similarity to select a
            substituent
    return: 1
    """

    # output
    p_dir_dataset = pathManage.dataset(name_lig)
    p_dir_result = pathManage.result(name_lig )
    l_folder_ref = listdir(p_dir_dataset)

    # log control
    p_log = open(p_dir_result + "log_superimposed.txt", "w")

    # control extraction
    d_control = {}
    d_control["pr ref"] = 0
    d_control["lig query"] = 0
    d_control["subref"] = {}
    d_control["subref empty"] = {}
    d_control["out sheap"] = {}
    filout_control = open (p_dir_result + "quality_extraction.txt", "w")

    # stock smile code
    d_smile = {}

    # sheap control
    d_filout_sheap = {}
    d_filout_sheap ["list"] = [p_dir_result + "shaep_global.txt"]
    d_filout_sheap["global"] = open (p_dir_result + "shaep_global.txt", "w")
    d_filout_sheap["global"].write ("name\tbest_similarity\tshape_similarity\tESP_similarity\n")

    for ref_folder in l_folder_ref :
        # control folder reference name
        if len (ref_folder) != 4 :
            p_log.write ("[ERROR folder] -> " + ref_folder + "\n")
            continue

        pr_ref = p_dir_dataset + ref_folder + "/"

        # reference
        p_lig_ref = pathManage.findligandRef(pr_ref, name_lig)
        try:
            lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
        except Exception:
            # best effort kept: reference skipped and logged; str () because
            # the type returned when no reference is found is not known here
            p_log.write ("[ERROR ligand ref] -> " + str (p_lig_ref) + "\n")
            continue

        # control
        d_control["pr ref"] = d_control["pr ref"] + 1

        # output by reference
        p_dir_result_ref = pathManage.result(name_lig + "/" + ref_folder)
        d_filout_superimposed = {}
        d_filout_superimposed["global"] = open (p_dir_result_ref + "all_ligand_aligned.pdb", "w")
        d_filout_superimposed["sheap"] = open (p_dir_result_ref + "all_ligand_aligned_" + str (thresold_shaep)  + ".pdb", "w")

        # write lig ref -> connect matrix correct in all reference and all sheap
        writePDBfile.coordinateSection(d_filout_superimposed["global"], lig_ref_parsed, "HETATM", connect_matrix = 1)
        writePDBfile.coordinateSection(d_filout_superimposed["sheap"], lig_ref_parsed, "HETATM", connect_matrix = 1)

        # inspect folder dataset
        for pdbfile in listdir(pr_ref) :
            # no ligand file
            if len (pdbfile.split ("_")) == 1 :
                continue
            pdbfile = pdbfile[:-4] # remove extension

            l_el_name = pdbfile.split ("_")
            if len (l_el_name) < 2 :
                continue
            lig_query = l_el_name[0]
            PDB_query = l_el_name[1]

            # only ligand files "<3 char>_<4 char>" from another PDB
            if len (lig_query) != 3 or len (PDB_query) != 4 or PDB_query == ref_folder :
                continue

            name_file_lig = pdbfile + ".pdb"
            p_lig = pr_ref + name_file_lig
            if p_lig_ref == p_lig :
                continue

            # pass case where ligand replace same ligand -> does not need run
            if lig_query == name_lig :
                p_log.write ("[REMOVE] -> same ligand substituate\n")
                continue

            # parsed ligand query
            lig_parsed = parsePDB.loadCoordSectionPDB(p_lig, "HETATM")

            # find matrix of rotation
            p_matrix = pathManage.findMatrix(p_lig_ref, p_lig, name_lig)
            # control file matrix exist
            if not path.exists(p_matrix) :
                p_log.write ("[ERROR] -> Matrix transloc " + p_lig_ref + " " + p_lig + " " + name_lig + "\n")
                continue

            # control
            d_control["lig query"] = d_control["lig query"] + 1

            # find the path of complex used (ligand file name without its
            # first 4 characters)
            p_complex = pr_ref + name_file_lig[4:]

            # ligand rotated -> change the referentiel
            superposeStructure.applyMatrixLigand(lig_parsed, p_matrix)

            # use substruct
            l_p_substruct_ref = pathManage.findSubstructRef (pathManage.dataset(name_lig) + ref_folder + "/" , name_lig)
            for p_substruct_ref in l_p_substruct_ref :
                # ribose or phosphate
                struct_type = p_substruct_ref.split ("_")[-2]
                substruct_parsed = parsePDB.loadCoordSectionPDB(p_substruct_ref, "HETATM")

                l_atom_substituate = neighborSearch.searchNeighborAtom(substruct_parsed, lig_parsed, struct_type, p_log, thresold_superimposed_ribose = thresold_superimposed_ribose, thresold_superimposed_pi = thresold_superimposed_pi)
                # control find
                if len (l_atom_substituate) == 0 :
                    d_control["subref empty"][struct_type] = d_control["subref empty"].get (struct_type, 0) + 1
                    continue
                d_control["subref"][struct_type] = d_control["subref"].get (struct_type, 0) + 1

                # write PDB file, convert smile
                p_substituate_pdb = p_dir_result_ref + "substituent_" + lig_query + "_" + PDB_query + "_" + struct_type + ".pdb"
                writePDBfile.coordinateSection(p_substituate_pdb, l_atom_substituate, recorder="HETATM", header=0, connect_matrix = 1)

                # sheap reference on part of ligand
                p_sheap = runOtherSoft.runShaep (p_substruct_ref, p_substituate_pdb, p_substituate_pdb[0:-4] + ".hit", clean = 0)
                val_sheap = parseShaep.parseOutputShaep (p_sheap)
                if val_sheap == {} :
                    p_log.write ("[ERROR] -> ShaEP " + p_substituate_pdb + " " + p_substruct_ref + "\n")
                    d_control["out sheap"][struct_type] = d_control["out sheap"].get (struct_type, 0) + 1
                    continue

                best_similarity = str (val_sheap["best_similarity"])

                # one ShaEP control file by type of substructure, opened the
                # first time the type is found
                if not struct_type in d_filout_sheap :
                    p_filout_type = p_dir_result + "shaep_global_" + struct_type + ".txt"
                    d_filout_sheap[struct_type] = open (p_filout_type, "w")
                    d_filout_sheap[struct_type].write ("name\tbest_similarity\tshape_similarity\tESP_similarity\n")
                    d_filout_sheap["list"].append (p_filout_type)

                # write value in ShaEP control
                line_sheap = ref_folder + "_" + PDB_query + "_" + struct_type + "_" + lig_query + "\t" + best_similarity + "\t" + str(val_sheap["shape_similarity"]) + "\t" + str(val_sheap["ESP_similarity"]) + "\n"
                d_filout_sheap[struct_type].write (line_sheap)
                d_filout_sheap["global"].write (line_sheap)

                # rename file substituent with shaEP value
                p_substituate_scored = p_substituate_pdb[:-4] + "_" + best_similarity + ".pdb"
                rename(p_substituate_pdb, p_substituate_scored)
                p_substituate_pdb = p_substituate_scored

                # write all substruct in global file
                header_lig = name_file_lig + "_" + best_similarity
                writePDBfile.coordinateSection(d_filout_superimposed["global"], lig_parsed, recorder= "HETATM", header = header_lig,  connect_matrix = 1)

                # control sheap thresold
                if not float(val_sheap["best_similarity"]) >= thresold_shaep :
                    d_control["out sheap"][struct_type] = d_control["out sheap"].get (struct_type, 0) + 1
                    continue

                # write subligand superimposed selected in global files
                writePDBfile.coordinateSection(d_filout_superimposed["sheap"], lig_parsed, recorder= "HETATM", header = header_lig,  connect_matrix = 1)

                ############
                # write BS #
                ############
                # not only protein superimposed -> also ion and water
                l_atom_complex = parsePDB.loadCoordSectionPDB(p_complex)
                superposeStructure.applyMatrixProt(l_atom_complex, p_matrix)
                p_file_cx = p_dir_result_ref +  "CX_" + name_file_lig
                # write CX
                writePDBfile.coordinateSection(p_file_cx, l_atom_complex, recorder="ATOM", header= name_file_lig, connect_matrix = 0)

                # search atom in BS
                l_atom_binding_site = []
                for atom_complex in l_atom_complex :
                    for atom_substruct in lig_parsed :
                        if parsePDB.distanceTwoatoms (atom_substruct, atom_complex) <= thresold_BS :
                            if not atom_complex in l_atom_binding_site :
                                l_atom_binding_site.append (deepcopy(atom_complex))
                            # one close ligand atom is enough
                            break

                # 3. retrieve complete residue
                l_atom_BS_res = parsePDB.getResidues(l_atom_binding_site, l_atom_complex)

                # 4. write binding site
                p_binding = p_dir_result_ref +  "BS_" + name_file_lig
                writePDBfile.coordinateSection(p_binding, l_atom_BS_res, "ATOM", p_binding, connect_matrix = 0)

                # smile code substituate analysis
                # Step smile -> not conversion if shaep not validate
                smile_find = runOtherSoft.babelConvertPDBtoSMILE(p_substituate_pdb)
                d_smile_type = d_smile.setdefault (struct_type, {})
                if not smile_find in d_smile_type :
                    d_smile_type[smile_find] = {"count": 0, "PDB": [], "ligand": [], "ref": []}
                d_smile_find = d_smile_type[smile_find]
                d_smile_find["count"] = d_smile_find["count"] + 1
                d_smile_find["PDB"].append (PDB_query)
                d_smile_find["ligand"].append (lig_query)
                d_smile_find["ref"].append (ref_folder)

        tool.closeDicoFile (d_filout_superimposed)

    # sheap control
    tool.closeDicoFile (d_filout_sheap)
    for p_file_sheap in d_filout_sheap["list"] :
        runOtherSoft.RhistogramMultiple (p_file_sheap)

    # write list of smile
    for substruct in d_smile.keys () :
        p_list_smile = pathManage.result(name_lig) + "list_" + substruct + "_" + str (thresold_shaep) + "_smile.txt"
        filout_smile = open (p_list_smile, "w")
        for smile_code in d_smile[substruct].keys () :
            l_lig = d_smile[substruct][smile_code]["ligand"]
            l_PDB = d_smile[substruct][smile_code]["PDB"]
            l_ref = d_smile[substruct][smile_code]["ref"]
            filout_smile.write (str (smile_code) + "\t" + str (d_smile[substruct][smile_code]["count"]) + "\t" + " ".join (l_PDB) + "\t" + " ".join (l_ref) + "\t" + " ".join(l_lig) + "\n")
        filout_smile.close ()
    p_log.close ()

    # control
    filout_control.write ("NB ref: " + str(d_control["pr ref"]) + "\n")
    filout_control.write ("Ligand query: " + str(d_control["lig query"]) + "\n")
    for k in d_control["subref"].keys () :
        filout_control.write ("LSR " + str (k) + ": " + str(d_control["subref"][k]) + "\n")
    for k in d_control["subref empty"].keys () :
        filout_control.write ("NB LSR empty " + str (k) + ": " + str(d_control["subref empty"][k]) + "\n")
    for k in d_control["out sheap"].keys () :
        filout_control.write ("LSR out by sheap " + str (k) + ": " + str(d_control["out sheap"][k]) + "\n")

    filout_control.write ("**********************\n\n")
    for k in d_control["subref"].keys () :
        # a type without any rejected substituent has no "out sheap" entry
        filout_control.write ("LSR keep" + str (k) + ": " + str(d_control["subref"][k] - d_control["out sheap"].get (k, 0)) + "\n")

    filout_control.close ()

    return 1
예제 #19
0
def enantiomer(l_ligand, name_folder_final, debug = 1) : 
    "to do file output"
    
    pr_final = pathManage.result("final_" + name_folder_final)
    
    pr_enantiomer = pathManage.generatePath(pr_final + "enantiomer/")
    
    l_ref = []

    d_filout = {}
    for ligand in l_ligand : 
        d_filout[ligand] = {}
        d_filout[ligand]["O3OP"]= open (pr_enantiomer + ligand + "_" + "O3OP" , "w")
        d_filout[ligand]["O4O5"]= open (pr_enantiomer + ligand + "_" + "O4O5" , "w")
        d_filout[ligand]["OPOP"]= open (pr_enantiomer + ligand + "_" + "OPOP" , "w")
        
    l_pr_type_ref = listdir(pr_final) 
    for pr_type_ref in l_pr_type_ref : 
        if debug : print "1", pr_type_ref
        # case where pr_substruct is a file not a folder
        try : l_pr_sub = listdir(pr_final + pr_type_ref + "/")
        except : continue

        for pr_sub in l_pr_sub : 
            print "2", pr_sub

            # case cycle -> append in list respertory with new folder
            if pr_sub == "cycle" : 
                l_pr_sub.remove ("cycle")
                l_pr_sub_cycle = listdir (pr_final + pr_type_ref + "/cycle")
                for pr_sub_cycle in l_pr_sub_cycle : 
                    l_pr_sub.append ("cycle/" + pr_sub_cycle)
                break
        
        for pr_sub in l_pr_sub : 
            try : l_pr_ref = listdir (pr_final + pr_type_ref + "/" + pr_sub)
            except : pass
            if debug : print "3", pr_sub
            
            for pr_ref in l_pr_ref : 
                if debug : print "4", pr_ref
                # case no folder
                try : l_file = listdir(pr_final + pr_type_ref + "/" + pr_sub + "/" + pr_ref + "/LGD/")
                except : continue
                for name_file in l_file : 
                    if search("LGD_REF_A",name_file) and search(".pdb",name_file): 
                        #print "2222", l_ref
                        if name_file.split("_")[3][:4] in l_ref : 
                            print "!!!!!", "IN"
                            break
                        else : l_ref.append (name_file.split ("_")[3][:4])                       
 
                        ligand = name_file.split ("_")[2]
                        l_atom_ligand = parsePDB.loadCoordSectionPDB(pr_final + pr_type_ref + "/" + pr_sub + "/" + pr_ref + "/LGD/" + name_file, "HETATM")
                        d_minO3OP = 100
                        for atom_ligand in l_atom_ligand : 
                            if atom_ligand["name"] == "O4'" :
                                atom_O4 = atom_ligand
                            elif atom_ligand["name"] == "O5'" :
                                atom_O5 = atom_ligand
                            elif  atom_ligand["name"] == "O3'" :
                                atom_O3 = atom_ligand
                            elif  atom_ligand["name"] == "O1A" :
                                atom_O1A = atom_ligand
                            elif  atom_ligand["name"] == "O2A" :
                                atom_O2A = atom_ligand
                            elif  atom_ligand["name"] == "O1B" :
                                atom_O1B = atom_ligand
                            elif  atom_ligand["name"] == "O2B" :
                                atom_O2B = atom_ligand
                            #elif  atom_ligand["name"] == "O3B" :
                            #    atom_O3B = atom_ligand
                    
                        # d O4 - O5        
                        try : d_O4O5 = parsePDB.distanceTwoatoms(atom_O4, atom_O5)
                        except : continue
                        d_filout[ligand]["O4O5"].write (pr_ref + "_" + pr_type_ref  + "\t" + str (d_O4O5) + "\n")

                        # d O3 - OP
                        for atom_ligand in l_atom_ligand : 
                            if ligand == "AMP" : 
                                if atom_ligand["name"] == "O1P" or atom_ligand["name"] == "O2P" or atom_ligand["name"] == "O3P" : 
                                    d_tempO3OP = parsePDB.distanceTwoatoms(atom_O3, atom_ligand)
                                    if d_tempO3OP < d_minO3OP : 
                                        d_minO3OP = d_tempO3OP
                                        atom_tempO3OP = deepcopy(atom_ligand)
                            else : 
                                if atom_ligand["name"] == "O1A" or atom_ligand["name"] == "O2A" or atom_ligand["name"] == "O3A" : 
                                    d_tempO3OP = parsePDB.distanceTwoatoms(atom_O4, atom_ligand)
                                    if d_tempO3OP < d_minO3OP : 
                                        d_minO3OP = d_tempO3OP
                                        atom_tempO3OP = deepcopy(atom_ligand)
                        d_filout[ligand]["O3OP"].write (pr_ref + "_" + pr_type_ref  +"_" + str(atom_tempO3OP["name"]) + "\t" + str (d_minO3OP) + "\n")
    
                        # d OP OP
                        d_OP = {}
                        if ligand == "ATP" or ligand == "ADP" : 
                            d_OP ["O1AO1B"] = parsePDB.distanceTwoatoms(atom_O1A, atom_O1B)
                            d_OP ["O1AO2B"] = parsePDB.distanceTwoatoms(atom_O1A, atom_O2B)
                            #d_OP ["O1AO3B"] = parsePDB.distanceTwoatoms(atom_O1A, atom_O3B)
                            d_OP ["O2AO1B"] = parsePDB.distanceTwoatoms(atom_O2A, atom_O1B)
                            d_OP ["O2AO2B"] = parsePDB.distanceTwoatoms(atom_O2A, atom_O2B)
                            #d_OP ["O2AO3B"] = parsePDB.distanceTwoatoms(atom_O2A, atom_O3B)
                        
                            d_minOPOP = min (d_OP.values())
                            #print d_minOPOP
                            k_min = [name for name, age in d_OP.items() if age == min (d_OP.values())][0]
                            #print k_min
                            d_filout[ligand]["OPOP"].write (pr_ref + "_" + pr_type_ref  + "_" + str(k_min) + "\t" + str (d_minOPOP) + "\n")
                    
                        try :
                            del d_OP 
                            del atom_O1A
                            del atom_O1B
                            del atom_O2A
                            del atom_O2B
                        except : 
                            pass
                        try : 
                            del atom_O3
                            del atom_O4
                            del atom_O5
                        except :
                            pass
            
    # close files
    for lig in l_ligand : 
        for type_dist in d_filout[lig].keys () : 
            p_file = d_filout[lig][type_dist].name
            d_filout[lig][type_dist].close ()
            runOtherSoft.Rhistogram(p_file, type_dist, brk = 20)
예제 #20
0
def superpositionAllRef (l_ligand, name_folder_final, debug = 1):   
    
    pr_final = pathManage.result("final_" + name_folder_final)
    pr_align = pathManage.generatePath(pr_final + "refAlignement/")
    
    l_ref = []
    d_filout_pdb = {}
    d_filout_RMSE = {}
    d_ref = {}
    l_file_RMSE = []
    for ligand in l_ligand : 
        d_filout_pdb[ligand] = open (pr_align + ligand + "_" + "superimposed.pdb" , "w")
        d_filout_RMSE[ligand] = open (pr_align + ligand + "_" + "RMSE.txt" , "w")
        l_file_RMSE.append (pr_align + ligand + "_" + "RMSE.txt") 
    
    l_pr_type_ref = listdir(pr_final) 
    for pr_type_ref in l_pr_type_ref : 
        if debug : print "1", pr_type_ref
        # case where pr_substruct is a file not a folder
        try : l_pr_sub = listdir(pr_final + pr_type_ref + "/")
        except : continue

        for pr_sub in l_pr_sub : 
            print "2", pr_sub

            # case cycle -> append in list respertory with new folder
            if pr_sub == "cycle" : 
                l_pr_sub.remove ("cycle")
                l_pr_sub_cycle = listdir (pr_final + pr_type_ref + "/cycle")
                for pr_sub_cycle in l_pr_sub_cycle : 
                    l_pr_sub.append ("cycle/" + pr_sub_cycle)
                break
        
        for pr_sub in l_pr_sub : 
            try : l_pr_ref = listdir (pr_final + pr_type_ref + "/" + pr_sub)
            except : pass
            if debug : print "3", pr_sub
            
            for pr_ref in l_pr_ref : 
                if debug : print "4", pr_ref
                # case no folder
                try : l_file = listdir(pr_final + pr_type_ref + "/" + pr_sub + "/" + pr_ref + "/LGD/")
                except : continue
                for name_file in l_file : 
                    if search("LGD_REF_A",name_file) and search(".pdb",name_file): 
                        #print "2222", l_ref
                        if name_file.split("_")[3][:4] in l_ref : 
                            print "!!!!!", "IN"
                            break
                        else : l_ref.append (name_file.split ("_")[3][:4])                       


                        ligand = name_file.split ("_")[2]
                        l_atom_ligand = parsePDB.loadCoordSectionPDB(pr_final + pr_type_ref + "/" + pr_sub  + "/" + pr_ref + "/LGD/" + name_file, "HETATM", remove_H=1)
                        l_atom_adenine = substructTools.retrieveAdenine(l_atom_ligand)
                        if not ligand in d_ref.keys () : 
                            # stock in tempory dictionary for the reference
                            d_ref[ligand] = []
                            d_ref[ligand].append (l_atom_ligand)
                            d_ref[ligand].append (l_atom_adenine)
                            writePDBfile.coordinateSection(d_filout_pdb[ligand], l_atom_ligand, "HETATM", connect_matrix = 1)
                            continue
                        else : 
                            rotation, translocation =  superimpose.rigid_transform_3D(l_atom_adenine, d_ref[ligand][-1])
                            if rotation == None or translocation == None : 
                                continue
                            # rotation + translation
                            l_atom_lig_rotated = superimpose.applyTranformation(rotation, translocation, l_atom_in=l_atom_ligand)
                            # write PDB file and RMSE
#                             print "============"
#                             print ligand, pr_ref
#                             print len (l_atom_lig_rotated)
#                             print len (d_ref[ligand][0])
#                             print "============"
                            if len (l_atom_lig_rotated) != len (d_ref[ligand][0]) : 
                                continue
                        
                            writePDBfile.coordinateSection(d_filout_pdb[ligand], l_atom_lig_rotated, "HETATM", connect_matrix = 1)
                            RMSE_ligand = superimpose.rmse(d_ref[ligand][0], l_atom_lig_rotated)
                            d_filout_RMSE[ligand].write (str (pr_ref) + pr_type_ref  + "\t" + str(RMSE_ligand) + "\n")
            
    # close files
    for lig in d_filout_pdb.keys () : 
        d_filout_pdb[lig].close ()
        d_filout_RMSE[lig].close ()

    for file_RMSE in l_file_RMSE : 
        runOtherSoft.Rhistogram(file_RMSE, "RMSE_Adenine")                                 
예제 #21
0
def qualityExtraction (l_ligand, name_folder, p_list_ligand, thresold_sheap) : 
    """
    Write a summary of the extraction in quality_extraction.txt.

    The file is written in result("final_" + name_folder) and contains four
    sections, each one with a line or more by ligand:
      1. number of entries of the ligand in the list parsed from p_list_ligand
      2. number of folders in the result folder of the ligand, minus 2
      3. for the reference folders (name of 4 characters): sum, mean +/- 
         standard deviation and maximum of the number of files with "CX" in
         their name
      4. by substituent type, number of "substituent" .pdb files and number of
         them with a score (last field of the file name) over thresold_sheap
    Section 3 also writes reference_family_<ligand>.txt, 
    count_family_<ligand>.txt and reference_family_all.txt in result() and
    draws the pie plots of the two count files (runOtherSoft.piePlot).

    args: -> l_ligand: list of ligand names
          -> name_folder: suffix of the "final_" result folder
          -> p_list_ligand: path of the file given to tool.parseLigandPDBList
          -> thresold_sheap: score that a substituent has to exceed to be
             counted as kept
    return: None
    """
    
    pr_result = pathManage.result("final_" + name_folder)
    
    filout = open(pr_result + "quality_extraction.txt", "w")
    
    # number PDB by ligand, without filter
    filout.write ("Number PDB by ligand:\n")
    
    d_dataset =  tool.parseLigandPDBList(p_list_ligand)
    for ligand in l_ligand : 
        filout.write (str (ligand) + ": " + str (len (d_dataset[ligand])) + "\n")
    
    # number references
    filout.write ("\n*************\n\nNumber references by ligands:\n")
    for ligand in l_ligand : 
        pr_result_ligand = pathManage.result(ligand)
        # NOTE(review): start at -2, presumably two folders of the result
        # directory are not references -> to confirm
        nb_ref = -2
        for f in listdir(pr_result_ligand) : 
            if path.isdir (pr_result_ligand + "/" + f) : 
                nb_ref = nb_ref + 1
        filout.write (ligand + ": " + str (nb_ref) + "\n")
        
    # number of query by ref in means and max and min (after blast)
    filout.write ("\n*************\n\nNumber means queries by references:\n")
    p_family_all = pathManage.result() + "reference_family_all.txt"
    filout_family_all = open (p_family_all, "w")
    d_family_all = {}
    for ligand in l_ligand : 
        d_nb_query = {}
        d_family = {}
        p_filout_family = pathManage.result() + "reference_family_" + ligand + ".txt"
        p_filout_family_count = pathManage.result () + "count_family_" + ligand + ".txt"
        filout_family = open (p_filout_family, "w")
        filout_family_count = open (p_filout_family_count, "w")
        pr_result_ligand = pathManage.result(ligand)
        for f in listdir(pr_result_ligand) : 
            # reference folder -> name of 4 characters
            if not (path.isdir (pr_result_ligand + "/" + f) and len (f) == 4) : 
                continue

            # count by family, last element of family_ref is used as key
            family_ref = analysis.findFamily(f, pathManage.findFamilyFile (ligand))
            filout_family.write ("\t".join (family_ref) + "\n")
            d_family[family_ref[-1]] = d_family.get (family_ref[-1], 0) + 1
            # file all
            d_family_all[family_ref[-1]] = d_family_all.get (family_ref[-1], 0) + 1
                
            # count number of queries of the reference
            d_nb_query[f] = 0
            for file_query in listdir(pr_result_ligand + "/" + f + "/") : 
                if search ("CX",file_query) : 
                    d_nb_query[f] = d_nb_query[f] + 1

        l_nb_query = list (d_nb_query.values ())
        filout.write (ligand + ": " + str(np.sum(l_nb_query)) + "\n")
        filout.write (ligand + ": " + str(np.mean(l_nb_query)) + "+/-" + str(np.std (l_nb_query)) + "\n")
        # no reference folder -> no maximum to write (max on empty list fails)
        if l_nb_query : 
            # first reference, in dictionary order, with the maximum of queries
            ref_max = max (d_nb_query, key = d_nb_query.get)
            filout.write ("MAX " + str (ligand) + ": " + str (d_nb_query[ref_max]) + " " + str (ref_max) +"\n")
    
        # family
        filout_family_count.write ("\t".join(d_family.keys ()) + "\n")
        l_values = [str(x) for x in d_family.values ()]
        filout_family_count.write ("\t".join(l_values) + "\n")
        filout_family.close ()
        filout_family_count.close ()
        runOtherSoft.piePlot(p_filout_family_count)

    # all family
    filout_family_all.write ("\t".join(d_family_all.keys ()) + "\n")
    l_values = [str(x) for x in d_family_all.values ()]
    filout_family_all.write ("\t".join(l_values) + "\n")
    filout_family_all.close ()    
    runOtherSoft.piePlot(p_family_all)
        
    
    # number subref by ligand
    filout.write ("\n*************\n\nNumber of subref considered:\n")
    for ligand in l_ligand :
        d_nb_sub = {}
        d_nb_sub_sheap = {}
        pr_result_ligand = pathManage.result(ligand)
        for ref in listdir(pr_result_ligand) : 
            if not (path.isdir (pr_result_ligand + "/" + ref) and len (ref) == 4) : 
                continue
            for file_query in listdir(pr_result_ligand + "/" + ref + "/") : 
                if not (search ("substituent",file_query) and search (".pdb",file_query)) : 
                    continue
                # file name finishes by _<type substituent>_<score>.pdb
                atom_substituate = file_query.split ("_")[-2]
                # file without score in its name -> not counted
                try : value_sheap = float(file_query.split ("_")[-1][:-4])
                except ValueError : continue
                d_nb_sub[atom_substituate] = d_nb_sub.get (atom_substituate, 0) + 1
                        
                if value_sheap > thresold_sheap : 
                    d_nb_sub_sheap[atom_substituate] = d_nb_sub_sheap.get (atom_substituate, 0) + 1

        filout.write ("\n" + ligand + "\n")
        for atom_substituate in d_nb_sub.keys () : 
            filout.write (atom_substituate + ": " + str (d_nb_sub[atom_substituate]) + "\n")
            filout.write (atom_substituate + " ShaEP: " + str (d_nb_sub_sheap.get (atom_substituate, 0)) + "\n")
    filout.close()
예제 #22
0
def countingSubstituent (name_final, debug = 1):
    
    pr_final_folder = pathManage.result("final_" + name_final)
    
    d_count = {}
    d_lig = {}
    d_by_ref = {}
    d_count_pr = {}
    l_file_final = listdir(pr_final_folder)
    if debug : print "1", pr_final_folder
    for pr_type_subref in l_file_final :
        # case where pr type is a file not a folder
        try : l_pr_sub = listdir(pr_final_folder + pr_type_subref + "/")
        except : continue
        if debug: print "2",pr_final_folder +  pr_type_subref + "/"
        
        # case cycle append one directory
        if "cycle" in l_pr_sub : 
            l_pr_sub.remove ("cycle")
            l_second_sub = listdir (pr_final_folder + pr_type_subref + "/cycle/")
        
            for second_sub in l_second_sub : 
                l_pr_sub.append ("cycle/" + second_sub)


        for pr_sub in l_pr_sub : 
            # case where pr_type_substituent is a folder
            try : l_pr_PDBref = listdir(pr_final_folder + pr_type_subref + "/" + pr_sub + "/")
            except : continue
            if debug : print "3", pr_final_folder + pr_type_subref, pr_sub             

            for pr_PDBref in l_pr_PDBref :
                PDB_ref = pr_PDBref.split ("_")[-1]
                family_ref = pr_PDBref.split ("-")[0]
                group_ref = pr_PDBref.split ("_")[0].split ("-")[-1]
                pr_LGD = pr_final_folder + pr_type_subref + "/" + pr_sub + "/" + pr_PDBref + "/LGD/"
                pr_LSR = pr_final_folder + pr_type_subref + "/" + pr_sub + "/" + pr_PDBref + "/LSR/"
                pr_BS = pr_final_folder + pr_type_subref + "/" + pr_sub + "/" + pr_PDBref + "/BS/"
                if debug : 
                    print "4",pr_LGD
                    print "4", pr_BS
                    print "4", pr_LSR




                ################
                #  folder LSR  #
                ################
                l_file_LSR = listdir (pr_LSR)

                for file_LSR in l_file_LSR :
                    # -> count by type sub reference
                    if search ("LSR_", file_LSR) and file_LSR.split ("_")[1] != "REF" :
                        ligand_sub = file_LSR.split ("_")[1]
                        if debug : print "5", file_LSR
                        if not ligand_sub in d_count.keys () : 
                            d_count[ligand_sub] = {}
                    
                        if not pr_sub in d_count[ligand_sub].keys () : 
                            d_count[ligand_sub][pr_sub] = 0
                        d_count[ligand_sub][pr_sub] = d_count[ligand_sub][pr_sub] + 1
                    
                    ################
                    # complet LSR  #
                    ################
                    elif search ("LSR", file_LSR):
                        # case LSR reference #
                        ######################
                        if search ("REF_", file_LSR) :
                            lig_ref = file_LSR.split ("_")[2][:3]
                            if not lig_ref in d_by_ref.keys () : 
                                d_by_ref[lig_ref] = {}

                            type_ref = pr_type_subref.split ("_")[0]

                            if not type_ref in d_by_ref[lig_ref].keys () : 
                                    d_by_ref[lig_ref][type_ref] = 0
                            
                            d_by_ref[lig_ref][type_ref] = d_by_ref[lig_ref][type_ref] + 1
            
            
                #################    
                #  folder LGD   #
                #################
                l_file_LGD = listdir(pr_LGD)
                for file_LGD in l_file_LGD : 
                    # print file_ref
                    if search ("LGD", file_LGD):
                        ligand = file_LGD.split ("_")[1]
                        if ligand == "REF" : 
                            continue
                        if not ligand in d_lig.keys () : 
                            d_lig[ligand] = {}
                            d_lig[ligand]["count"] = 0
                            d_lig[ligand]["group"] = []
                            d_lig[ligand]["family"] = []
                        d_lig[ligand]["count"] = d_lig[ligand]["count"] + 1
                        d_lig[ligand]["family"].append (str(family_ref))
                        d_lig[ligand]["group"].append (str(group_ref))

            
                ###############
                #  folder BS  #
                ###############
                l_file_BS = listdir(pr_BS)
                for file_BS in l_file_BS : 
                    if search ("BS_REF", file_BS):
                        lig_ref = file_BS.split ("_")[2]
                        pr_ref = file_BS.split ("_")[3].split (".")[0]
                        print lig_ref, pr_ref, "*****"
                        if not lig_ref in d_count_pr.keys () : 
                            d_count_pr[lig_ref] = {}
                            d_count_pr[lig_ref]["pr ref"] = []
                            d_count_pr[lig_ref]["pr queries"] = []
                            d_count_pr[lig_ref]["lig queries"] = []
                                   
                        if not pr_ref in d_count_pr[lig_ref]["pr ref"] : 
                            d_count_pr[lig_ref]["pr ref"].append (pr_ref)
                                
                                
                        try:
                            family = analysis.findFamily (pr_ref, pathManage.dataset (lig_ref) + "family_PDB.txt")
                            if not family in d_count_pr[lig_ref].keys () : 
                                d_count_pr[lig_ref][family] = 0
                            d_count_pr[lig_ref][family] = d_count_pr[lig_ref][family] + 1
                        except: pass
                

                # BS -> query
                for file_BS in l_file_BS : 
                    # for not reference BS
                    if not search ("BS_REF", file_BS) : 
                        lig_querie = file_BS.split ("_")[1]
                        prot_querie = file_BS.split ("_")[2][0:4]
                        print prot_querie, lig_querie, "*******"
                        # find ligand reference
                        # lig ref define in previous step
                        d_count_pr[lig_ref]["pr queries"].append (prot_querie)
                        d_count_pr[lig_ref]["lig queries"].append (lig_querie)


    # write and plot #
    ##################
    pr_result = pathManage.generatePath(pr_final_folder + "counting/")
    for ligand_sub in d_count.keys () : 
        p_filout = pr_result + ligand_sub
        filout = open (p_filout, "w")
        filout.write ("\t".join(d_count[ligand_sub].keys ()) + "\n")
        l_value = [str(x) for x in d_count[ligand_sub].values ()]
        filout.write ("\t".join(l_value) + "\n")
        filout.close ()
        runOtherSoft.piePlot(p_filout)
    
    filout_lig = open (pr_result + "count_ligand", "w")
    filout_lig.write ("Ligand ID\tNumber of occurences in the dataset\tNumber of different clusters\tList of clusters\tList of protein families\n")
    for lig in d_lig.keys () : 
        if d_lig[lig] > 1 : 
            filout_lig.write (str (lig) + "\t" + str (d_lig[lig]["count"]) + "\t" + str(len (list (set(d_lig[lig]["group"]))))  + "\t" + " ".join (d_lig[lig]["group"]) + "\t" + " ".join (d_lig[lig]["family"]) + "\n")
    filout_lig.close ()
    
    filout_LSR_lig = open (pr_result + "CountByLigandRef", "w")
    for lig_ref in d_by_ref.keys () : 
        filout_LSR_lig.write ("====" + str (lig_ref) + "====\n")
        for sub_ref in d_by_ref[lig_ref].keys () : 
            filout_LSR_lig.write (str (sub_ref) + ": " + str (d_by_ref[lig_ref][sub_ref]) + "\n")
    filout_LSR_lig.close ()

    filout_pr_count = open (pr_result + "count_pr", "w")
    for lig in d_count_pr.keys () : 
        filout_pr_count.write ("====" + str (lig) + "====\n")
        filout_pr_count.write ("nb ref pr: " + str (len (d_count_pr[lig]["pr ref"])) + "\n")
        filout_pr_count.write ("nb querie pr: " + str (len (d_count_pr[lig]["pr queries"])) + "\n")
        filout_pr_count.write ("nb ligand queries: " + str (len (d_count_pr[lig]["lig queries"])) + "\n")

    for family in d_count_pr[lig].keys () : 
        if family != "pr ref" and family != "pr queries" and family != "lig queries" :
            filout_pr_count.write ("Ref " + str (family) + ": " + str (d_count_pr[lig][family]) + "\n")


    filout_pr_count.close ()

    runOtherSoft.barplot(pr_result + "count_ligand")
예제 #23
0
def globalArrangement (pr_orgin, p_smile, p_family, name_ligand, l_ligand_out):
    
#     print "--------"
#     print pr_orgin
#     print p_smile
#     print p_family
#     print name_ligand
#     print "--------"
    
    
    subst = p_smile.split ("_")[-3]
    
    filin = open (p_smile, "r")
    l_line_smile = filin.readlines ()
    filin.close()
    
    for line_smile in l_line_smile : 
        
        # search substructure
#         print line_smile
        l_PDB_query = line_smile.split ("\t")[-3].split (" ")
#         print l_PDB_query
        l_PDB_ref = line_smile.split ("\t")[-2].split (" ")
        l_ligand = line_smile.strip().split ("\t")[-1].split (" ")
        
        # search replacement
        smile = line_smile.split ("\t")[0]
        
        # search if LSR is small -> thresold < 3
        small_LSR = smileAnalysis.smallLSR (smile) 
        if subst == "ribose" :  
            if small_LSR == 1 : 
                first_folder = "ribose_small"
            else : 
                first_folder = "ribose"
        else : 
            if small_LSR == 1 : 
                first_folder = "Pi_small"
            else : 
                first_folder = "Pi"
        
        
        print smile, l_PDB_query, l_PDB_ref, l_ligand, subst, small_LSR
        replacement, metal = smileAnalysis.searchReplacement (smile, l_PDB_query[0], l_PDB_ref[0], name_ligand)
        
        # case with cycle -> search replacement 2
        if replacement == "cycle" : 
            replacement2, metal = smileAnalysis.searchReplacement (smile, l_PDB_query[0], l_PDB_ref[0], name_ligand, in_cycle = 1)
            replacement = replacement + "/" + replacement2 # new folder

        # case metal
        if replacement == "metal" : 
            print metal, l_PDB_query, l_PDB_ref, name_ligand
        
        len_find = len (l_PDB_ref)
        i = 0
        while i < len_find : 
            
            # exclusion of ligand out
            if l_ligand[i] in l_ligand_out : 
                i = i + 1
                continue
            
            
            group, family = analysis.findFamilyAndGroup(l_PDB_ref[i])
            
            # folder reference
            pr_dataset = pathManage.dataset(name_ligand + "/" + l_PDB_ref[i])
            
            PDB_ref = pathManage.findPDBRef(pr_dataset)
            p_ligand_ref = pathManage.findligandRef(pr_dataset , name_ligand)
            l_frag_ref = pathManage.findSubstructRef(pr_dataset, name_ligand)
            for f_ref in l_frag_ref :
                if search (subst, f_ref) : 
                    p_frag_ref = f_ref
                    break
            
            # folder_query
            pr_result = pathManage.result(name_ligand + "/" + l_PDB_ref[i])
            l_protein_tranloc = pathManage.findPDBQueryTransloc(pr_result)
            for p_t in l_protein_tranloc : 
                if search (l_ligand[i], p_t) and search (l_PDB_query[i], p_t) : 
                    p_protein_query = p_t
                    break
                
            if replacement != "metal" : 
                p_lig_query = pathManage.findligandQuery(pr_dataset , l_ligand[i], l_PDB_query[i])
            else : 
                p_lig_query = pathManage.findligandQuery(pr_dataset ,metal, l_PDB_query[i])
            # need apply transloc matrix
            matrix_transloc = pathManage.findMatrix(p_ligand_ref, p_lig_query, name_ligand)
            lig_query_parsed = parsePDB.loadCoordSectionPDB(p_lig_query)
            try : superposeStructure.applyMatrixLigand(lig_query_parsed, matrix_transloc)
            except : 
                i = i + 1
                continue
            
            
            p_lig_substituate = pathManage.findSubstructFind(pr_result, l_ligand[i], l_PDB_query[i], subst)
            l_p_BS = pathManage.findFileBS(pr_result, l_PDB_query[i])
            for BS in l_p_BS : 
                if search (l_ligand[i], BS) : 
                    p_BS = BS
                    break
            
            
#             print pr_final
#             print "***************"
#             print PDB_ref
#             print p_ligand_ref
#             print p_frag_ref
#             print "----"
#             print p_protein_query
#             print p_lig_query
#             print p_lig_substituate
#             print p_BS
#             print "**************"
            # ajouter group + family 2 lettre
            pr_final = pr_orgin + first_folder + "/" + replacement + "/" + str (family) + "-"  + str (group) + "_" + l_PDB_ref[i] +  "/" 
            pr_ligand = pr_orgin + first_folder + "/" + replacement + "/" + str (family) + "-" +  str (group) + "_" + l_PDB_ref[i] + "/LGD/"
            pr_BS = pr_orgin + first_folder + "/" + replacement + "/" + str (family) + "-" + str (group) + "_" + l_PDB_ref[i] + "/BS/"
            pr_sust = pr_orgin + first_folder + "/" + replacement + "/" + str (family) + "-"  + str (group) + "_" + l_PDB_ref[i] + "/LSR/"
            
            if not path.isdir(pr_final):
                makedirs (pr_final)
            
            if not path.isdir(pr_ligand):
                makedirs (pr_ligand)
            
            if not path.isdir(pr_BS):
                makedirs (pr_BS)
                
            if not path.isdir(pr_sust):
                makedirs (pr_sust)   
            
            # list file
            p_list_smile_queries = pr_sust + "list.smile"
            if not path.exists(p_list_smile_queries) : 
                file_smile_queries = open (p_list_smile_queries, "w")
            else : 
                file_smile_queries = open (p_list_smile_queries, "a")
            file_smile_queries.write (str(smile) + "\n")
            file_smile_queries.close ()
            
            # lig de la query
            writePDBfile.coordinateSection(pr_ligand + "LGD_" + p_lig_query.split ("/")[-1], lig_query_parsed, recorder = "HETATM", header = "LCG_" + p_lig_query.split ("/")[-1], connect_matrix = 1)
            runOtherSoft.babelConvertPDBtoSMILE(pr_ligand + "LGD_" + p_lig_query.split ("/")[-1], clean_smi = 1)
            # lig de reference + smile
            copy2(p_ligand_ref, pr_ligand + "LGD_REF_" + p_ligand_ref.split ("/")[-1])
            runOtherSoft.babelConvertPDBtoSMILE(pr_ligand + "LGD_REF_" + p_ligand_ref.split ("/")[-1])
            # LSR de ref
            copy2(p_frag_ref, pr_sust + "LSR_REF_" + name_ligand + "_" + l_PDB_ref[i] + ".pdb")
            # protein query
            #copy2(p_protein_query, pr_final)
            # LSR query -> p_lig_ref only for the name
            copy2(p_lig_substituate, pr_sust + "LSR_" + subst + "_"  + p_lig_query.split ("/")[-1])
            # BS query
            copy2(p_BS, pr_BS)   
            
            # BS from reference
            l_atom_BS = parsePDB.computeBS (PDB_ref, p_ligand_ref, thresold = 4.50, option_onlyATOM = 0)
            writePDBfile.coordinateSection(pr_BS + "BS_REF_" + name_ligand + "_" + PDB_ref.split ("/")[-1], l_atom_BS, recorder = "ATOM", header = "BS_REF_" + name_ligand + "_" + PDB_ref, connect_matrix = 0)
            
            i = i + 1
    
    return 1
예제 #24
0
def main(name_database,
         max_distance=5.0,
         RX=3.00,
         RFree=0.25,
         option_superimpose=0,
         option_on_complexes_by_ligand=0,
         option_bond=0,
         option_stat=0,
         option_stat_dataset=0,
         option_merge=0,
         verbose=1):
    """
    Drive the whole analysis for one database.

    args: -> name_database: name given to pathManage.result, searchPDB.ligands
             and datasetFinal.Builder
          -> max_distance: converted to float, then given to the neighbor
             search and to the global statistics
          -> RX, RFree: given to datasetFinal.Builder and used to build the
             name of the folder that is merged at the end
          -> option_*: each step runs only when its flag is equal to 1
          -> verbose: when equal to 1, print the two working directories
    return: NONE
    """

    max_distance = float(max_distance)

    # ligand search in the result folder of the database
    pr_result = pathManage.result(name_database)
    searchPDB.ligands(name_database, pr_result)

    # one dataset file by resolution
    l_p_dataset = datasetFinal.Builder(name_database, RX, RFree,
                                       option_on_complexes_by_ligand)

    # optional parsing of every dataset
    if option_stat_dataset == 1:
        for p_dataset in l_p_dataset:
            statistic.ParseDataSet(p_dataset)

    for p_dataset in l_p_dataset:

        # folders are named after the dataset file, without its last 4 characters
        pr_base = p_dataset[:-4]
        pr_result = pathManage.CreatePathDir(pr_base + "/")
        pr_hetion = pathManage.CreatePathDir(pr_base + "/HET/")

        if verbose == 1:
            print("== control path Main ==")
            print(pr_result)
            print(pr_hetion)
            print("=======================")

        # build the structure of neighbors, no filter at this step
        d_sub_neighbor = searchPDB.globalSearch(max_distance, p_dataset,
                                                pr_result)

        # the two control prints frame the iron removal because, according to
        # the original note, the dictionary is changed by removeNeighborIron
        print("control-1 " + str(len(d_sub_neighbor["I"])))
        # d_close_het is only consumed by analyses that are currently disabled
        d_close_het = hetCloseAnalysis.removeNeighborIron(
            d_sub_neighbor, pr_hetion + "ionSummarySubstruct.txt")
        print("control-2 " + str(len(d_sub_neighbor["I"])))

        if option_superimpose == 1:
            # superimpose the neighbors, one run by substructure
            for type_sub in ("I", "II", "III", "IMD", "GAI", "COO"):
                superimpose.globalNeighbor(d_sub_neighbor, type_sub, pr_result)

        if option_bond == 1:
            # planarity of imidazole and guanidium
            statistic.planarityImidazole(d_sub_neighbor, pr_result)
            statistic.planarityGuanidium(d_sub_neighbor, pr_result)

            # bond length
            for type_sub in ("I", "II", "III"):
                statistic.lenBondAnalysis(d_sub_neighbor, type_sub, pr_result)

        if option_stat == 1:
            statistic.globalRunStatistic(d_sub_neighbor, max_distance,
                                         pr_result)

    # merge step, nothing more to do when it is not requested
    if option_merge != 1:
        return

    name_merge = name_database + "/" + str(RX) + "_" + str(RFree)
    if option_on_complexes_by_ligand == 1:
        name_merge = name_merge + "_uniquePDB"
        statistic.MergeDataSet(pathManage.result(name_merge), "dataset_1.50",
                               "dataset_3.00")
        statistic.MergeDataSet(pathManage.result(name_merge), "dataset_1.50",
                               "dataset_3.00", arom=1)
    else:
        statistic.MergeDataSet(pathManage.result(name_merge), "dataset_1.50",
                               "dataset_3.00")
예제 #25
0
파일: main.py 프로젝트: ABorrel/LSRs
def analysisBS (name_lig, ID_seq = '0.0', debug = 1):
    """
    Compare the binding site of every reference with the binding site of its
    queries and write the RMSD tables in the folder result(name_lig + "/sameBS")

    Files written: log.txt (TM align outputs that cannot be parsed),
    summary.txt (counts), <name_lig>_ (table on the complete ligand) and
    <name_lig>_<substructure>_ (one table by substructure). An R histogram is
    run on each file stored in the dictionary of files.

    args: -> name_lig: name of the ligand, used to find dataset and result folders
          -> ID_seq: minimal value of score_align["IDseq"] to keep a query
          -> debug: print the folders and queries inspected when true
    return: 1
    """

    header_BS = "name_bs\tRMSD_prot\tRMSD_BS_ca\tRMSD_BS_all\tD_max\tl_at_BS\tidentic\n"
    # upper limit used in histograms, 3.5 for the ligands not listed
    d_max_RMSD = {"ATP": 5.0, "ADP": 4.0, "AMP": 4.0}

    def formatLine (p_ref, p_query, score_align, l_RMSD):
        # one line of table -> same columns as header_BS
        name_bs = p_ref.split ("/")[-1][0:-4] + "_*_" + p_query.split ("/")[-1][0:-4]
        l_val = [score_align["RMSD"], l_RMSD[1], l_RMSD[0], l_RMSD[2], l_RMSD[-2], l_RMSD[-1]]
        return name_bs + "\t" + "\t".join ([str (val) for val in l_val]) + "\n"

    pr_result = pathManage.result(name_lig)
    pr_out = pathManage.result(name_lig + "/sameBS")
    pr_dataset = pathManage.dataset(name_lig)

    nb_BS = 0
    nb_BS_filtered = 0
    nb_same_BS = 0

    # dictionnary with files, closed in the finally even if one step crashes
    d_file_BS = {}
    filout_log = open (pr_out + "log.txt", "w")
    try :
        d_file_BS["global"] = open (pr_out + name_lig + "_", "w")
        d_file_BS["global"].write (header_BS)
        d_file_BS["summary"] = open (pr_out + "summary.txt", "w")

        for PDB_ref in listdir(pr_result) :
            if debug : print (PDB_ref)
            # only folders named with 4 characters are considered as reference
            if len (PDB_ref) != 4 :
                continue

            pr_dataset_ref = pr_dataset + PDB_ref + "/"
            p_pdb_ref = pathManage.findPDBRef(pr_dataset_ref)
            l_p_query = pathManage.findPDBQueryTransloc (pr_result + PDB_ref + "/")

            if debug : print (l_p_query)
            for p_query in l_p_query :

                # read TM Align
                name_query = p_query.split ("/")[-1][7:-4]
                if debug : print (name_query)

                p_TMalign = pathManage.alignmentOutput(name_lig) + p_pdb_ref.split ("/")[-1][0:-4] + "__" + name_query + "/RMSD"
                # Exception and not a bare except -> Ctrl-C and sys.exit are not swallowed
                try : score_align = parseTMalign.parseOutputTMalign(p_TMalign)
                except Exception :
                    filout_log.write ("ERROR TM align " + p_TMalign + "\n")
                    continue
                nb_BS = nb_BS + 1

                # NOTE(review): ID_seq is a string by default and is compared as
                # it is; the type of score_align["IDseq"] is not visible here,
                # confirm that both sides have the same type
                if not (score_align["IDseq"] >= ID_seq) :
                    continue
                nb_BS_filtered = nb_BS_filtered + 1

                # sub BS
                for p_substruct_ref in pathManage.findSubstructRef (pr_dataset_ref, name_lig) :
                    struct_substitued = p_substruct_ref.split ("_")[-2]

                    # first time the substructure is found -> open file and write header
                    if struct_substitued not in d_file_BS :
                        d_file_BS[struct_substitued] = open (pr_out + name_lig + "_" + struct_substitued + "_", "w")
                        d_file_BS[struct_substitued].write (header_BS)

                    RMSD_bs = analysis.computeRMSDBS (p_pdb_ref, p_query, p_substruct_ref, pr_out)
                    if RMSD_bs != [] :
                        d_file_BS[struct_substitued].write (formatLine (p_substruct_ref, p_query, score_align, RMSD_bs))

                # BS of the complete ligand
                p_ligand_ref = pathManage.findligandRef(pr_dataset_ref, name_lig)
                RMSD_bs_lig = analysis.computeRMSDBS (p_pdb_ref, p_query, p_ligand_ref, pr_out)
                if RMSD_bs_lig != [] :
                    d_file_BS["global"].write (formatLine (p_ligand_ref, p_query, score_align, RMSD_bs_lig))
                    # last value equal to 1 is counted as same BS
                    if RMSD_bs_lig [-1] == 1 :
                        nb_same_BS = nb_same_BS + 1

        # write summary
        d_file_BS["summary"].write ("BS global: " + str (nb_BS) + "\n")
        d_file_BS["summary"].write ("BS - IDseq " + str (ID_seq) + "%: " +  str (nb_BS_filtered) + "\n")
        d_file_BS["summary"].write ("BS - same atom number: " + str (nb_same_BS) + "\n")
    finally :
        filout_log.close ()
        for file_BS in d_file_BS.values () :
            file_BS.close ()

    # run histograms on closed files
    # NOTE(review): the summary file is also given to the histogram, as before
    max_RMSD = d_max_RMSD.get (name_lig, 3.5)
    for k_dico in d_file_BS.keys () :
        runOtherSoft.RhistogramRMSD(d_file_BS[k_dico].name, max_RMSD = max_RMSD)

    return 1
예제 #26
0
파일: cleanResult.py 프로젝트: ABorrel/LSRs
                            else :
                                i = i + 1
                        if len (l_ref) != 0 : 
                            filout_smile.write (str(l_elem1[0]) + "\t" + str (len (l_queries)) + "\t" + " ".join(l_queries) + "\t" + " ".join (l_ref) + "\t" + " ".join (l_lig) + "\n")
                    filout_smile.close ()
                            



####################
###   MAIN     #####
####################
# Script part: these statements are at module level, so they run as soon as
# the file is executed or imported.

# constants
# Only thresold_shaep and l_ligand_out are used by the call at the end of
# this section; the other thresholds are not referenced in the visible part.
# NOTE(review): units/meaning of each threshold are presumed from the names
# (resolution, binding site distance, blast e-value, ...) -- confirm.
thresold_RX = 2.7
thresold_BS = 4.5
thresold_blast = 1e-100
thresold_superimposed_ribose = 2.5
thresold_superimposed_pi = 3
thresold_IDseq = 100
thresold_shaep = 0.2
# list of ligand names given to the clean-up functions
l_ligand_out = ["AMP", "ADP", "ATP", "TTP", "DCP", "DGT", "DTP", "DUP", "ACP", "AD9", "NAD", "AGS", "UDP", "POP", "APC", "CTP", "AOV"]


# main #
########
# root of the result folder (pathManage.result called without argument)
pr_result = pathManage.result()
# cleaning of the result folder is disabled, only the smile files are cleaned
#cleanResultFolder (thresold_shaep, l_ligand_out, pr_result)
cleanSmileFile (thresold_shaep, l_ligand_out, pr_result)


예제 #27
0
def SheapScoreToClass(prclassif):
    """
    Write, for every LSR file found in the classification folder, its class
    and its ShaEP ESP / shape similarity, then run the R plot on the table.

    args: -> prclassif: path of the classification folder; its last path
             element is used to name the output folder
    return: NONE, the table is written in
            pathManage.result(<last element> + "_SheapClassif") + "PiLSRType"
    """
    prout = pathManage.result(prclassif.split("/")[-1] + "_SheapClassif")
    pfilout = prout + "PiLSRType"
    filout = open(pfilout, "w")
    filout.write("ClassLSR\tESP\tshape\tName\n")

    # step 1: collect every reference folder, the "cycle" group has one more
    # level (subtype) than the other groups
    l_pr_ref = []
    for group in listdir(prclassif):
        if group != "cycle":
            pr_group = prclassif + "/" + group + "/"
            l_pr_ref.extend([pr_group + ref for ref in listdir(pr_group)])
            continue
        for subtype in listdir(prclassif + "/cycle/"):
            pr_subtype = prclassif + "/cycle/" + subtype
            l_pr_ref.extend(
                [pr_subtype + "/" + ref for ref in listdir(pr_subtype)])

    # step 2: one line by LSR with an existing ShaEP output
    for pr_ref in l_pr_ref:
        l_level = pr_ref.split("/")
        if l_level[-3] == "cycle":
            classif = "cycle-" + l_level[-2]
        else:
            classif = l_level[-2]

        # PDB reference -> last element of the folder name
        PDBref = l_level[-1].split("_")[-1]

        lLSR = listdir(pr_ref + "/LSR")

        # ligand of the reference, read on the first LSR_REF file
        lgdREF = next(
            (f.split("_")[2] for f in lLSR if search("LSR_REF", f)), "")
        if lgdREF == "":
            print("Error reference l.49 classifResult.py")

        for fileLSR in lLSR:
            if not search(".pdb", fileLSR):
                continue
            l_elem = fileLSR.split("_")
            typeLSR = l_elem[1]
            if typeLSR == "REF":
                continue
            lgd = l_elem[2]
            PDBLSR = l_elem[3]
            name_LSR = lgd + "_" + PDBLSR + "_" + typeLSR

            # file sheap in result folder
            psheap = pathManage.result(
            ) + lgdREF + "/" + PDBref + "/substituent_" + name_LSR + ".hit"
            if not path.exists(psheap):
                continue

            dsheap = parseShaep.parseOutputShaep(psheap)
            l_col = [
                classif,
                str(dsheap["ESP_similarity"]),
                str(dsheap["shape_similarity"]), name_LSR
            ]
            filout.write("\t".join(l_col) + "\n")
    filout.close()

    # plot with R
    runOtherSoft.plotClassifSheap(pfilout)
예제 #28
0
def analysisBS(name_lig, ID_seq='0.0', debug=1):
    """
    Compute the RMSD between the binding site of each reference and the
    binding site of its queries, and write the tables in
    pathManage.result(name_lig + "/sameBS").

    Output files: log.txt (TM align outputs that failed to parse),
    summary.txt (three counters), <name_lig>_ (whole ligand) and
    <name_lig>_<substructure>_ (one file by substructure). Every file kept in
    the dictionary of files is then given to runOtherSoft.RhistogramRMSD.

    args: -> name_lig: ligand name, selects the dataset and result folders
          -> ID_seq: threshold applied on score_align["IDseq"] with >=
          -> debug: when true, print folders and queries while running
    return: 1
    """
    header_BS = "name_bs\tRMSD_prot\tRMSD_BS_ca\tRMSD_BS_all\tD_max\tl_at_BS\tidentic\n"

    def _line_BS(p_ref, p_query, score_align, l_RMSD):
        # columns in the same order as header_BS
        l_col = [
            p_ref.split("/")[-1][0:-4] + "_*_" + p_query.split("/")[-1][0:-4],
            str(score_align["RMSD"]),
            str(l_RMSD[1]),
            str(l_RMSD[0]),
            str(l_RMSD[2]),
            str(l_RMSD[-2]),
            str(l_RMSD[-1])
        ]
        return "\t".join(l_col) + "\n"

    pr_result = pathManage.result(name_lig)
    pr_out = pathManage.result(name_lig + "/sameBS")
    pr_dataset = pathManage.dataset(name_lig)

    nb_BS = 0
    nb_BS_filtered = 0
    nb_same_BS = 0

    # every handle is registered before use so that the finally clause can
    # close all of them, even when a helper raises in the middle of the run
    d_file_BS = {}
    filout_log = open(pr_out + "log.txt", "w")
    try:
        d_file_BS["global"] = open(pr_out + name_lig + "_", "w")
        d_file_BS["global"].write(header_BS)
        d_file_BS["summary"] = open(pr_out + "summary.txt", "w")

        for PDB_ref in listdir(pr_result):
            if debug: print(PDB_ref)
            # reference folders are the ones named with 4 characters
            if len(PDB_ref) != 4:
                continue

            pr_ref = pr_dataset + PDB_ref + "/"
            p_pdb_ref = pathManage.findPDBRef(pr_ref)
            l_p_query = pathManage.findPDBQueryTransloc(pr_result + PDB_ref +
                                                        "/")

            if debug: print(l_p_query)
            for p_query in l_p_query:

                # read TM Align
                name_query = p_query.split("/")[-1][7:-4]
                if debug: print(name_query)

                p_TMalign = pathManage.alignmentOutput(
                    name_lig) + p_pdb_ref.split(
                        "/")[-1][0:-4] + "__" + name_query + "/RMSD"
                try:
                    score_align = parseTMalign.parseOutputTMalign(p_TMalign)
                except Exception:
                    # narrowed from a bare except: KeyboardInterrupt and
                    # SystemExit must stop the run, not be logged as an error
                    filout_log.write("ERROR TM align " + p_TMalign + "\n")
                    continue
                nb_BS = nb_BS + 1

                # NOTE(review): the default ID_seq is the string '0.0' and no
                # conversion is done before the comparison; the type returned
                # for score_align["IDseq"] is not visible here -- confirm
                if not (score_align["IDseq"] >= ID_seq):
                    continue
                nb_BS_filtered = nb_BS_filtered + 1

                # sub BS
                l_p_substruct_ref = pathManage.findSubstructRef(
                    pr_ref, name_lig)
                for p_substruct_ref in l_p_substruct_ref:
                    struct_substitued = p_substruct_ref.split("_")[-2]

                    # new substructure -> new file with header
                    if struct_substitued not in d_file_BS:
                        d_file_BS[struct_substitued] = open(
                            pr_out + name_lig + "_" + struct_substitued + "_",
                            "w")
                        d_file_BS[struct_substitued].write(header_BS)

                    RMSD_bs = analysis.computeRMSDBS(p_pdb_ref, p_query,
                                                     p_substruct_ref, pr_out)
                    if RMSD_bs != []:
                        d_file_BS[struct_substitued].write(
                            _line_BS(p_substruct_ref, p_query, score_align,
                                     RMSD_bs))

                # whole ligand
                p_ligand_ref = pathManage.findligandRef(pr_ref, name_lig)
                RMSD_bs_lig = analysis.computeRMSDBS(p_pdb_ref, p_query,
                                                     p_ligand_ref, pr_out)
                if RMSD_bs_lig != []:
                    d_file_BS["global"].write(
                        _line_BS(p_ligand_ref, p_query, score_align,
                                 RMSD_bs_lig))
                    # a last value of 1 is counted as an identical BS
                    if RMSD_bs_lig[-1] == 1:
                        nb_same_BS = nb_same_BS + 1

        # write summary
        d_file_BS["summary"].write("BS global: " + str(nb_BS) + "\n")
        d_file_BS["summary"].write("BS - IDseq " + str(ID_seq) + "%: " +
                                   str(nb_BS_filtered) + "\n")
        d_file_BS["summary"].write("BS - same atom number: " +
                                   str(nb_same_BS) + "\n")
    finally:
        filout_log.close()
        for file_BS in d_file_BS.values():
            file_BS.close()

    # histograms, the upper limit depends on the ligand
    # NOTE(review): summary.txt is in the dictionary and is plotted too, this
    # is unchanged from the previous behaviour
    max_RMSD = {"ATP": 5.0, "ADP": 4.0, "AMP": 4.0}.get(name_lig, 3.5)
    for k_dico in d_file_BS.keys():
        runOtherSoft.RhistogramRMSD(d_file_BS[k_dico].name, max_RMSD=max_RMSD)

    return 1
예제 #29
0
파일: cleanResult.py 프로젝트: papoku/LSRs
                        if len(l_ref) != 0:
                            filout_smile.write(
                                str(l_elem1[0]) + "\t" + str(len(l_queries)) +
                                "\t" + " ".join(l_queries) + "\t" +
                                " ".join(l_ref) + "\t" + " ".join(l_lig) +
                                "\n")
                    filout_smile.close()


####################
###   MAIN     #####
####################
# Script part: module-level statements, executed when the file is run or
# imported.

# constants
# The call at the end of this section only uses thresold_shaep and
# l_ligand_out; the other thresholds are not referenced in the visible part.
# NOTE(review): meaning/units of the thresholds are presumed from their
# names only -- confirm against the functions that consume them.
thresold_RX = 2.7
thresold_BS = 4.5
thresold_blast = 1e-100
thresold_superimposed_ribose = 2.5
thresold_superimposed_pi = 3
thresold_IDseq = 100
thresold_shaep = 0.2
# ligand names handed to the clean-up functions
l_ligand_out = [
    "AMP", "ADP", "ATP", "TTP", "DCP", "DGT", "DTP", "DUP", "ACP", "AD9",
    "NAD", "AGS", "UDP", "POP", "APC", "CTP", "AOV"
]

# main #
########
# root result folder (pathManage.result without argument)
pr_result = pathManage.result()
# folder cleaning is disabled, only the smile files are cleaned
#cleanResultFolder (thresold_shaep, l_ligand_out, pr_result)
cleanSmileFile(thresold_shaep, l_ligand_out, pr_result)
예제 #30
0
def SheapScoreToClass(prclassif):
    """
    Build the table class of LSR / ShaEP similarities from a classification
    folder.

    args: -> prclassif: path of the classification folder, the last element
             of the path names the output folder
    return: NONE, lines are written in
            pathManage.result(<last element> + "_SheapClassif") + "PiLSRType"
    """
    name_ref = prclassif.split("/")[-1]
    pfilout = pathManage.result(name_ref + "_SheapClassif") + "PiLSRType"
    filout = open(pfilout, "w")
    filout.write("ClassLSR\tESP\tshape\tName\n")

    # list of reference folders; groups are read directly, except "cycle"
    # where the references are one level deeper (by subtype)
    l_folder = []
    for name_group in listdir(prclassif):
        if name_group == "cycle":
            for name_subtype in listdir(prclassif + "/cycle/"):
                pr_in = prclassif + "/cycle/" + name_subtype
                l_folder += [pr_in + "/" + prot for prot in listdir(pr_in)]
        else:
            pr_in = prclassif + "/" + name_group + "/"
            l_folder += [pr_in + prot for prot in listdir(pr_in)]

    for pr_folder in l_folder:
        l_split = pr_folder.split("/")

        # class name, prefixed when the folder comes from the cycle group
        classif = l_split[-2]
        if l_split[-3] == "cycle":
            classif = "cycle" + "-" + classif

        # PDB reference
        PDBref = l_split[-1].split("_")[-1]

        l_file = listdir(pr_folder + "/LSR")

        # reference ligand -> third element in the name of the LSR_REF file
        for name_file in l_file:
            if search("LSR_REF", name_file):
                lgdREF = name_file.split("_")[2]
                break
        else:
            lgdREF = ""

        if lgdREF == "":
            print("Error reference l.49 classifResult.py")

        for name_file in l_file:
            if not search(".pdb", name_file):
                continue

            l_field = name_file.split("_")
            typeLSR = l_field[1]
            if typeLSR == "REF":
                continue
            lgd = l_field[2]
            PDBLSR = l_field[3]
            tag = lgd + "_" + PDBLSR + "_" + typeLSR

            # ShaEP output of this substituent in the result folder
            psheap = pathManage.result() + lgdREF + "/" + PDBref + "/substituent_" + tag + ".hit"
            if not path.exists(psheap):
                continue

            dsheap = parseShaep.parseOutputShaep(psheap)
            esp = str(dsheap["ESP_similarity"])
            shape = str(dsheap["shape_similarity"])
            filout.write(classif + "\t" + esp + "\t" + shape + "\t" + tag + "\n")
    filout.close()
예제 #31
0
#
#######################
# CLASSIFICATION LSRs #
#######################
# Script part: module-level statements. With the steps currently enabled the
# script only builds the path of the classification folder and runs the
# ShaEP-based classification on it; every other step is commented out.

# name used to build the final folder ("final_" + name_folder_final)
name_folder_final = "withoutLig"
# manageResult (["AMP", "ADP", "POP", "ATP"], name_folder_final, l_ligand_out)
# arrangeResult.qualityExtraction (["AMP", "ADP", "POP", "ATP"], name_folder_final, p_list_ligand = "/home/borrel/Yue_project/resultLigandInPDB", thresold_sheap = thresold_shaep)
#arrangeResult.countingSubstituent(name_folder_final)

###################################################
# AFFINITY AND INTERACTIONS BY PROTEIN REFERENCE  #
###################################################

# final folder: "Pi_LSR" is appended to the path returned by pathManage.result
pr_classif = pathManage.result("final_" + name_folder_final) + "Pi_LSR"
#ligandSimilarity.analyseLGDProximity(pr_classif)

#########################################
# ANALYSE CLASSIFICATION BASED ON SHEAP #
#########################################

# only step that is active in this section of the script
classifResults.SheapScoreToClass(pr_classif)

######################
# ANALYSE REFERENCE  #
######################

# enantiomer analysis (disabled)
# arrangeResult.enantiomer(["AMP", "ADP", "ATP"], name_folder_final)
# analysis of the superimposition of the reference ligands
예제 #32
0
def retrieveSubstructSuperimposed(name_lig,
                                  thresold_BS=4.5,
                                  thresold_superimposed_ribose=2.5,
                                  thresold_superimposed_pi=3,
                                  thresold_shaep=0.4):

    # ouput
    p_dir_dataset = pathManage.dataset(name_lig)
    p_dir_result = pathManage.result(name_lig)
    l_folder_ref = listdir(p_dir_dataset)

    # log control
    p_log = open(p_dir_result + "log_superimposed.txt", "w")

    # control extraction
    d_control = {}
    d_control["pr ref"] = 0
    d_control["lig query"] = 0
    d_control["subref"] = {}
    d_control["subref empty"] = {}
    d_control["out sheap"] = {}
    filout_control = open(p_dir_result + "quality_extraction.txt", "w")

    # stock smile code
    d_smile = {}

    # sheap control
    d_filout_sheap = {}
    d_filout_sheap["list"] = [p_dir_result + "shaep_global.txt"]
    d_filout_sheap["global"] = open(p_dir_result + "shaep_global.txt", "w")
    d_filout_sheap["global"].write(
        "name\tbest_similarity\tshape_similarity\tESP_similarity\n")

    for ref_folder in l_folder_ref:
        # control folder reference name
        if len(ref_folder) != 4:
            p_log.write("[ERROR folder] -> " + ref_folder + "\n")
            continue

        # reference
        p_lig_ref = pathManage.findligandRef(p_dir_dataset + ref_folder + "/",
                                             name_lig)
        try:
            lig_ref_parsed = parsePDB.loadCoordSectionPDB(p_lig_ref, "HETATM")
#             print len (lig_ref_parsed)
        except:
            p_log.write("[ERROR ligand ref] -> " + p_lig_ref + "\n")
            continue

        #control
        d_control["pr ref"] = d_control["pr ref"] + 1

        # output by reference
        p_dir_result_ref = pathManage.result(name_lig + "/" + ref_folder)
        d_filout_superimposed = {}
        d_filout_superimposed["global"] = open(
            p_dir_result_ref + "all_ligand_aligned.pdb", "w")
        d_filout_superimposed["sheap"] = open(
            p_dir_result_ref + "all_ligand_aligned_" + str(thresold_shaep) +
            ".pdb", "w")

        # write lig ref -> connect matrix corrrect in all reference and all sheap
        writePDBfile.coordinateSection(d_filout_superimposed["global"],
                                       lig_ref_parsed,
                                       "HETATM",
                                       connect_matrix=1)
        writePDBfile.coordinateSection(d_filout_superimposed["sheap"],
                                       lig_ref_parsed,
                                       "HETATM",
                                       connect_matrix=1)

        # inspect folder dataset
        l_pdbfile = listdir(p_dir_dataset + ref_folder + "/")
        for pdbfile in l_pdbfile:
            # no ligand file
            if len(pdbfile.split("_")) == 1:
                continue
            pdbfile = pdbfile[:-4]  # remove extention

            if len(pdbfile.split("_")[0]) == 3 and len(pdbfile.split(
                    "_")[1]) == 4 and pdbfile.split("_")[1] != ref_folder:
                p_lig = p_dir_dataset + ref_folder + "/" + pdbfile + ".pdb"
                if p_lig_ref != p_lig:
                    # pass case where ligand replace same ligand -> does not need run
                    if pdbfile.split("_")[0] == name_lig:
                        p_log.write("[REMOVE] -> same ligand substituate")
                        continue

                    # parsed ligand query
                    lig_parsed = parsePDB.loadCoordSectionPDB(p_lig, "HETATM")

                    # find matrix of rotation
                    p_matrix = pathManage.findMatrix(p_lig_ref, p_lig,
                                                     name_lig)
                    # control file matrix exist
                    if not path.exists(p_matrix):
                        p_log.write("[ERROR] -> Matrix transloc " + p_lig_ref +
                                    " " + p_lig + " " + name_lig + "\n")
                        continue

                    # control
                    d_control["lig query"] = d_control["lig query"] + 1

                    # find the path of complex used
                    p_complex = p_dir_dataset + ref_folder + "/" + p_lig.split(
                        "/")[-1][4:]

                    # ligand rotated -> change the referentiel
                    superposeStructure.applyMatrixLigand(lig_parsed, p_matrix)

                    # use substruct
                    l_p_substruct_ref = pathManage.findSubstructRef(
                        pathManage.dataset(name_lig) + ref_folder + "/",
                        name_lig)
                    for p_substruct_ref in l_p_substruct_ref:
                        # ribose or phosphate
                        struct_type = p_substruct_ref.split("_")[-2]
                        substruct_parsed = parsePDB.loadCoordSectionPDB(
                            p_substruct_ref, "HETATM")

                        l_atom_substituate = neighborSearch.searchNeighborAtom(
                            substruct_parsed,
                            lig_parsed,
                            struct_type,
                            p_log,
                            thresold_superimposed_ribose=
                            thresold_superimposed_ribose,
                            thresold_superimposed_pi=thresold_superimposed_pi)
                        # control find
                        if len(l_atom_substituate) == 0:
                            if not struct_type in d_control[
                                    "subref empty"].keys():
                                d_control["subref empty"][struct_type] = 1
                            else:
                                d_control["subref empty"][
                                    struct_type] = d_control["subref empty"][
                                        struct_type] + 1
                            continue

                        else:
                            if not struct_type in d_control["subref"].keys():
                                d_control["subref"][struct_type] = 1
                            else:
                                d_control["subref"][struct_type] = d_control[
                                    "subref"][struct_type] + 1

                            # write PDB file, convert smile
                            p_substituate_pdb = p_dir_result_ref + "substituent_" + pdbfile.split(
                                "_")[0] + "_" + pdbfile.split(
                                    "_")[1] + "_" + struct_type + ".pdb"
                            writePDBfile.coordinateSection(p_substituate_pdb,
                                                           l_atom_substituate,
                                                           recorder="HETATM",
                                                           header=0,
                                                           connect_matrix=1)

                            # sheap reference on part of ligand
                            p_sheap = runOtherSoft.runShaep(
                                p_substruct_ref,
                                p_substituate_pdb,
                                p_substituate_pdb[0:-4] + ".hit",
                                clean=0)
                            val_sheap = parseShaep.parseOutputShaep(p_sheap)
                            if val_sheap == {}:
                                p_log.write("[ERROR] -> ShaEP " +
                                            p_substituate_pdb + " " +
                                            p_substruct_ref + "\n")

                                if not struct_type in d_control[
                                        "out sheap"].keys():
                                    d_control["out sheap"][struct_type] = 1
                                else:
                                    d_control["out sheap"][
                                        struct_type] = d_control["out sheap"][
                                            struct_type] + 1
                                continue

                            # control thresold sheap
                            if not struct_type in d_filout_sheap.keys():
                                d_filout_sheap[struct_type] = {}
                                d_filout_sheap[struct_type] = open(
                                    p_dir_result + "shaep_global_" +
                                    struct_type + ".txt", "w")
                                d_filout_sheap[struct_type].write(
                                    "name\tbest_similarity\tshape_similarity\tESP_similarity\n"
                                )
                                d_filout_sheap["list"].append(
                                    p_dir_result + "shaep_global_" +
                                    struct_type +
                                    ".txt")  # to improve with python function

                            # write value in ShaEP control
                            d_filout_sheap[struct_type].write(
                                ref_folder + "_" + str(pdbfile.split("_")[1]) +
                                "_" + struct_type + "_" +
                                str(pdbfile.split("_")[0]) + "\t" +
                                str(val_sheap["best_similarity"]) + "\t" +
                                str(val_sheap["shape_similarity"]) + "\t" +
                                str(val_sheap["ESP_similarity"]) + "\n")
                            d_filout_sheap["global"].write(
                                ref_folder + "_" + str(pdbfile.split("_")[1]) +
                                "_" + struct_type + "_" +
                                str(pdbfile.split("_")[0]) + "\t" +
                                str(val_sheap["best_similarity"]) + "\t" +
                                str(val_sheap["shape_similarity"]) + "\t" +
                                str(val_sheap["ESP_similarity"]) + "\n")

                            # rename file substituent with shaEP value
                            rename(
                                p_substituate_pdb,
                                p_substituate_pdb[:-4] + "_" +
                                str(val_sheap["best_similarity"]) + ".pdb")
                            # rename and change the file name
                            p_substituate_pdb = p_substituate_pdb[:-4] + "_" + str(
                                val_sheap["best_similarity"]) + ".pdb"

                            # write all substruct in global file
                            writePDBfile.coordinateSection(
                                d_filout_superimposed["global"],
                                lig_parsed,
                                recorder="HETATM",
                                header=str(p_lig.split("/")[-1]) + "_" +
                                str(val_sheap["best_similarity"]),
                                connect_matrix=1)

                            # control sheap thresold
                            if float(val_sheap["best_similarity"]
                                     ) >= thresold_shaep:

                                # write subligand superimposed selected in global files
                                writePDBfile.coordinateSection(
                                    d_filout_superimposed["sheap"],
                                    lig_parsed,
                                    recorder="HETATM",
                                    header=str(p_lig.split("/")[-1]) + "_" +
                                    str(val_sheap["best_similarity"]),
                                    connect_matrix=1)

                                ############
                                # write BS #
                                ############
                                # not only protein superimposed -> also ion and water
                                l_atom_complex = parsePDB.loadCoordSectionPDB(
                                    p_complex)
                                superposeStructure.applyMatrixProt(
                                    l_atom_complex, p_matrix)
                                p_file_cx = p_dir_result_ref + "CX_" + p_lig.split(
                                    "/")[-1]
                                # write CX
                                writePDBfile.coordinateSection(
                                    p_file_cx,
                                    l_atom_complex,
                                    recorder="ATOM",
                                    header=p_lig.split("/")[-1],
                                    connect_matrix=0)

                                # search atom in BS
                                l_atom_binding_site = []
                                for atom_complex in l_atom_complex:
                                    for atom_substruct in lig_parsed:
                                        if parsePDB.distanceTwoatoms(
                                                atom_substruct,
                                                atom_complex) <= thresold_BS:
                                            if not atom_complex in l_atom_binding_site:
                                                l_atom_binding_site.append(
                                                    deepcopy(atom_complex))

                                # 3. retrieve complet residue
                                l_atom_BS_res = parsePDB.getResidues(
                                    l_atom_binding_site, l_atom_complex)

                                # 4. write binding site
                                p_binding = p_dir_result_ref + "BS_" + p_lig.split(
                                    "/")[-1]
                                writePDBfile.coordinateSection(
                                    p_binding,
                                    l_atom_BS_res,
                                    "ATOM",
                                    p_binding,
                                    connect_matrix=0)

                                # smile code substituate analysis
                                # Step smile -> not conversion if shaep not validate
                                smile_find = runOtherSoft.babelConvertPDBtoSMILE(
                                    p_substituate_pdb)
                                if not struct_type in d_smile.keys():
                                    d_smile[struct_type] = {}
                                    d_smile[struct_type][smile_find] = {}
                                    d_smile[struct_type][smile_find][
                                        "count"] = 1
                                    d_smile[struct_type][smile_find]["PDB"] = [
                                        pdbfile.split("_")[1]
                                    ]
                                    d_smile[struct_type][smile_find][
                                        "ligand"] = [pdbfile.split("_")[0]]
                                    d_smile[struct_type][smile_find]["ref"] = [
                                        ref_folder
                                    ]
                                else:
                                    if not smile_find in d_smile[
                                            struct_type].keys():
                                        d_smile[struct_type][smile_find] = {}
                                        d_smile[struct_type][smile_find][
                                            "count"] = 1
                                        d_smile[struct_type][smile_find][
                                            "PDB"] = [pdbfile.split("_")[1]]
                                        d_smile[struct_type][smile_find][
                                            "ligand"] = [
                                                pdbfile.split("_")[0]
                                            ]
                                        d_smile[struct_type][smile_find][
                                            "ref"] = [ref_folder]
                                    else:
                                        d_smile[struct_type][smile_find][
                                            "count"] = d_smile[struct_type][
                                                smile_find]["count"] + 1
                                        d_smile[struct_type][smile_find][
                                            "PDB"].append(
                                                pdbfile.split("_")[1])
                                        d_smile[struct_type][smile_find][
                                            "ligand"].append(
                                                pdbfile.split("_")[0])
                                        d_smile[struct_type][smile_find][
                                            "ref"].append(ref_folder)

                            else:
                                if not struct_type in d_control[
                                        "out sheap"].keys():
                                    d_control["out sheap"][struct_type] = 1
                                else:
                                    d_control["out sheap"][
                                        struct_type] = d_control["out sheap"][
                                            struct_type] + 1

        tool.closeDicoFile(d_filout_superimposed)

    # sheap control
    tool.closeDicoFile(d_filout_sheap)
    for p_file_sheap in d_filout_sheap["list"]:
        runOtherSoft.RhistogramMultiple(p_file_sheap)

    # write list of smile
    for substruct in d_smile.keys():
        p_list_smile = pathManage.result(
            name_lig) + "list_" + substruct + "_" + str(
                thresold_shaep) + "_smile.txt"
        filout_smile = open(p_list_smile, "w")
        for smile_code in d_smile[substruct].keys():
            l_lig = d_smile[substruct][smile_code]["ligand"]
            l_PDB = d_smile[substruct][smile_code]["PDB"]
            l_ref = d_smile[substruct][smile_code]["ref"]
            filout_smile.write(
                str(smile_code) + "\t" +
                str(d_smile[substruct][smile_code]["count"]) + "\t" +
                " ".join(l_PDB) + "\t" + " ".join(l_ref) + "\t" +
                " ".join(l_lig) + "\n")
        filout_smile.close()
    p_log.close()

    # control
    filout_control.write("NB ref: " + str(d_control["pr ref"]) + "\n")
    filout_control.write("Ligand query: " + str(d_control["lig query"]) + "\n")
    for k in d_control["subref"].keys():
        filout_control.write("LSR " + str(k) + ": " +
                             str(d_control["subref"][k]) + "\n")
    for k in d_control["subref empty"].keys():
        filout_control.write("NB LSR empty " + str(k) + ": " +
                             str(d_control["subref empty"][k]) + "\n")
    for k in d_control["out sheap"].keys():
        filout_control.write("LSR out by sheap " + str(k) + ": " +
                             str(d_control["out sheap"][k]) + "\n")

    filout_control.write("**********************\n\n")
    for k in d_control["subref"].keys():
        filout_control.write("LSR keep" + str(k) + ": " +
                             str(d_control["subref"][k] -
                                 d_control["out sheap"][k]) + "\n")

    filout_control.close()

    return 1
예제 #33
0
def Builder(name_database, RX = 3.00, RFree = 0.25, one_PDB_by_lig = 0, debug = 1):
    """
    Dataset Builder
    in : - name_database: name of the result folder holding "resultLigandInPDB"
         - RX, RFree: thresholds forwarded to checkPDBfile.CheckComplexQuality
           and used to name the dataset folder
         - one_PDB_by_lig: 0 -> "_multiPDB" dataset, otherwise "_uniquePDB";
           also forwarded to checkPDBfile.CheckComplexQuality
         - debug: truthy value enables the progress messages
    out : - value returned by pathManage.retriveDataSetFile for the dataset
            folder (the existing files when the dataset was already built,
            otherwise the files found after WriteDataset has run)
    """

    # The dataset folder name encodes the thresholds and the PDB selection mode
    if one_PDB_by_lig == 0:
        suffix = "_multiPDB"
    else:
        suffix = "_uniquePDB"
    name_dataset = name_database + "/" + str(RX) + "_" + str(RFree) + suffix

    pr_database = pathManage.result(name_database)
    pr_result = pathManage.result(name_dataset)
    if debug:
        print("== Path result " + pr_result + "==\n")

    # Shortcut: if dataset files are already present, return them as is
    l_file_dataset = pathManage.retriveDataSetFile(pr_result)
    if len(l_file_dataset) != 0:
        return l_file_dataset

    # load structure
    d_lig_PDB = loadFile.LigandInPDB(pr_database + "resultLigandInPDB")

    nb_lig = len(d_lig_PDB.keys())
    print("NB ligand included database: " + str(nb_lig))

    # Iterate over a snapshot of the keys: entries are deleted from the
    # dictionary inside the loop, so the live key view must not be indexed.
    nb_kept = 0  # number of ligands validated so far (debug output only)
    for name_lig in list(d_lig_PDB.keys()):

        #######################################
        # step 1 search chemical substructure #
        #######################################
        PDB_ref = d_lig_PDB[name_lig][0]
        if debug:
            print(" ".join([str(PDB_ref), str(name_lig), str(nb_kept), str(nb_lig)]))

        # if not possible to load the ligand -> remove lig
        # (Exception rather than a bare except, so that KeyboardInterrupt and
        # SystemExit are not swallowed)
        try:
            l_atom_lig_ref = loadFile.ligandInPDBConnectMatrixLigand(PDB_ref, name_lig)
        except Exception:
            if debug == 1:
                print("Exit => load ligand-l59")
            del d_lig_PDB[name_lig]
            nb_lig = nb_lig - 1
            continue

        # search substructure interest
        l_interest_sub = searchPDB.interestStructure(l_atom_lig_ref)
        if debug:
            print("Interest substructures in " + str(name_lig) + "-" + str(PDB_ref) + " " + "-".join(l_interest_sub))
        if l_interest_sub == []:
            if debug == 1:
                print("Exit => Not substructure-l68")
            del d_lig_PDB[name_lig]
            nb_lig = nb_lig - 1
            continue

        ##############################################################
        # Step 2 Control quality of PDB + ligand hooked + option one #
        ##############################################################
        if debug:
            print("List PDB checked ->  " + str(d_lig_PDB[name_lig]))
        l_PDB = checkPDBfile.CheckComplexQuality(d_lig_PDB[name_lig], name_lig, RX, RFree, one_PDB_by_lig)

        # remove the ligand entry when no PDB passes the quality control
        if l_PDB == []:
            if debug == 1:
                print("Exit => Not No PDB selected-l82")
            del d_lig_PDB[name_lig]
            nb_lig = nb_lig - 1
            continue

        d_lig_PDB[name_lig] = l_PDB
        nb_kept = nb_kept + 1

    if debug == 1:
        print("Number of ligand selected => " + str(nb_lig))

    # structure and file dataset and control RX + length bond
    WriteDataset(d_lig_PDB, pr_result)

    # Re-read the dataset folder directly instead of calling Builder again:
    # the recursive call never terminated when WriteDataset produced no file
    # and it ignored the caller's debug setting.
    return pathManage.retriveDataSetFile(pr_result)