Example #1
def globalArrangement (pr_orgin, p_smile, p_family, name_ligand, l_ligand_out):
    """
    Dispatch the substitutions listed in a SMILES summary file into a
    classification folder tree.

    Every non-empty line of p_smile is tab-separated: the first field is the
    SMILES code and the last three fields are space-separated lists giving,
    position by position, the query PDB IDs, the reference PDB IDs and the
    query ligand names. For each position whose ligand is not in
    l_ligand_out the folder
        pr_orgin + <first_folder>/<replacement>/<family>-<group>_<PDB ref>/
    is created with three sub-folders:
        LGD/ query ligand after application of the transloc matrix, copy of
             the reference ligand, both passed to babelConvertPDBtoSMILE
        BS/  binding site file of the query and binding site computed around
             the reference ligand (4.50 threshold)
        LSR/ reference and query substructure files + "list.smile" in which
             the SMILES code is appended
    <first_folder> is "ribose" or "Pi" depending on the substructure name
    taken from the file name of p_smile, with the suffix "_small" when
    smileAnalysis.smallLSR returns 1. <replacement> comes from
    smileAnalysis.searchReplacement ("cycle" is refined with a second search).

    Entries are skipped (and not written) when the ligand is excluded, when
    the transloc matrix cannot be applied or when the reference substructure
    file / the binding site file cannot be found.

    args: -> pr_orgin: root of the output tree, used as a string prefix
          -> p_smile: path of the SMILES summary file; the substructure name
             is the third-from-last "_"-separated token of this path
          -> p_family: not used by this function
          -> name_ligand: name of the reference ligand
          -> l_ligand_out: ligand names to exclude
    return: 1
    """

    subst = p_smile.split ("_")[-3]

    with open (p_smile, "r") as filin :
        l_line_smile = filin.readlines ()

    for line_smile in l_line_smile :
        # a blank line holds no field at all and would break the indexing below
        if not line_smile.strip () :
            continue

        # search substructure
        l_field = line_smile.split ("\t")
        l_PDB_query = l_field[-3].split (" ")
        l_PDB_ref = l_field[-2].split (" ")
        # last field: strip first to drop the end of line
        l_ligand = line_smile.strip ().split ("\t")[-1].split (" ")
        smile = l_field[0]

        # search if LSR is small -> first level of the tree
        small_LSR = smileAnalysis.smallLSR (smile)
        if subst == "ribose" :
            first_folder = "ribose"
        else :
            first_folder = "Pi"
        if small_LSR == 1 :
            first_folder = first_folder + "_small"

        # written with join to print the same text with python 2 and 3
        print (" ".join ([str (val) for val in (smile, l_PDB_query, l_PDB_ref, l_ligand, subst, small_LSR)]))

        # search replacement -> second level of the tree
        replacement, metal = smileAnalysis.searchReplacement (smile, l_PDB_query[0], l_PDB_ref[0], name_ligand)
        # case with cycle -> search replacement 2
        if replacement == "cycle" :
            replacement2, metal = smileAnalysis.searchReplacement (smile, l_PDB_query[0], l_PDB_ref[0], name_ligand, in_cycle = 1)
            replacement = replacement + "/" + replacement2 # new folder

        # case metal
        if replacement == "metal" :
            print (" ".join ([str (val) for val in (metal, l_PDB_query, l_PDB_ref, name_ligand)]))

        for i, ID_PDB_ref in enumerate (l_PDB_ref) :
            # exclusion of ligand out
            if l_ligand[i] in l_ligand_out :
                continue

            group, family = analysis.findFamilyAndGroup (ID_PDB_ref)

            # folder reference
            pr_dataset = pathManage.dataset (name_ligand + "/" + ID_PDB_ref)
            PDB_ref = pathManage.findPDBRef (pr_dataset)
            p_ligand_ref = pathManage.findligandRef (pr_dataset, name_ligand)
            # reset at each entry, a file found for a previous entry must not be reused
            p_frag_ref = None
            for f_ref in pathManage.findSubstructRef (pr_dataset, name_ligand) :
                if search (subst, f_ref) :
                    p_frag_ref = f_ref

            # folder query
            pr_result = pathManage.result (name_ligand + "/" + ID_PDB_ref)
            # with a metal replacement the query ligand file is searched with the metal name
            if replacement == "metal" :
                lig_searched = metal
            else :
                lig_searched = l_ligand[i]
            p_lig_query = pathManage.findligandQuery (pr_dataset, lig_searched, l_PDB_query[i])

            # need apply transloc matrix
            matrix_transloc = pathManage.findMatrix (p_ligand_ref, p_lig_query, name_ligand)
            lig_query_parsed = parsePDB.loadCoordSectionPDB (p_lig_query)
            try :
                superposeStructure.applyMatrixLigand (lig_query_parsed, matrix_transloc)
            except Exception :
                # without superimposition the ligand cannot be compared -> entry skipped
                print ("ERROR apply matrix " + str (p_lig_query))
                continue

            p_lig_substituate = pathManage.findSubstructFind (pr_result, l_ligand[i], l_PDB_query[i], subst)
            p_BS = None
            for BS in pathManage.findFileBS (pr_result, l_PDB_query[i]) :
                if search (l_ligand[i], BS) :
                    p_BS = BS

            if p_frag_ref is None or p_BS is None :
                print ("ERROR missing LSR ref or BS file " + ID_PDB_ref + " " + l_PDB_query[i] + " " + l_ligand[i])
                continue

            # output folders: <family>-<group>_<PDB ref>/ with LGD, BS and LSR inside
            pr_final = pr_orgin + first_folder + "/" + replacement + "/" + str (family) + "-" + str (group) + "_" + ID_PDB_ref + "/"
            pr_ligand = pr_final + "LGD/"
            pr_BS = pr_final + "BS/"
            pr_sust = pr_final + "LSR/"
            for pr_create in (pr_final, pr_ligand, pr_BS, pr_sust) :
                if not path.isdir (pr_create) :
                    makedirs (pr_create)

            # list file, mode "a" creates the file when it does not exist
            with open (pr_sust + "list.smile", "a") as file_smile_queries :
                file_smile_queries.write (str (smile) + "\n")

            # ligand of the query
            filename_lig_query = p_lig_query.split ("/")[-1]
            p_lig_query_out = pr_ligand + "LGD_" + filename_lig_query
            writePDBfile.coordinateSection (p_lig_query_out, lig_query_parsed, recorder = "HETATM", header = "LCG_" + filename_lig_query, connect_matrix = 1)
            runOtherSoft.babelConvertPDBtoSMILE (p_lig_query_out, clean_smi = 1)

            # ligand of the reference + smile
            p_lig_ref_out = pr_ligand + "LGD_REF_" + p_ligand_ref.split ("/")[-1]
            copy2 (p_ligand_ref, p_lig_ref_out)
            runOtherSoft.babelConvertPDBtoSMILE (p_lig_ref_out)

            # LSR of the reference
            copy2 (p_frag_ref, pr_sust + "LSR_REF_" + name_ligand + "_" + ID_PDB_ref + ".pdb")

            # LSR query -> p_lig_query only for the name
            copy2 (p_lig_substituate, pr_sust + "LSR_" + subst + "_" + filename_lig_query)

            # BS query
            copy2 (p_BS, pr_BS)

            # BS from reference
            l_atom_BS = parsePDB.computeBS (PDB_ref, p_ligand_ref, thresold = 4.50, option_onlyATOM = 0)
            writePDBfile.coordinateSection (pr_BS + "BS_REF_" + name_ligand + "_" + PDB_ref.split ("/")[-1], l_atom_BS, recorder = "ATOM", header = "BS_REF_" + name_ligand + "_" + PDB_ref, connect_matrix = 0)

    return 1
Example #2
File: main.py Project: ABorrel/LSRs
def _lineRMSDBS (p_ref, p_query, RMSD_prot, l_RMSD_bs):
    """
    Format one tab-separated row of a RMSD binding site file.

    args: -> p_ref: path of the reference ligand or substructure file
          -> p_query: path of the query protein file
          -> RMSD_prot: RMSD of the protein alignment
          -> l_RMSD_bs: list returned by analysis.computeRMSDBS
    return: row ending with a newline, columns in the order of the header
            name_bs, RMSD_prot, RMSD_BS_ca, RMSD_BS_all, D_max, l_at_BS, identic
    """

    # file names without their 4-character extension, joined by "_*_"
    name_bs = p_ref.split ("/")[-1][0:-4] + "_*_" + p_query.split ("/")[-1][0:-4]
    l_col = [name_bs, RMSD_prot, l_RMSD_bs[1], l_RMSD_bs[0], l_RMSD_bs[2], l_RMSD_bs[-2], l_RMSD_bs[-1]]
    return "\t".join ([str (col) for col in l_col]) + "\n"


def analysisBS (name_lig, ID_seq = '0.0', debug = 1):
    """
    Compare reference and query binding sites of one ligand and write the
    RMSD tables, a summary and a log in the "sameBS" result folder.

    For every 4-character entry of the result folder and every translocated
    query protein found in it, the TM-align output is parsed. When its
    "IDseq" score is >= ID_seq, analysis.computeRMSDBS is run with each
    reference substructure (one output file per substructure name) and with
    the whole reference ligand ("global" file). Queries whose TM-align output
    cannot be parsed are reported in log.txt and skipped. Once the files are
    closed, runOtherSoft.RhistogramRMSD is called on each of them, summary
    included.

    args: -> name_lig: name of the ligand
          -> ID_seq: threshold compared with score_align["IDseq"]
             NOTE(review): the default is a string, check that it has the
             same type as the value returned by parseOutputTMalign
          -> debug: if true, print the folders and files treated
    return: 1
    """

    header = "name_bs\tRMSD_prot\tRMSD_BS_ca\tRMSD_BS_all\tD_max\tl_at_BS\tidentic\n"
    # RMSD limit given to the histogram, 3.5 for the ligands not listed
    d_max_RMSD = {"ATP" : 5.0, "ADP" : 4.0, "AMP" : 4.0}

    pr_result = pathManage.result (name_lig)
    pr_out = pathManage.result (name_lig + "/sameBS")

    # log files
    filout_log = open (pr_out + "log.txt", "w")

    # dictionary with files
    d_file_BS = {}
    try :
        d_file_BS["global"] = open (pr_out + name_lig + "_", "w")
        d_file_BS["global"].write (header)
        d_file_BS["summary"] = open (pr_out + "summary.txt", "w")

        pr_dataset = pathManage.dataset (name_lig)

        nb_BS = 0
        nb_BS_filtered = 0
        nb_same_BS = 0
        for PDB_ref in listdir (pr_result) :
            if debug : print (PDB_ref)
            # keep only the entries named with 4 characters, the other ones are not reference folders
            if len (PDB_ref) != 4 :
                continue

            p_pdb_ref = pathManage.findPDBRef (pr_dataset + PDB_ref + "/")
            l_p_query = pathManage.findPDBQueryTransloc (pr_result + PDB_ref + "/")
            if debug : print (l_p_query)

            for p_query in l_p_query :
                # read TM Align
                if debug : print (p_query.split ("/")[-1][7:-4])
                p_TMalign = pathManage.alignmentOutput (name_lig) + p_pdb_ref.split ("/")[-1][0:-4] + "__" + p_query.split ("/")[-1][7:-4] + "/RMSD"
                try :
                    score_align = parseTMalign.parseOutputTMalign (p_TMalign)
                except Exception :
                    # no alignment score -> the query cannot be filtered, skip it
                    filout_log.write ("ERROR TM align " + p_TMalign + "\n")
                    continue

                nb_BS = nb_BS + 1
                if not score_align["IDseq"] >= ID_seq :
                    continue
                nb_BS_filtered = nb_BS_filtered + 1

                # sub BS
                for p_substruct_ref in pathManage.findSubstructRef (pr_dataset + PDB_ref + "/", name_lig) :
                    struct_substitued = p_substruct_ref.split ("_")[-2]
                    # first time the substructure is found -> open file and write header
                    if struct_substitued not in d_file_BS :
                        d_file_BS[struct_substitued] = open (pr_out + name_lig + "_" + struct_substitued + "_", "w")
                        d_file_BS[struct_substitued].write (header)

                    RMSD_bs = analysis.computeRMSDBS (p_pdb_ref, p_query, p_substruct_ref, pr_out)
                    if RMSD_bs != [] :
                        d_file_BS[struct_substitued].write (_lineRMSDBS (p_substruct_ref, p_query, score_align["RMSD"], RMSD_bs))

                # global BS, with the complete ligand
                p_ligand_ref = pathManage.findligandRef (pr_dataset + PDB_ref + "/", name_lig)
                RMSD_bs_lig = analysis.computeRMSDBS (p_pdb_ref, p_query, p_ligand_ref, pr_out)
                if RMSD_bs_lig != [] :
                    d_file_BS["global"].write (_lineRMSDBS (p_ligand_ref, p_query, score_align["RMSD"], RMSD_bs_lig))
                    if RMSD_bs_lig[-1] == 1 :
                        nb_same_BS = nb_same_BS + 1

        # write summary
        d_file_BS["summary"].write ("BS global: " + str (nb_BS) + "\n")
        d_file_BS["summary"].write ("BS - IDseq " + str (ID_seq) + "%: " + str (nb_BS_filtered) + "\n")
        d_file_BS["summary"].write ("BS - same atom number: " + str (nb_same_BS) + "\n")
    finally :
        # files are closed even if the analysis fails
        filout_log.close ()
        for file_BS in d_file_BS.values () :
            file_BS.close ()

    # run histograms, only with closed files
    # NOTE(review): summary.txt is also given to the histogram function, as before - confirm it is wanted
    max_RMSD = d_max_RMSD.get (name_lig, 3.5)
    for file_BS in d_file_BS.values () :
        runOtherSoft.RhistogramRMSD (file_BS.name, max_RMSD = max_RMSD)

    return 1
