Exemple #1
0
def SearchChemicalSubstruct (pr_data, pr_result, control = 0):
    """
    Search the substructures of interest in every ligand file of a data folder
    in : - pr_data: folder holding one sub-folder per ligand; the path is
           concatenated as is, so it has to end with a separator
         - pr_result: prefix of the output path, "findSruct" is appended to it
         - control: when 1, ControlPDBFormat is called on each file before parsing
    out : void -> writes one tab-separated line per ligand retrieved:
          ligand folder, file name, group, substructures (space separated)
    """

    p_filout = pr_result + "findSruct"

    # the context manager closes the result file even if a parser raises
    with open(p_filout, "w") as filout:
        l_ligand = listdir(pr_data)
        print(l_ligand)

        for ligand in l_ligand:
            print(ligand)

            # residue name searched in the files, fixed for a whole ligand folder
            if ligand == "ZM241385":
                name_res = "ZMA"
            else:
                name_res = "RES"

            pr_ligand = pr_data + ligand + "/"
            for f in listdir(pr_ligand):
                if ligand == "ZM241385" or ligand == "source":
                    group = f
                else:
                    # second-to-last "_" separated field of the file name
                    # NOTE: raises IndexError if the name contains no "_"
                    group = f.split("_")[-2]
                p_file_PDB = pr_ligand + f
                print(p_file_PDB)

                if control == 1:
                    ControlPDBFormat(p_file_PDB)

                l_atom = parsing.loadCoordSectionPDB(p_file_PDB, remove_H = 1)
                ll_atom_lig = parsing.retrieveLigand(l_atom, name_res)

                for l_atom_lig in ll_atom_lig:
                    l_subs = searchPDB.interestStructure(l_atom_lig, more_flex = 1)
                    l_col = [str(ligand), str(f), str(group), " ".join(l_subs)]
                    filout.write("\t".join(l_col) + "\n")
       

# best group ergomine -> 2128 (2013) && 6882 (2013)
# best group taladegid -> 7527 (2013) Vanderbilt
# best group eticlopride -> 1285 (2010)
# ZM241385 => mod7msp
    
Exemple #2
0
def CheckComplexQuality(l_in, name_lig, limit_RX, limit_RFree, one_PDB_out, debug = 1):
    """Filter a list of PDB entries and keep only those of sufficient quality
    An entry is discarded when:
      - loadFile.ExtractInfoPDBID returns an empty dictionary for it
      - its header matches "dna" or "rna"
      - its "RX" value is above limit_RX or its "RFree" value above limit_RFree
      - no substructure of interest is found in the first ligand name_lig
      - parsing.checkLigandHooked returns 1 for this ligand and the protein
    in : - l_in: list of PDB entries, not modified (a deep copy is used)
         - name_lig: key of the ligand in the loaded PDB dictionary
         - limit_RX, limit_RFree: maximum accepted "RX" and "RFree" values
         - one_PDB_out: when 1, only the entry with the lowest RX is returned
         - debug: when 1, print the reason of each rejection
    out : new list with the retained PDB entries"""

    l_PDB = []
    l_RX = []  # numeric RX of each retained entry, parallel to l_PDB

    for PDB in deepcopy(l_in):

        # load PDB
        d_PDB_load = loadFile.ExtractInfoPDBID(PDB)

        # case PDB not found => often difference between list and database
        if d_PDB_load == {}:
            continue

        # check DNA or RNA in the header
        header_PDB = d_PDB_load["Header"]
        if search("dna", header_PDB) or search("rna", header_PDB):
            if debug == 1: print("Exit => out dna {0}".format(PDB))
            continue

        # check quality
        RX = d_PDB_load["RX"]
        RFree = d_PDB_load["RFree"]
        if float(RX) > limit_RX or float(RFree) > limit_RFree:
            if debug == 1: print("Exit => Quality structure {0} {1} {2}".format(PDB, RX, RFree))
            continue

        # check if the structure contains the substructure
        # -> the ligand can change between versions of a structure and
        #    break the substructure search
        l_interest_sub = searchPDB.interestStructure(d_PDB_load[name_lig][0])
        if l_interest_sub == []:
            if debug == 1: print("Exit => No Sub found {0} {1} {2}".format(PDB, RX, RFree))
            continue

        # check false ligand, hooked to the protein
        l_atom_protein = d_PDB_load["protein"]
        l_atom_lig = d_PDB_load[name_lig][0]
        if parsing.checkLigandHooked(l_atom_protein, l_atom_lig) == 1:
            if debug == 1: print("Exit => ligand = Modificated Amino acid {0}".format(PDB))
            continue

        l_PDB.append(PDB)
        l_RX.append(float(RX))

    # case where we want only one PDB -> retrieve best quality, i.e. the
    # lowest RX (first one listed in case of tie), not the smallest identifier
    if one_PDB_out == 1 and len(l_PDB) > 1:
        i_best = l_RX.index(min(l_RX))
        return [l_PDB[i_best]]

    return l_PDB
Exemple #3
0
def Builder(name_database, RX = 3.00, RFree = 0.25, one_PDB_by_lig = 0, debug = 1):
    """
    Dataset Builder
    in : - name_database: name of the database, its result folder has to
           contain the file "resultLigandInPDB"
         - RX, RFree: quality thresholds given to CheckComplexQuality, also
           used to name the dataset folder
         - one_PDB_by_lig: 0 -> folder "<RX>_<RFree>_multiPDB",
           otherwise -> folder "<RX>_<RFree>_uniquePDB"
         - debug: print progress and rejection messages
    out : list returned by pathManage.retriveDataSetFile for the dataset
          folder; when this list is not empty at the beginning nothing is
          rebuilt and it is returned directly
    """

    if one_PDB_by_lig == 0:
        suffix_dataset = "_multiPDB"
    else:
        suffix_dataset = "_uniquePDB"
    name_dataset = name_database + "/" + str(RX) + "_" + str(RFree) + suffix_dataset

    pr_database = pathManage.result(name_database)
    pr_result = pathManage.result(name_dataset)
    if debug: print("== Path result " + pr_result + "==\n")

    # short cut -> dataset already built
    l_file_dataset = pathManage.retriveDataSetFile(pr_result)
    if len(l_file_dataset) != 0:
        return l_file_dataset

    # load structure
    d_lig_PDB = loadFile.LigandInPDB(pr_database + "resultLigandInPDB")
    print("NB ligand included database: " + str(len(d_lig_PDB)))

    nb_selected = 0
    # iterate on a snapshot of the keys, ligands are removed from the
    # dictionary inside the loop
    for name_lig in list(d_lig_PDB.keys()):

        #######################################
        # step 1 search chemical substructure #
        #######################################
        PDB_ref = d_lig_PDB[name_lig][0]
        if debug: print("{0} {1} {2} {3}".format(PDB_ref, name_lig, nb_selected, len(d_lig_PDB)))

        # if not possible to load the ligand -> remove lig
        # (Exception and not a bare except, so Ctrl-C still stops the build)
        try:
            l_atom_lig_ref = loadFile.ligandInPDBConnectMatrixLigand(PDB_ref, name_lig)
        except Exception:
            if debug == 1: print("Exit => load ligand-l59")
            del d_lig_PDB[name_lig]
            continue

        # search substructure interest
        l_interest_sub = searchPDB.interestStructure(l_atom_lig_ref)
        if debug: print("Interest substructures in " + str(name_lig) + "-" + str(PDB_ref) + " " + "-".join(l_interest_sub))
        if l_interest_sub == []:
            if debug == 1: print("Exit => Not substructure-l68")
            del d_lig_PDB[name_lig]
            continue

        ##############################################################
        # Step 2 Control quality of PDB + ligand hooked + option one #
        ##############################################################
        if debug: print("List PDB checked ->  " + str(d_lig_PDB[name_lig]))
        l_PDB = checkPDBfile.CheckComplexQuality(d_lig_PDB[name_lig], name_lig, RX, RFree, one_PDB_by_lig)
        # remove the entrance key with the ligand
        if l_PDB == []:
            if debug == 1: print("Exit => Not No PDB selected-l82")
            del d_lig_PDB[name_lig]
            continue

        d_lig_PDB[name_lig] = l_PDB
        nb_selected = nb_selected + 1

    if debug == 1: print("Number of ligand selected => " + str(len(d_lig_PDB)))

    # structure and file dataset and control RX + length bond
    WriteDataset(d_lig_PDB, pr_result)

    # read back the files just written; no recursive call, so an empty
    # dataset returns [] instead of rebuilding endlessly
    return pathManage.retriveDataSetFile(pr_result)