Beispiel #1
0
 def getRMSD(self, nSeqUnbound, seqUnboundAli, nSeqBound, seqBoundAli):
     '''
     Computes the rmsd between the paired atoms of one unbound chain and one
     bound chain. The atom pairs come from self.build2SeqsDictMap and the
     coordinates are compared as they are: no superposition is done here.

     @param nSeqUnbound: int. The index of the unbound sequence that will be aligned
     @param seqUnboundAli: str. The alignment result for unbound sequence number nSeqUnbound
     @param nSeqBound: int. The index of the bound sequence that will be aligned
     @param seqBoundAli: str. The alignment result for bound sequence number nSeqBound
     @return rmsd. float. Root mean square deviation over the paired atoms
                   (CA atoms according to the original documentation;
                   the selection itself is made by build2SeqsDictMap)
     @return boundToUnboundResDict. {Bio.PDB.Residue_bound --> Bio.PDB.Residue_unbound}
     @raise ValueError: if build2SeqsDictMap returns no atom pairs
     '''
     boundToUnboundResDict, atomBoundToUnboundMap = self.build2SeqsDictMap(
         nSeqUnbound, seqUnboundAli, nSeqBound, seqBoundAli)

     # Materialise once so that an empty mapping is reported explicitly
     # instead of surfacing as an opaque tuple-unpacking error below.
     atomPairs = list(atomBoundToUnboundMap)
     if not atomPairs:
         raise ValueError(
             "No equivalent atoms between unbound sequence %s and bound "
             "sequence %s" % (nSeqUnbound, nSeqBound))

     atoms_x, atoms_y = zip(*atomPairs)
     coords_x = np.array([atom.get_coord() for atom in atoms_x])
     coords_y = np.array([atom.get_coord() for atom in atoms_y])

     # Plain coordinate RMSD: sqrt(sum of squared differences / number of atoms).
     diff = coords_x - coords_y
     rmsd = np.sqrt(np.sum(diff * diff) / coords_x.shape[0])
     return rmsd, boundToUnboundResDict
Beispiel #2
0
def _standard_residues(chain):
    """Yield ``(residue, key)`` for each residue of *chain* that is not a hetatm.

    A residue is skipped when the first field of ``get_id()`` is not a blank.
    ``key`` is the second ``get_id()`` field (the residue number) followed by
    the chain id, e.g. ``'42A'``.
    """
    chain_id = chain.id
    for residue in chain:
        res_id = residue.get_id()
        if res_id[0] != ' ':  # Skip hetatm.
            continue
        yield residue, str(res_id[1]) + chain_id


def calc_DockQ(model, native, use_CA_only=False):
    """Compute the DockQ score of a two-chain model against its native.

    The external ``fnat`` program, looked up in the directory of the running
    script, is executed twice: once with trailing argument ``5`` for the
    contact statistics and once with ``10`` for the list of interface
    residues (presumably distance cutoffs -- confirm against ``fnat``).
    Both structures are then parsed with ``Bio.PDB.PDBParser`` and only atoms
    present in both the model and the native are used.

    * ``irms``: RMS after superimposing the interface atoms.
    * ``Lrms``: RMS of the ligand atoms after superimposing on the receptor,
      without superimposing the ligand again.
    * The chain with strictly more matched atoms is the receptor; on a tie
      ``chain1`` is the ligand.

    :param model: path of the model PDB file.
    :param native: path of the native (reference) PDB file.
    :param use_CA_only: if true, use only ``CA`` atoms instead of the
        backbone atoms ``CA``, ``C``, ``N`` and ``O``.
    :return: dict with keys ``DockQ``, ``irms``, ``Lrms``, ``fnat``,
        ``nat_correct``, ``nat_total``, ``fnonnat``, ``nonnat_count``,
        ``model_total``, ``chain1``, ``chain2``, ``len1``, ``len2``,
        ``class1`` and ``class2``.
    :raises AssertionError: if ``fnat`` fails, or the atom selections are
        empty or differ in size between model and native.
    :raises ValueError: if the model does not contain exactly two chains.
    """
    exec_path = os.path.dirname(os.path.abspath(sys.argv[0]))
    atom_for_sup = ['CA'] if use_CA_only else ['CA', 'C', 'N', 'O']

    # NOTE(review): these command lines are plain string concatenations run
    # through a shell by commands.getoutput, so paths containing spaces or
    # shell metacharacters will break; `commands` is also Python 2 only.
    cmd_fnat = exec_path + '/fnat ' + model + ' ' + native + ' 5'
    cmd_interface = exec_path + '/fnat ' + model + ' ' + native + ' 10'

    fnat_out = commands.getoutput(cmd_fnat)
    (fnat, nat_correct, nat_total, fnonnat, nonnat_count, model_total,
     _interface5A) = parse_fnat(fnat_out)
    assert fnat != -1, "Error running cmd: %s\n" % (cmd_fnat)

    inter_out = commands.getoutput(cmd_interface)
    (fnat_bb, _, _, _, _, _, interface) = parse_fnat(inter_out)
    assert fnat_bb != -1, "Error running cmd: %s\n" % (cmd_interface)

    pdb_parser = Bio.PDB.PDBParser(QUIET=True)
    ref_structure = pdb_parser.get_structure("reference", native)
    sample_structure = pdb_parser.get_structure("model", model)

    # Use the first model in the pdb-files for alignment.
    ref_model = ref_structure[0]
    sample_model = sample_structure[0]

    # Atom keys ("<resnum><chain>.<atom>") defined in the model.
    atoms_def_sample = set()
    for sample_chain in sample_model:
        for sample_res, key in _standard_residues(sample_chain):
            for a in atom_for_sup:
                if a in sample_res:
                    atom_key = key + '.' + a
                    if atom_key in atoms_def_sample:
                        print(atom_key + ' already added (MODEL)!!!')
                    atoms_def_sample.add(atom_key)

    # Atom keys defined in the native that are also defined in the model.
    atoms_def_in_both = set()
    for ref_chain in ref_model:
        for ref_res, key in _standard_residues(ref_chain):
            for a in atom_for_sup:
                atom_key = key + '.' + a
                if a in ref_res and atom_key in atoms_def_sample:
                    if atom_key in atoms_def_in_both:
                        print(atom_key + ' already added (Native)!!!')
                    atoms_def_in_both.add(atom_key)

    # Model pass: residue keys per chain and the interface atoms.
    chain_res = {}    # chain id -> residue keys, in order (used for lengths)
    sample_keys = {}  # chain id -> same keys as a set (used for lookups)
    sample_atoms = []
    common_interface = set()
    for sample_chain in sample_model:
        chain = sample_chain.id
        residue_keys = chain_res.setdefault(chain, [])
        residue_key_set = sample_keys.setdefault(chain, set())
        for sample_res, key in _standard_residues(sample_chain):
            residue_keys.append(key)
            residue_key_set.add(key)
            if key in interface:
                for a in atom_for_sup:
                    if a in sample_res and (key + '.' + a) in atoms_def_in_both:
                        sample_atoms.append(sample_res[a])
                common_interface.add(key)

    # Native pass: per-chain atoms shared with the model and interface atoms.
    chain_ref = {}
    common_residues = set()
    ref_atoms = []
    for ref_chain in ref_model:
        chain = ref_chain.id
        ref_chain_atoms = chain_ref.setdefault(chain, [])
        # A native chain that is absent from the model has no residue in
        # common with it; treat it as empty instead of raising KeyError.
        keys_in_sample = sample_keys.get(chain, frozenset())
        for ref_res, key in _standard_residues(ref_chain):
            in_sample = key in keys_in_sample
            at_interface = key in common_interface
            if not (in_sample or at_interface):
                continue
            for a in atom_for_sup:
                if a in ref_res and (key + '.' + a) in atoms_def_in_both:
                    atom = ref_res[a]
                    if in_sample:
                        ref_chain_atoms.append(atom)
                        common_residues.add(key)
                    if at_interface:
                        ref_atoms.append(atom)

    # Model atoms of the residues that were matched in the native.
    chain_sample = {}
    for sample_chain in sample_model:
        chain_atoms = chain_sample.setdefault(sample_chain.id, [])
        for sample_res, key in _standard_residues(sample_chain):
            if key not in common_residues:
                continue
            for a in atom_for_sup:
                if a in sample_res and (key + '.' + a) in atoms_def_in_both:
                    chain_atoms.append(sample_res[a])

    assert len(ref_atoms) != 0, "length of native is zero"
    assert len(sample_atoms) != 0, "length of model is zero"
    assert len(ref_atoms) == len(
        sample_atoms
    ), "Different number of atoms in native and model %d %d\n" % (
        len(ref_atoms), len(sample_atoms))

    # Superimpose on the interface atoms; the resulting RMS is irms.
    super_imposer = Bio.PDB.Superimposer()
    super_imposer.set_atoms(ref_atoms, sample_atoms)
    super_imposer.apply(sample_model.get_atoms())
    irms = super_imposer.rms

    chain_ids = list(chain_sample)
    if len(chain_ids) != 2:
        raise ValueError(
            "Expected exactly two chains in the model, found %d: %s" %
            (len(chain_ids), ', '.join(str(c) for c in chain_ids)))
    chain1, chain2 = chain_ids

    len1 = len(chain_res[chain1])
    len2 = len(chain_res[chain2])
    assert len1 != 0, "%s chain has zero length!\n" % chain1
    assert len2 != 0, "%s chain has zero length!\n" % chain2

    # The chain with more matched atoms is the receptor.
    if len(chain_sample[chain1]) > len(chain_sample[chain2]):
        receptor_chain, ligand_chain = chain1, chain2
        class1, class2 = 'receptor', 'ligand'
    else:
        receptor_chain, ligand_chain = chain2, chain1
        class1, class2 = 'ligand', 'receptor'

    # Set to align on receptor.
    assert len(chain_ref[receptor_chain]) == len(
        chain_sample[receptor_chain]
    ), "Different number of atoms in native and model receptor (chain %s) %d %d\n" % (
        receptor_chain, len(
            chain_ref[receptor_chain]), len(chain_sample[receptor_chain]))

    super_imposer.set_atoms(chain_ref[receptor_chain],
                            chain_sample[receptor_chain])
    super_imposer.apply(sample_model.get_atoms())

    assert len(chain_ref[ligand_chain]) != 0 or len(
        chain_sample[ligand_chain]
    ) != 0, "Zero number of equivalent atoms in native and model ligand (chain %s) %d %d.\nCheck that the residue numbers in model and native is consistent\n" % (
        ligand_chain, len(
            chain_ref[ligand_chain]), len(chain_sample[ligand_chain]))

    assert len(chain_ref[ligand_chain]) == len(
        chain_sample[ligand_chain]
    ), "Different number of atoms in native and model ligand (chain %s) %d %d\n" % (
        ligand_chain, len(
            chain_ref[ligand_chain]), len(chain_sample[ligand_chain]))

    # Ligand RMS in the receptor-superimposed frame, deliberately without
    # superimposing the ligand itself.
    coord1 = np.array([atom.coord for atom in chain_ref[ligand_chain]])
    coord2 = np.array([atom.coord for atom in chain_sample[ligand_chain]])
    diff = coord1 - coord2
    Lrms = np.sqrt(np.sum(diff * diff) / len(diff))

    # Mean of fnat and the two scaled RMS terms 1 / (1 + (rms / d)^2),
    # with d = 1.5 for irms and d = 8.5 for Lrms.
    DockQ = (float(fnat) + 1.0 / (1 + (irms / 1.5) * (irms / 1.5)) + 1.0 /
             (1 + (Lrms / 8.5) * (Lrms / 8.5))) / 3

    return {
        'DockQ': DockQ,
        'irms': irms,
        'Lrms': Lrms,
        'fnat': fnat,
        'nat_correct': nat_correct,
        'nat_total': nat_total,
        'fnonnat': fnonnat,
        'nonnat_count': nonnat_count,
        'model_total': model_total,
        'chain1': chain1,
        'chain2': chain2,
        'len1': len1,
        'len2': len2,
        'class1': class1,
        'class2': class2,
    }
Beispiel #3
0
def _standard_residues(chain):
    """Yield ``(residue, key)`` for each residue of *chain* that is not a hetatm.

    A residue is skipped when the first field of ``get_id()`` is not a blank.
    ``key`` is the second ``get_id()`` field (the residue number) followed by
    the chain id, e.g. ``'42A'``.
    """
    chain_id = chain.id
    for residue in chain:
        res_id = residue.get_id()
        if res_id[0] != ' ':  # Skip hetatm.
            continue
        yield residue, str(res_id[1]) + chain_id


def calc_DockQ(model, native, use_CA_only=False):
    """Compute the DockQ score of a two-chain model against its native.

    The external ``fnat`` program, looked up in the directory of the running
    script, is executed twice: once with trailing argument ``5`` for the
    contact statistics and once with ``10`` for the list of interface
    residues (presumably distance cutoffs -- confirm against ``fnat``).
    Both structures are then parsed with ``Bio.PDB.PDBParser`` and only atoms
    present in both the model and the native are used.

    * ``irms``: RMS after superimposing the interface atoms.
    * ``Lrms``: RMS of the ligand atoms after superimposing on the receptor,
      without superimposing the ligand again.
    * The chain with strictly more matched atoms is the receptor; on a tie
      ``chain1`` is the ligand.

    :param model: path of the model PDB file.
    :param native: path of the native (reference) PDB file.
    :param use_CA_only: if true, use only ``CA`` atoms instead of the
        backbone atoms ``CA``, ``C``, ``N`` and ``O``.
    :return: dict with keys ``DockQ``, ``irms``, ``Lrms``, ``fnat``,
        ``nat_correct``, ``nat_total``, ``fnonnat``, ``nonnat_count``,
        ``model_total``, ``chain1``, ``chain2``, ``len1``, ``len2``,
        ``class1`` and ``class2``.
    :raises AssertionError: if ``fnat`` fails, or the atom selections are
        empty or differ in size between model and native.
    :raises ValueError: if the model does not contain exactly two chains.
    """
    exec_path = os.path.dirname(os.path.abspath(sys.argv[0]))
    atom_for_sup = ['CA'] if use_CA_only else ['CA', 'C', 'N', 'O']

    # NOTE(review): these command lines are plain string concatenations run
    # through a shell by commands.getoutput, so paths containing spaces or
    # shell metacharacters will break; `commands` is also Python 2 only.
    cmd_fnat = exec_path + '/fnat ' + model + ' ' + native + ' 5'
    cmd_interface = exec_path + '/fnat ' + model + ' ' + native + ' 10'

    fnat_out = commands.getoutput(cmd_fnat)
    (fnat, nat_correct, nat_total, fnonnat, nonnat_count, model_total,
     _interface5A) = parse_fnat(fnat_out)
    assert fnat != -1, "Error running cmd: %s\n" % (cmd_fnat)

    inter_out = commands.getoutput(cmd_interface)
    (fnat_bb, _, _, _, _, _, interface) = parse_fnat(inter_out)
    assert fnat_bb != -1, "Error running cmd: %s\n" % (cmd_interface)

    pdb_parser = Bio.PDB.PDBParser(QUIET=True)
    ref_structure = pdb_parser.get_structure("reference", native)
    sample_structure = pdb_parser.get_structure("model", model)

    # Use the first model in the pdb-files for alignment.
    ref_model = ref_structure[0]
    sample_model = sample_structure[0]

    # Atom keys ("<resnum><chain>.<atom>") defined in the model.
    atoms_def_sample = set()
    for sample_chain in sample_model:
        for sample_res, key in _standard_residues(sample_chain):
            for a in atom_for_sup:
                if a in sample_res:
                    atom_key = key + '.' + a
                    if atom_key in atoms_def_sample:
                        print(atom_key + ' already added (MODEL)!!!')
                    atoms_def_sample.add(atom_key)

    # Atom keys defined in the native that are also defined in the model.
    atoms_def_in_both = set()
    for ref_chain in ref_model:
        for ref_res, key in _standard_residues(ref_chain):
            for a in atom_for_sup:
                atom_key = key + '.' + a
                if a in ref_res and atom_key in atoms_def_sample:
                    if atom_key in atoms_def_in_both:
                        print(atom_key + ' already added (Native)!!!')
                    atoms_def_in_both.add(atom_key)

    # Model pass: residue keys per chain and the interface atoms.
    chain_res = {}    # chain id -> residue keys, in order (used for lengths)
    sample_keys = {}  # chain id -> same keys as a set (used for lookups)
    sample_atoms = []
    common_interface = set()
    for sample_chain in sample_model:
        chain = sample_chain.id
        residue_keys = chain_res.setdefault(chain, [])
        residue_key_set = sample_keys.setdefault(chain, set())
        for sample_res, key in _standard_residues(sample_chain):
            residue_keys.append(key)
            residue_key_set.add(key)
            if key in interface:
                for a in atom_for_sup:
                    if a in sample_res and (key + '.' + a) in atoms_def_in_both:
                        sample_atoms.append(sample_res[a])
                common_interface.add(key)

    # Native pass: per-chain atoms shared with the model and interface atoms.
    chain_ref = {}
    common_residues = set()
    ref_atoms = []
    for ref_chain in ref_model:
        chain = ref_chain.id
        ref_chain_atoms = chain_ref.setdefault(chain, [])
        # A native chain that is absent from the model has no residue in
        # common with it; treat it as empty instead of raising KeyError.
        keys_in_sample = sample_keys.get(chain, frozenset())
        for ref_res, key in _standard_residues(ref_chain):
            in_sample = key in keys_in_sample
            at_interface = key in common_interface
            if not (in_sample or at_interface):
                continue
            for a in atom_for_sup:
                if a in ref_res and (key + '.' + a) in atoms_def_in_both:
                    atom = ref_res[a]
                    if in_sample:
                        ref_chain_atoms.append(atom)
                        common_residues.add(key)
                    if at_interface:
                        ref_atoms.append(atom)

    # Model atoms of the residues that were matched in the native.
    chain_sample = {}
    for sample_chain in sample_model:
        chain_atoms = chain_sample.setdefault(sample_chain.id, [])
        for sample_res, key in _standard_residues(sample_chain):
            if key not in common_residues:
                continue
            for a in atom_for_sup:
                if a in sample_res and (key + '.' + a) in atoms_def_in_both:
                    chain_atoms.append(sample_res[a])

    assert len(ref_atoms)!=0, "length of native is zero"
    assert len(sample_atoms)!=0, "length of model is zero"
    assert len(ref_atoms)==len(sample_atoms), "Different number of atoms in native and model %d %d\n" % (len(ref_atoms),len(sample_atoms))

    # Superimpose on the interface atoms; the resulting RMS is irms.
    super_imposer = Bio.PDB.Superimposer()
    super_imposer.set_atoms(ref_atoms, sample_atoms)
    super_imposer.apply(sample_model.get_atoms())
    irms = super_imposer.rms

    chain_ids = list(chain_sample)
    if len(chain_ids) != 2:
        raise ValueError(
            "Expected exactly two chains in the model, found %d: %s" %
            (len(chain_ids), ', '.join(str(c) for c in chain_ids)))
    chain1, chain2 = chain_ids

    len1 = len(chain_res[chain1])
    len2 = len(chain_res[chain2])
    assert len1!=0, "%s chain has zero length!\n" % chain1
    assert len2!=0, "%s chain has zero length!\n" % chain2

    # The chain with more matched atoms is the receptor.
    if len(chain_sample[chain1]) > len(chain_sample[chain2]):
        receptor_chain, ligand_chain = chain1, chain2
        class1, class2 = 'receptor', 'ligand'
    else:
        receptor_chain, ligand_chain = chain2, chain1
        class1, class2 = 'ligand', 'receptor'

    # Set to align on receptor.
    assert len(chain_ref[receptor_chain])==len(chain_sample[receptor_chain]), "Different number of atoms in native and model receptor (chain %s) %d %d\n" % (receptor_chain,len(chain_ref[receptor_chain]),len(chain_sample[receptor_chain]))

    super_imposer.set_atoms(chain_ref[receptor_chain], chain_sample[receptor_chain])
    super_imposer.apply(sample_model.get_atoms())

    assert len(chain_ref[ligand_chain])!=0 or len(chain_sample[ligand_chain])!=0, "Zero number of equivalent atoms in native and model ligand (chain %s) %d %d.\nCheck that the residue numbers in model and native is consistent\n" % (ligand_chain,len(chain_ref[ligand_chain]),len(chain_sample[ligand_chain]))

    assert len(chain_ref[ligand_chain])==len(chain_sample[ligand_chain]), "Different number of atoms in native and model ligand (chain %s) %d %d\n" % (ligand_chain,len(chain_ref[ligand_chain]),len(chain_sample[ligand_chain]))

    # Ligand RMS in the receptor-superimposed frame, deliberately without
    # superimposing the ligand itself.
    coord1 = np.array([atom.coord for atom in chain_ref[ligand_chain]])
    coord2 = np.array([atom.coord for atom in chain_sample[ligand_chain]])
    diff = coord1 - coord2
    Lrms = np.sqrt(np.sum(diff * diff) / len(diff))

    # Mean of fnat and the two scaled RMS terms 1 / (1 + (rms / d)^2),
    # with d = 1.5 for irms and d = 8.5 for Lrms.
    DockQ = (float(fnat) + 1.0/(1+(irms/1.5)*(irms/1.5)) + 1.0/(1+(Lrms/8.5)*(Lrms/8.5)))/3

    return {
        'DockQ': DockQ,
        'irms': irms,
        'Lrms': Lrms,
        'fnat': fnat,
        'nat_correct': nat_correct,
        'nat_total': nat_total,
        'fnonnat': fnonnat,
        'nonnat_count': nonnat_count,
        'model_total': model_total,
        'chain1': chain1,
        'chain2': chain2,
        'len1': len1,
        'len2': len2,
        'class1': class1,
        'class2': class2,
    }