Beispiel #1
0
def AverageProtectionFactors(ProteinStructure):
    """Average each residue's CA occupancy over all models and write it back.

    For every standard amino acid the occupancy of its "CA" atom is read
    from every model of ``ProteinStructure`` (this code treats that value as
    a log protection factor). The mean over the models is then stored as
    the occupancy of every atom of that residue, in every model.

    :param ProteinStructure: re-iterable collection of models. Each model is
        iterated as chains and indexed as
        ``model[chain_id][residue_id]["CA"]``.
    :return: None. The structure is modified in place.

    Any lookup error raised while indexing a model (chain, residue or CA
    atom missing in one of the models) propagates to the caller.
    """
    # The mean depends only on (chain id, residue id), so it is computed
    # once per residue instead of once per residue per model.
    AverageLogProtectionFactors = {}

    for ProteinModel in ProteinStructure:
        for Chain in ProteinModel:
            ChainID = Chain.get_id()

            for Residue in Chain:
                if not is_aa(Residue.get_resname(), standard=True):
                    continue

                Key = (ChainID, Residue.get_id())
                if Key in AverageLogProtectionFactors:
                    continue

                LogProtectionFactors = [
                    SecondProteinModel[ChainID][Key[1]]["CA"].get_occupancy()
                    for SecondProteinModel in ProteinStructure
                ]
                AverageLogProtectionFactors[Key] = (
                    sum(LogProtectionFactors) / float(len(LogProtectionFactors))
                )

    # Second pass: occupancies are overwritten only after every average has
    # been read, because the averages are computed from those occupancies.
    for ProteinModel in ProteinStructure:
        for Chain in ProteinModel:
            ChainID = Chain.get_id()

            for Residue in Chain:
                if is_aa(Residue.get_resname(), standard=True):
                    Average = AverageLogProtectionFactors[(ChainID, Residue.get_id())]

                    for Atom in Residue:
                        Atom.set_occupancy(Average)

    return
    def __init__(self, fasta_align, m1, m2, si=0, sj=1):
        """Pair up the residues of two models according to an alignment.

        The alignment is walked column by column. For every non-gap letter
        the next amino-acid residue of the corresponding model is consumed
        (other residues are skipped) and checked with
        ``self._test_equivalence``.

        Arguments:

        - fasta_align - Alignment object; must offer
          ``get_alignment_length()`` and column access ``[:, i]``
        - m1, m2 - two models
        - si, sj - the rows of the Alignment object that correspond to
          m1 and m2

        Sets ``self.map12`` (residue of m1 -> residue of m2 or None),
        ``self.map21`` (the reverse) and ``self.duos`` (one
        ``(r1, r2)`` pair per column, None marking a gap).
        """
        n_columns = fasta_align.get_alignment_length()
        # Flat residue lists of both models
        residues_a = Selection.unfold_entities(m1, 'R')
        residues_b = Selection.unfold_entities(m2, 'R')
        # Read cursors into the two residue lists
        cursor_a = 0
        cursor_b = 0
        forward = {}
        backward = {}
        pairs = []
        for col in range(0, n_columns):
            letters = fasta_align[:, col]
            letter_a = letters[si]
            letter_b = letters[sj]

            res_a = None
            if letter_a != "-":
                # Advance to the next amino acid of the first model
                res_a = residues_a[cursor_a]
                cursor_a += 1
                while not is_aa(res_a):
                    res_a = residues_a[cursor_a]
                    cursor_a += 1
                self._test_equivalence(res_a, letter_a)

            res_b = None
            if letter_b != "-":
                # Advance to the next amino acid of the second model
                res_b = residues_b[cursor_b]
                cursor_b += 1
                while not is_aa(res_b):
                    res_b = residues_b[cursor_b]
                    cursor_b += 1
                self._test_equivalence(res_b, letter_b)

            if res_a:
                forward[res_a] = res_b
            if res_b:
                backward[res_b] = res_a
            # One entry per alignment column, gaps included
            pairs.append((res_a, res_b))
        self.map12 = forward
        self.map21 = backward
        self.duos = pairs
    def get_contact_map(self, chain_id):
        '''
            Return the C-alpha distance matrix of one chain as integers.

            Input:
                chain_id  : id of the chain, looked up in the first model
                            (self.structure[0][chain_id])
            Return:
                Square numpy array whose side is
                self.get_number_of_residues(chain_id). Entry [i, j] is the
                distance between the CA atoms of the i-th and j-th residue
                of the chain, cast to integer. Positions are counted over
                every residue of the chain; entries that involve a residue
                which is not an amino acid are never written.

            NOTE(review): unwritten entries hold NaN before the final cast,
            and casting NaN to an integer gives an unspecified value.
        '''
        length = self.get_number_of_residues(chain_id)
        contact_map = np.empty((length, length), dtype=self.dtype)
        contact_map[:] = np.nan  # initialize as nan

        chain = self.structure[0][chain_id]

        for i, residue_1 in enumerate(chain):
            # Skip e.g. water molecules
            if not is_aa(residue_1):
                continue
            for j, residue_2 in enumerate(chain):
                # The map is symmetric: compute each pair once (i <= j) and
                # mirror it.
                if j < i or not is_aa(residue_2):
                    continue
                ca_dist = residue_1['CA'] - residue_2['CA']
                contact_map[i, j] = ca_dist
                contact_map[j, i] = ca_dist

        # np.int was only an alias of the builtin int and has been removed
        # from NumPy; the builtin gives the same result.
        return contact_map.astype(int)
    def __init__(self, fasta_align, m1, m2, si=0, sj=1):
        """Map the residues of two models onto each other via an alignment.

        Attributes:
         - fasta_align - Alignment object (needs ``get_alignment_length()``
           and column slicing ``[:, i]``)
         - m1, m2 - two models
         - si, sj - the sequences in the Alignment object that
           correspond to the structures

        After construction ``self.map12`` and ``self.map21`` hold the
        residue-to-residue mappings in both directions and ``self.duos``
        holds one ``(r1, r2)`` tuple per alignment column (None for a gap).
        """
        n_cols = fasta_align.get_alignment_length()
        model1_residues = Selection.unfold_entities(m1, 'R')
        model2_residues = Selection.unfold_entities(m2, 'R')
        # Index of the next unread residue in each model
        pos1 = 0
        pos2 = 0
        first_to_second = {}
        second_to_first = {}
        aligned_pairs = []
        for col_index in range(n_cols):
            col = fasta_align[:, col_index]
            symbol1, symbol2 = col[si], col[sj]

            if symbol1 == "-":
                res1 = None
            else:
                # Consume residues until an amino acid turns up
                while True:
                    res1 = model1_residues[pos1]
                    pos1 += 1
                    if is_aa(res1):
                        break
                self._test_equivalence(res1, symbol1)

            if symbol2 == "-":
                res2 = None
            else:
                # Consume residues until an amino acid turns up
                while True:
                    res2 = model2_residues[pos2]
                    pos2 += 1
                    if is_aa(res2):
                        break
                self._test_equivalence(res2, symbol2)

            if res1:
                first_to_second[res1] = res2
            if res2:
                second_to_first[res2] = res1
            # Gap positions are kept as None so the list follows the columns
            aligned_pairs.append((res1, res2))
        self.map12 = first_to_second
        self.map21 = second_to_first
        self.duos = aligned_pairs
    def computeOneFile(self, fileName):
        '''
      Computes the magnitudes for each pair of standard amino acids of a
      given pdb file and writes them to <outPath>/<prefix>.distMat, one
      line per ordered residue pair. Only the first model is used.

      If the output file already exists nothing is computed. If anything
      goes wrong while writing, the partial output is removed with
      tryToRemove and the exception is re-raised.

      @param fileName: str. fname to pdb file
      @return: 0
    '''
        prefixAndChainTypeId = (fileName.split("/")[-1]).split(".pdb")[0]
        outName = os.path.join(self.outPath, prefixAndChainTypeId + ".distMat")
        if os.path.isfile(outName):
            print("Already computed Distance Maps")
            return 0
        structure = self.parser.get_structure(prefixAndChainTypeId, fileName)
        structCenterMass = self.getStructCenterMass(structure)

        try:
            # The context manager closes the handle before the except clause
            # below tries to remove the file.
            with open(outName, "w") as outFile:
                outFile.write(
                    "chainId1 structResId1 chainId2 structResId2 distance angle_to_protCM\n"
                )
                # Label every standard amino acid once instead of once per
                # pair.
                labelledResidues = []
                for res in structure[0].get_residues():
                    if not is_aa(res, standard=True):
                        continue
                    _, _, chainId, resId = res.get_full_id()
                    # Residue number followed by the remaining id fields
                    resLabel = "".join([str(resId[1])] + list(resId[2:]))
                    if chainId == " ":
                        chainId = "*"
                    labelledResidues.append((res, chainId, resLabel))

                for res1, chainId1, resId1 in labelledResidues:
                    for res2, chainId2, resId2 in labelledResidues:
                        magnitude = self.getMagnitude(
                            res1, res2, structCenterMass)
                        values = " ".join([str(val) for val in magnitude])
                        outFile.write(
                            " ".join((chainId1, resId1, chainId2, resId2,
                                      values)) + "\n")
        except (KeyboardInterrupt, Exception):
            print("Exception happend computing %s" % outName)
            tryToRemove(outName)
            raise
        return 0
Beispiel #6
0
    def __init__(self, model, radius=12.0, offset=0):
        """Count, for every residue, the CA atoms close to its own CA atom.

        Polypeptides are built from the model with ``CaPPBuilder``. For each
        amino-acid residue that has a CA atom, every other such residue
        whose CA atom lies closer than ``radius`` is counted. The count is
        stored in ``residue.xtra['EXP_CN']`` and handed to
        ``AbstractPropertyMap.__init__`` keyed by ``(chain_id, res_id)``.

        :param model: the model that contains the residues
        :type model: L{Model}

        :param radius: radius of the sphere (centred at the CA atom)
        :type radius: float

        :param offset: number of flanking residues (same polypeptide) that
                       are ignored in the calculation of the number of
                       neighbors
        :type offset: int

        """
        assert(offset >= 0)
        builder = CaPPBuilder()
        peptides = builder.build_peptides(model)
        count_by_key = {}
        residue_counts = []
        ordered_keys = []
        for pep_a in peptides:
            for idx_a, res_a in enumerate(pep_a):
                if not (is_aa(res_a) and res_a.has_id('CA')):
                    continue
                centre = res_a['CA']
                neighbours = 0
                for pep_b in peptides:
                    same_peptide = pep_a is pep_b
                    for idx_b, res_b in enumerate(pep_b):
                        # Flanking residues (and the residue itself) are
                        # excluded only within the same polypeptide
                        if same_peptide and abs(idx_a - idx_b) <= offset:
                            continue
                        if not (is_aa(res_b) and res_b.has_id('CA')):
                            continue
                        if (res_b['CA'] - centre) < radius:
                            neighbours += 1
                key = (res_a.get_parent().get_id(), res_a.get_id())
                # Same value recorded three ways: dict, ordered keys, list
                count_by_key[key] = neighbours
                residue_counts.append((res_a, neighbours))
                ordered_keys.append(key)
                res_a.xtra['EXP_CN'] = neighbours
        AbstractPropertyMap.__init__(self, count_by_key, ordered_keys,
                                     residue_counts)
Beispiel #7
0
    def __init__(self, model, radius=12.0, offset=0):
        """Compute a CA-based neighbour count for each residue of the model.

        A residue's value is the number of CA atoms (of amino-acid residues
        in the polypeptides built by ``CaPPBuilder``) lying closer than
        ``radius`` to the residue's own CA atom. Results are written to
        ``residue.xtra['EXP_CN']`` and passed on to
        ``AbstractPropertyMap.__init__``.

        :param model: the model that contains the residues
        :type model: L{Model}

        :param radius: radius of the sphere (centred at the CA atom)
        :type radius: float

        :param offset: number of flanking residues that are ignored in
                       the calculation of the number of neighbors
        :type offset: int

        """
        assert (offset >= 0)

        def _ca_atom(res):
            # CA atom of an amino-acid residue, or None when not usable
            if is_aa(res) and res.has_id('CA'):
                return res['CA']
            return None

        polypeptides = CaPPBuilder().build_peptides(model)
        exposure = {}
        exposure_items = []
        exposure_keys = []
        for current in polypeptides:
            for i in range(len(current)):
                residue = current[i]
                own_ca = _ca_atom(residue)
                if own_ca is None:
                    continue
                total = 0
                for other in polypeptides:
                    for j in range(len(other)):
                        within_offset = other is current and abs(i - j) <= offset
                        if within_offset:
                            continue
                        other_ca = _ca_atom(other[j])
                        if other_ca is not None and (other_ca - own_ca) < radius:
                            total += 1
                chain_id = residue.get_parent().get_id()
                res_id = residue.get_id()
                exposure[(chain_id, res_id)] = total
                exposure_items.append((residue, total))
                exposure_keys.append((chain_id, res_id))
                # Also expose the value on the residue itself
                residue.xtra['EXP_CN'] = total
        AbstractPropertyMap.__init__(self, exposure, exposure_keys,
                                     exposure_items)
Beispiel #8
0
    def __init__(self, align, m1, m2):
        """Produce a residue mapping between two models from an alignment.

        Input:
        - align - indexable alignment record: ``align[0]`` and ``align[1]``
          are the two aligned sequences (gaps written as "-") and
          ``align[4] - align[3]`` is the number of columns processed.
          (Presumably a Bio.pairwise2 alignment tuple - confirm with the
          caller.)
        - m1, m2 - two models; ``align[0]`` belongs to m1 and ``align[1]``
          to m2

        Sets ``self.map12`` (residue of m1 -> residue of m2 or None),
        ``self.map21`` (the reverse) and ``self.residue_pairs`` (one
        ``(r1, r2)`` tuple per column, None marking a gap).

        NOTE(review): columns are read from index 0 even when ``align[3]``
        is not 0 - confirm that this is intended for local alignments.
        """
        length = align[4] - align[3]
        # Get the residues in the models
        rl1 = Selection.unfold_entities(m1, 'R')
        rl2 = Selection.unfold_entities(m2, 'R')
        # Residue positions
        p1 = 0
        p2 = 0
        # Map equivalent residues to each other
        map12 = {}
        map21 = {}
        residue_pairs = []
        for i in range(length):
            aa1 = align[0][i]
            aa2 = align[1][i]
            if aa1 != "-":
                while True:
                    # Loop until an amino acid is found
                    r1 = rl1[p1]
                    p1 = p1 + 1
                    if is_aa(r1):
                        break
                # Check the residue that was accepted. Inside the loop the
                # check could only ever run on the skipped residues.
                self._test_equivalence(r1, aa1)
            else:
                r1 = None
            if aa2 != "-":
                while True:
                    # Loop until an amino acid is found
                    r2 = rl2[p2]
                    p2 = p2 + 1
                    if is_aa(r2):
                        break
                self._test_equivalence(r2, aa2)
            else:
                r2 = None
            if r1:
                map12[r1] = r2
            if r2:
                map21[r2] = r1

            residue_pairs.append((r1, r2))
        # Assigned after the loop so the attributes also exist when the
        # alignment has no columns.
        self.map12 = map12
        self.map21 = map21
        self.residue_pairs = residue_pairs
Beispiel #9
0
    def get_sequence(self, chain_id):
        '''
            Return the one-letter amino acid sequence of a chain.

            Input:
                chain_id  : id of the chain to read (compared with chain.id)
            Return:
                String with one letter per standard amino acid, in the
                order the residues are returned by the chain. Residues that
                are not standard amino acids are left out. An empty string
                is returned when no chain has the requested id.

            NOTE(review): every chain yielded by
            self.structure.get_chains() with a matching id contributes, so
            a structure with several models would repeat the sequence -
            confirm whether only the first model is wanted.
        '''
        letters = []
        for chain in self.structure.get_chains():
            if chain.id != chain_id:
                continue
            for residue in chain.get_residues():
                resname = residue.get_resname()
                if is_aa(resname, standard=True):
                    letters.append(three_to_one(resname))

        return "".join(letters)
Beispiel #10
0
    def from_structure(cls, original, filter_residues):
        """
        Loads structure as a protein, exposing
        protein-specific methods.

        A new ``cls`` instance is created with the id and full_id of
        ``original``; every child of ``original`` is deep-copied into it,
        as are ``header`` and ``xtra``, so ``original`` is left untouched.

        When ``filter_residues`` is true, residues whose id starts with a
        non-blank flag or that fail ``is_aa`` are detached from the copy,
        and chains left without residues are detached from their model.

        Returns the new instance.
        """
        P = cls(original.id)
        P.full_id = original.full_id

        for child in original.child_dict.values():
            P.add(deepcopy(child))

        if filter_residues:
            # Discriminate non-residues (is_aa function). Collected first
            # so nothing is detached while the hierarchy is being walked.
            remove_list = []
            for model in P:
                for chain in model:
                    for residue in chain:
                        if residue.get_id()[0] != ' ' or not is_aa(residue):
                            remove_list.append(residue)

            for residue in remove_list:
                residue.parent.detach_child(residue.id)

            # Remove empty chains. Each chain is detached from its own
            # parent model (not from whichever model the loop above ended
            # on), and the list is snapshotted before detaching.
            empty_chains = [
                chain for chain in P.get_chains() if not len(chain.child_list)
            ]
            for chain in empty_chains:
                chain.parent.detach_child(chain.id)

        P.header = deepcopy(original.header)
        P.xtra = deepcopy(original.xtra)

        return P
    def get_bfactors(self, chain_id):
        '''
            Return normalized per-residue B-factors of a chain as integers.

            Input:
                chain_id  : id of the chain, looked up in the first model
                            (self.structure[0][chain_id])
            Return:
                numpy int64 array with one entry per amino-acid residue
                (as decided by is_aa on the residue name). Each entry is
                the mean B-factor over the atoms of the residue, passed
                through zscore and then cast to integer with astype.

            NOTE(review): no NaN-aware handling is done here; if zscore is
            scipy.stats.zscore with default arguments, a single NaN mean
            would propagate to every entry - confirm.
        '''
        residue_means = [
            np.mean([atom.get_bfactor() for atom in residue])
            for residue in self.structure[0][chain_id]
            if is_aa(residue.get_resname())
        ]
        standardized = zscore(residue_means)

        # astype drops the fractional part
        return standardized.astype(np.int64)
    def get_contact_map(self, chain_id):
        '''
            Return the C-alpha distance matrix of a chain as integers.

            Input:
                chain_id  : id of the chain, looked up in the first model
                            (self.structure[0][chain_id])
            Return:
                Square numpy int64 array whose side is the length of
                self.get_sequence(chain_id). Entry [i, j] is
                self.get_ca_distance_list(aas, i, j), where aas is the list
                of residues of the chain accepted by is_aa; values are
                stored as float32 and then cast to integer.
        '''
        amino_acids = [
            res
            for res in self.structure[0][chain_id].get_residues()
            if is_aa(res)
        ]

        size = len(self.get_sequence(chain_id))
        distances = np.zeros((size, size), dtype=np.float32)

        for row in range(size):
            for col in range(size):
                distances[row, col] = self.get_ca_distance_list(
                    amino_acids, row, col)

        # astype drops the fractional part
        return distances.astype(np.int64)
Beispiel #13
0
def GetResidueDepPDB(pdb, pdbfile):
    """Append the residue depth of every amino acid of a structure to OUT.

    The structure is obtained with ``GetStructure(pdb)`` and only its first
    model is used. The surface is computed with
    ``get_surface(pdbfile, PDBTOXYZ, MSMS)``. For each amino-acid residue
    one tab-separated line is produced:
    pdb, chain id, residue number, residue name, residue depth, CA depth.
    All lines are appended to the module-level file ``OUT`` while holding
    ``mutex_writefile``.

    :param pdb: identifier passed to GetStructure and written in column 1
    :param pdbfile: path handed to get_surface
    :return: None. Returns early, after printing a message, when the
        surface cannot be computed.
    """
    s = GetStructure(pdb)
    model = s[0]
    residuelist = Selection.unfold_entities(model, 'R')
    try:
        surface = get_surface(pdbfile, PDBTOXYZ, MSMS)
    except Exception:
        print("cannot get surface for " + pdbfile)
        return
    lines = []
    for residue in residuelist:
        if not is_aa(residue):
            continue
        resid = residue.get_id()
        resname = residue.get_resname()
        chainid = residue.get_parent().get_id()
        try:
            rd = residue_depth(residue, surface)
        except Exception:
            # Best effort: a residue whose depth cannot be computed is
            # left out of the output.
            continue
        ca_rd = ca_depth(residue, surface)
        info = [pdb, chainid, resid[1], resname, str(rd), str(ca_rd)]
        lines.append("\t".join(map(str, info)) + "\n")
    content = "".join(lines)

    # Release the lock and close the file even if the write fails
    mutex_writefile.acquire()
    try:
        with open(OUT, "a") as outobj:
            outobj.write(content)
    finally:
        mutex_writefile.release()
Beispiel #14
0
 def is_peptide(self):
     """Tell whether the component is a polymer made only of amino acids.

     Returns None when ``self.structure`` is falsy. Otherwise returns
     ``self.is_polymer`` itself when that is falsy, else whether
     ``is_aa`` accepts the name of every residue of the structure.
     """
     if not self.structure:
         return None
     polymer_flag = self.is_polymer
     if not polymer_flag:
         return polymer_flag
     residue_checks = [
         is_aa(residue.get_resname())
         for residue in self.structure.get_residues()
     ]
     return all(residue_checks)
Beispiel #15
0
def getPDBSequence(pdb_name, pdb_path, chain):
    """Read the standard amino acids of one chain of a PDB file.

    Only the first model is used. Residues that are not standard amino
    acids are skipped: no placeholder is added for them and no error is
    raised.

    :param pdb_name: structure id handed to the parser; used in messages
    :param pdb_path: path of the PDB file
    :param chain: id (string) of the chain to read
    :return: tuple ``(residue_position, residue_name)`` of two parallel
        lists: the residue numbers and the one-letter codes
    :raises Exception: when the file cannot be parsed or read; the original
        error is printed and logged first
    """
    logging.info("getPDBSequence pdb " + pdb_name + " cadena " + chain)
    from Bio.PDB.PDBParser import PDBParser
    from Bio.PDB.Polypeptide import three_to_one
    from Bio.PDB.Polypeptide import is_aa
    residue_position = []
    residue_name = []
    try:
        parser = PDBParser(PERMISSIVE=1)
        structure = parser.get_structure(pdb_name, pdb_path)
        model = structure[0]
        # Kept under its own name: `chain` must remain the id string
        # because the error messages below concatenate it.
        chain_entity = model[chain]
        for residue in chain_entity:
            resname = residue.get_resname()
            if is_aa(resname, standard=True):
                residue_name.append(three_to_one(resname))
                residue_position.append(residue.get_full_id()[3][1])

    except Exception as inst:
        print(inst)
        logging.error(
            "Error no controlado intentando leer la sequencia del pdb " +
            pdb_name + " cadena " + chain + " path " + pdb_path)
        raise Exception("PDB Invalido pdb " + pdb_name + " cadena " + chain +
                        " path " + pdb_path)
    return residue_position, residue_name
    '''
Beispiel #16
0
def _get_ca_list(chain):
    """

    :param chain: The structure chain object
    :return:
    """
    ca_list = []  # [<ca_atom_object or None>, ...]
    residues = [
    ]  # [(<index>, <insertion_code>, <3_letter_residue_name>_upper>), ...]
    sequence = ""

    for residue in chain:
        if is_aa(residue):
            _, _, chain_id, res_id = residue.get_full_id()
            try:
                residues.append((res_id[1], res_id[2].strip(),
                                 residue.get_resname().upper()))
                sequence += IUPACData.protein_letters_3to1_extended.get(
                    residue.get_resname().capitalize(), 'X')
                ca_list.append(residue['CA'])
            except KeyError:
                logging.warning("Failed to find CA in residue {}".format(
                    residue.get_full_id()))

    return residues, sequence, ca_list
Beispiel #17
0
 def residues_map(self, selected_chain=None, standard_aa=True):
     """Index the amino-acid residues of each chain by position.

     Returns a dict ``{chain_id: {position: residue_id}}`` where position
     counts, from 0, the residues of the chain accepted by
     ``is_aa(residue, standard=standard_aa)``. When ``selected_chain`` is
     truthy only the chain with that id is included.
     """
     rmap = {}
     for chain in self.struct.get_chains():
         if selected_chain and selected_chain != chain.id:
             continue
         index_to_id = {}
         position = 0
         for residue in chain.get_residues():
             if is_aa(residue, standard=standard_aa):
                 index_to_id[position] = residue.id
                 position += 1
         rmap[chain.id] = index_to_id
     return rmap
def CalculateExchangeRates(ProteinModel, Temperature, pH, ReferenceData):
    """Store per-residue intrinsic exchange rates as atom B-factors.

    For every chain of ``ProteinModel`` a sequence is built with one entry
    per residue: the one-letter code for standard amino acids and "?" for
    anything else. The sequence is handed to
    ``CalculateExchangeRatesForASingleChain`` together with
    ``Temperature``, ``pH`` and ``ReferenceData``. The i-th returned value
    is written as the B-factor of every atom of the i-th residue. Chains
    without residues are skipped.

    :return: None. The model is modified in place.
    """
    # Loop over all residue chains in the model
    for Chain in ProteinModel:
        Sequence = []

        for Residue in Chain:
            ResidueName = Residue.get_resname()

            if is_aa(ResidueName, standard=True):
                Sequence.append(three_to_one(ResidueName))
            else:
                # Placeholder keeps sequence positions aligned with residues
                Sequence.append("?")

        if not Sequence:
            continue

        # Estimate intrinsic exchange rates
        IntrinsicExchangeRates = CalculateExchangeRatesForASingleChain(
            Sequence, Temperature, pH, ReferenceData)

        # Assign intrinsic exchange rates as b-factors
        for Index, Residue in enumerate(Chain):
            IntrinsicExchangeRate = IntrinsicExchangeRates[Index]

            for Atom in Residue:
                Atom.set_bfactor(IntrinsicExchangeRate)

    return
Beispiel #19
0
    def __init__(self, model, pdb_file=None):
        """Compute residue depth and CA depth for each amino acid of model.

        The surface is obtained with ``get_surface(model)``. Every residue
        accepted by ``is_aa`` gets its ``(residue_depth, ca_depth)`` pair
        stored under ``(chain_id, res_id)`` and in the residue's ``xtra``
        dict ('EXP_RD' and 'EXP_RD_CA'). ``pdb_file`` is unused and only
        triggers a deprecation warning when given.
        """
        if pdb_file is not None:
            # The argument is kept only for backwards compatibility
            message = ("ResidueDepth no longer requires a pdb file. "
                       "This argument will be removed in a future release "
                       "of Biopython.")
            warnings.warn(message, BiopythonDeprecationWarning)

        by_key = {}
        by_residue = []
        key_order = []
        all_residues = Selection.unfold_entities(model, 'R')
        surface = get_surface(model)
        for res in all_residues:
            if is_aa(res):
                depth = residue_depth(res, surface)
                ca_only_depth = ca_depth(res, surface)
                key = (res.get_parent().get_id(), res.get_id())
                depths = (depth, ca_only_depth)
                by_key[key] = depths
                by_residue.append((res, depths))
                key_order.append(key)
                # Make the values available on the residue as well
                res.xtra['EXP_RD'] = depth
                res.xtra['EXP_RD_CA'] = ca_only_depth
        AbstractPropertyMap.__init__(self, by_key, key_order, by_residue)
Beispiel #20
0
def splitOnePDB(fname, outPath):
    """Split a pdb file into ligand/receptor files, one pair per chain.

    The structure is loaded with the module-level ``parser`` and only its
    first model is used. Chains whose length (residues with a CA atom
    minus residues that are not standard amino acids) lies outside
    ``[MIN_SEQ_LEN, MAX_SEQ_LEN]`` are dropped. Each retained chain that
    qualifies as a ligand is saved as ``<prefix>-<i>_l_u.pdb`` and the
    other retained chains together as ``<prefix>-<i>_r_u.pdb`` in
    ``outPath``.

    Returns 0 when the file cannot be loaded, when a KeyError occurs while
    measuring the chains, or when fewer than two chains remain. Otherwise
    returns None after writing the files.
    """
    try:
        s = parser.get_structure(fname, fname)
    except Exception:
        print("Error loading pdb")
        return 0

    rejectedChainIds = []
    try:
        for chain in s[0]:
            nNonStandard = sum(
                1 for res in chain.get_list() if not is_aa(res, standard=True))
            nWithCa = sum(1 for res in chain if "CA" in res)
            chainLen = nWithCa - nNonStandard
            if not (MIN_SEQ_LEN <= chainLen <= MAX_SEQ_LEN):
                print(chainLen)
                rejectedChainIds.append(chain.get_id())
    except KeyError:
        print("Not good model")
        return 0
    # Detach only after the iteration over the model has finished
    for rejectedId in rejectedChainIds:
        s[0].detach_child(rejectedId)

    if len(s[0].get_list()) < 2:
        print(s)
        print(s[0].get_list())
        print("Not enough good chains")
        return 0

    receptorChainList = []
    ligandChainList = []
    for chain1 in s[0]:
        partners = [chain2 for chain2 in s[0] if chain1 != chain2]
        # With exactly two chains the second one would only mirror the
        # first split, so it is skipped
        if len(partners) > 1 or partners[0] not in ligandChainList:
            ligandChainList.append(chain1)
            receptorChainList.append(partners)

    def _saveChains(chains, structName, outName):
        # Write the given chains as model 0 of a fresh structure
        io = PDBIO()
        newStruct = Structure(structName)
        newStruct.add(Model(0))
        for oneChain in chains:
            oneChain.set_parent(newStruct[0])
            newStruct[0].add(oneChain)
        io.set_structure(newStruct)
        io.save(outName)

    prefix = os.path.basename(fname).split(".")[0]
    for i, (ligandChain, receptorChains) in enumerate(
            zip(ligandChainList, receptorChainList)):
        _saveChains([ligandChain], prefix + "ligand",
                    os.path.join(outPath, prefix + "-" + str(i) + "_l_u.pdb"))
        _saveChains(receptorChains, prefix + "receptor",
                    os.path.join(outPath, prefix + "-" + str(i) + "_r_u.pdb"))
        print("ligand:", ligandChain, "receptor:", receptorChains)
Beispiel #21
0
    def __init__(self, model, pdb_file=None):
        """Build the residue-depth property map for ``model``.

        For each residue of the model accepted by ``is_aa`` the residue
        depth and the CA depth relative to ``get_surface(model)`` are
        computed, recorded under ``(chain_id, res_id)`` and copied into
        ``residue.xtra`` ('EXP_RD', 'EXP_RD_CA'). Passing ``pdb_file``
        has no effect other than a deprecation warning.
        """
        # Issue warning if pdb_file is given
        if pdb_file is not None:
            warnings.warn(
                "ResidueDepth no longer requires a pdb file."
                " This argument will be removed in a future release"
                " of Biopython.",
                BiopythonDeprecationWarning)

        depth_dict, depth_list, depth_keys = {}, [], []
        residue_list = Selection.unfold_entities(model, 'R')
        # Surface used as reference for all depth values
        surface = get_surface(model)
        for residue in (r for r in residue_list if is_aa(r)):
            rd = residue_depth(residue, surface)
            ca_rd = ca_depth(residue, surface)
            res_id = residue.get_id()
            chain_id = residue.get_parent().get_id()
            key = (chain_id, res_id)
            depth_keys.append(key)
            depth_list.append((residue, (rd, ca_rd)))
            depth_dict[key] = (rd, ca_rd)
            # Update xtra information
            residue.xtra.update({'EXP_RD': rd, 'EXP_RD_CA': ca_rd})
        AbstractPropertyMap.__init__(self, depth_dict, depth_keys, depth_list)
Beispiel #22
0
def get_pdb_sequence(input_pdb_file,
                     chain_id,
                     mapping_output=False,
                     with_gaps=False):
    """Get the sequence of one chain of a PDB file.

    Only the first model is read. A residue is used when it has a CA atom,
    is accepted by ``is_aa`` and has a blank third id field (the insertion
    code in Biopython residue ids). Residues whose name ``three_to_one``
    cannot translate are ignored.

    :param input_pdb_file: path of the PDB file (also used as structure id)
    :param chain_id: id of the chain to read
    :param mapping_output: return the ``{residue_number: letter}`` dict
        instead of a string
    :param with_gaps: add '-' for every residue number missing between the
        first and the last mapped residue
    :return: the mapping dict, or the letters joined in residue-number
        order. Empty when no residue qualifies.
    """
    mapping = {}
    pdb_parser = PDBParser(PERMISSIVE=True, QUIET=True)
    structure = pdb_parser.get_structure(input_pdb_file, input_pdb_file)
    chain = structure[0][chain_id]
    for res in chain:
        if "CA" in res.child_dict and is_aa(res) and res.id[2] == ' ':
            try:
                mapping[res.id[1]] = three_to_one(res.get_resname())
            except KeyError:
                # Ignore non standard residues such as HIC, MSE, etc.
                pass

    # Nothing to fill when no residue was mapped (there is no first or
    # last residue number to span).
    if with_gaps and mapping:
        start, end = min(mapping), max(mapping)
        for number in range(start, end + 1):
            mapping.setdefault(number, '-')

    if mapping_output:
        return mapping
    return ''.join(mapping[k] for k in sorted(mapping))
Beispiel #23
0
def CalculateHydrogenBonds(ProteinModel, Filename, EnergyCutoff, PathToDSSP):
    """Count backbone hydrogen bonds per standard residue using DSSP.

    Runs DSSP on the model and, for every standard amino acid, counts how
    many of the two energies stored at positions 7 and 11 of its DSSP entry
    are below ``EnergyCutoff`` (in Biopython's DSSP tuple layout these are
    the two N-H-->O energies). The total is written to stdout.

    :param ProteinModel: model whose chains and residues are scanned
    :param Filename: structure file handed to DSSP
    :param EnergyCutoff: an energy strictly below this value counts as a bond
    :param PathToDSSP: DSSP executable passed as ``dssp=``
    :returns: dict mapping ``(Chain, Residue)`` to its bond count (0, 1 or 2)
    """
    # Run DSSP algorithm
    DSSPOutput = DSSP(ProteinModel, Filename, dssp=PathToDSSP)

    HydrogenBonds = {}
    TotalNumberOfHydrogenBonds = 0

    for Chain in ProteinModel:
        ChainID = Chain.get_id()

        for Residue in Chain:
            if not is_aa(Residue.get_resname(), standard=True):
                continue

            ResidueID = Residue.get_id()
            HydrogenBonds[Chain, Residue] = 0

            # Only a missing entry is an expected failure; any other error
            # (or Ctrl-C) must not be reported as "no DSSP entry".
            try:
                DSSPEntry = DSSPOutput[(ChainID, ResidueID)]
            except KeyError:
                sys.stderr.write("No DSSP entry generated for amino acid residue " + str(ResidueID[1]) + ". Ignoring the residue. \n")
                continue

            for EnergyIndex in (7, 11):
                if float(DSSPEntry[EnergyIndex]) < EnergyCutoff:
                    HydrogenBonds[Chain, Residue] += 1
                    TotalNumberOfHydrogenBonds += 1

    sys.stdout.write(str(TotalNumberOfHydrogenBonds) + " backbone N-O hydrogen bonds.\n")

    return HydrogenBonds
Beispiel #24
0
    def __init__(self, model, msms_exec=None):
        """Compute residue and CA depth for every amino acid of *model*.

        ``msms_exec`` names the MSMS executable; ``"msms"`` is used when it
        is None. Each amino acid also gets its depths stored in ``xtra``
        under ``EXP_RD`` and ``EXP_RD_CA``.
        """
        executable = "msms" if msms_exec is None else msms_exec

        by_key = {}
        entries = []
        keys = []
        all_residues = Selection.unfold_entities(model, "R")
        # Surface of the whole model, produced by MSMS
        surface = get_surface(model, MSMS=executable)
        for res in all_residues:
            if is_aa(res):
                depths = (residue_depth(res, surface), ca_depth(res, surface))
                # Entries are keyed by (chain id, residue id)
                res_id = res.get_id()
                key = (res.get_parent().get_id(), res_id)
                by_key[key] = depths
                entries.append((res, depths))
                keys.append(key)
                # Mirror the values on the residue itself
                res.xtra["EXP_RD"], res.xtra["EXP_RD_CA"] = depths
        AbstractPropertyMap.__init__(self, by_key, keys, entries)
Beispiel #25
0
    def create_sequence(self, pdb_code, pdb_path):
        """Store one sequence entry per chain of the first model of a PDB file.

        For each chain with at least one standard amino acid, the identifier
        ``<pdb_code>_<chain id>_<first residue number>_<last residue number>``
        is built; a ``Bioentry`` and its ``Biosequence`` are saved unless an
        entry with that identifier already exists in ``self.biodb``.
        """
        pdb = PDB.objects.get(code=pdb_code)

        parser = PDBParser(PERMISSIVE=1, QUIET=1)
        struct = parser.get_structure(pdb_code, pdb_path)
        for chain in struct[0].get_chains():
            residues = [
                residue for residue in chain.get_residues()
                if is_aa(residue, standard=True)
            ]
            if not residues:
                continue

            seq = "".join(seq1(residue.resname) for residue in residues)
            first_number = str(residues[0].id[1])
            last_number = str(residues[-1].id[1])
            seqid = "_".join((pdb_code, chain.id, first_number, last_number))

            existing = Bioentry.objects.filter(biodatabase=self.biodb,
                                               identifier=seqid)
            if existing.exists():
                continue

            be = Bioentry(biodatabase=self.biodb,
                          accession=seqid,
                          identifier=seqid,
                          name=pdb.code)
            be.save()
            Biosequence(bioentry=be, seq=seq, length=len(seq)).save()
    def Extract_coordinates_from_PDB(self, PDB_file, type):
        """Return alpha-carbon coordinates of the standard amino acids in a file.

        The file is parsed with the PDB parser first; if that raises, the
        mmCIF parser is tried instead.

        :param PDB_file: path to the structure file
        :param type: grouping of the result:
            ``'models'`` - one list of CA coordinates per model,
            ``'chains'`` - one list per chain (over all models),
            ``'all'`` - a single flat list over all chains
        :returns: the coordinates grouped as requested, or None for any
            other ``type`` value
        :raises KeyError: if a standard amino acid has no ``CA`` atom
        """
        from Bio.PDB.PDBParser import PDBParser
        from Bio.PDB import MMCIFParser
        Name = ntpath.basename(PDB_file).split('.')[0]

        try:
            # Use the class imported above. The previous ``PDB.PDBParser()``
            # depended on a module-level ``PDB`` name, and the bare except
            # would have hidden a NameError by always falling back to mmCIF.
            structure = PDBParser().get_structure(Name, PDB_file)
        except Exception:
            structure = MMCIFParser().get_structure(Name, PDB_file)

        def _ca_coords(residues):
            # CA coordinates of the standard amino acids among *residues*
            return [
                residue['CA'].get_coord() for residue in residues
                if is_aa(residue.get_resname(), standard=True)
            ]

        if type == 'models':
            return [
                _ca_coords(residue for chain in model for residue in chain)
                for model in structure
            ]
        if type == 'chains':
            return [_ca_coords(chain) for model in structure for chain in model]
        if type == 'all':
            return _ca_coords(
                residue for chain in structure.get_chains()
                for residue in chain)
        # Unknown grouping: keep the historical behaviour of returning None
        return None
 def get_sequence(self, chain_id):
     """Return the one-letter sequence of chain *chain_id* in the first model.

     Residues that do not pass ``is_aa`` are left out.
     """
     letters = []
     for residue in self.structure[0][chain_id]:
         if is_aa(residue):
             letters.append(three_to_one(residue.get_resname()))
     return ''.join(letters)
Beispiel #28
0
def get_pdb_sequence(structure):
    """
    Return ``(residue number, one-letter code)`` for each amino acid.

    Residue names missing from ``aa3to1`` are reported as ``'X'``.
    """
    seq = []
    for res in structure.get_residues():
        if is_aa(res):
            seq.append((res.id[1], aa3to1.get(res.resname, 'X')))
    return seq
Beispiel #29
0
def getSequenceStructure(s):
    """Return the one-letter sequence of *s*, writing "G" for every residue
    that is not a standard amino acid."""
    letters = []
    for res in s.get_residues():
        name = res.get_resname()
        if is_aa(name, standard=True):
            letters.append(three_to_one(name))
        else:
            letters.append("G")
    return "".join(letters)
Beispiel #30
0
def get_pdb_sequence_with_chains(structure):
    """
    List the amino acids of a structure that have a CA atom.

    Each item is ``(id[1], one-letter code, parent chain id, id[0], id[2])``,
    where ``id`` is the residue id tuple; names missing from ``aa3to1``
    become ``'X'``.
    """
    seq = []
    for res in structure.get_residues():
        if not is_aa(res) or not res.has_id('CA'):
            continue
        letter = aa3to1.get(res.resname, 'X')
        chain_name = res.get_parent().get_id()
        seq.append((res.id[1], letter, chain_name, res.id[0], res.id[2]))
    return seq
Beispiel #31
0
 def getSequenceFromChain(self, modelID, chainID):
     """Return the sequence of one chain as a ``Seq``.

     The kind of polymer is guessed from the length of the first residue
     name in the chain: 1 character is treated as RNA, 2 as DNA and 3 as
     protein (or a HETATM-only chain when no standard amino acid is found).
     Unrecognised residues become "X", and leading/trailing "X" are
     trimmed from the result.

     :param modelID: id of the model to search
     :param chainID: chain id, compared against ``str(chain.id)``
     :returns: the ``Seq`` of the first matching chain (empty if the chain
         has no usable residue), or None when no model/chain matches
     """
     self.checkRead()
     seq = []
     for model in self.structure:
         if model.id != modelID:
             continue
         for chain in model:
             if str(chain.id) != chainID:
                 continue

             # An empty chain has no first residue to inspect; treat it as
             # "no sequence" instead of failing with IndexError.
             residues = chain.get_unpacked_list()
             name_length = len(residues[0].resname) if residues else 0

             if name_length == 1:
                 print("Your sequence is a nucleotide sequence (RNA)\n")
                 for residue in chain:
                     # Only the standard RNA bases are kept
                     base = residue.get_resname()
                     seq.append(base if base in ('A', 'C', 'G', 'U') else "X")
             elif name_length == 2:
                 print("Your sequence is a nucleotide sequence (DNA)\n")
                 for residue in chain:
                     # The base is the second character of the residue name
                     base = residue.get_resname()[1]
                     seq.append(base if base in ('A', 'C', 'G', 'T') else "X")
             elif name_length == 3:
                 counter = 0
                 for residue in chain:
                     # Some proteins have "UNK" or "XXX", or other symbols
                     # for missing or unknown residues
                     if is_aa(residue.get_resname(), standard=True):
                         seq.append(three_to_one(residue.get_resname()))
                         counter += 1
                     else:
                         seq.append("X")
                 if counter != 0:  # aminoacids
                     print("Your sequence is an aminoacid sequence")
                 else:  # HETAM
                     print("Your sequence is a HETAM sequence")
                     # The "X" placeholders added above precede these names
                     # and are removed by the trimming below.
                     for residue in chain:
                         seq.append(residue.get_resname())

             # Trim flanking "X". The bounds checks keep an empty or all-"X"
             # sequence from raising IndexError.
             start, end = 0, len(seq)
             while end > start and seq[end - 1] == "X":
                 end -= 1
             while start < end and seq[start] == "X":
                 start += 1
             return Seq(str(''.join(seq[start:end])))
     return None
def ExtractPDBSeq(residues):
    """Return ``(sequence, kept residues)`` for the standard amino acids in
    *residues* whose upper-cased name is in ``ValidAA3Letters``."""
    kept = []
    for res in residues:
        if not is_aa(res, standard=True):
            continue
        if res.get_resname().upper() in ValidAA3Letters:
            kept.append(res)

    letters = (three_to_one(res.get_resname()) for res in kept)
    return ''.join(letters), kept
Beispiel #33
0
def retrieveAtomicStructure(pdb_sequence):
    """Map residue number to residue name for the amino acids of one chain.

    The structure is fetched for ``pdb_sequence['pdb_id']`` and the chain
    ``pdb_sequence['chain_id']`` of its first model is read.
    """
    pdb_structure = retrieveStructureFromPDB(pdb_sequence['pdb_id'])
    chain = pdb_structure[0][pdb_sequence['chain_id']]

    names_by_number = {}
    for residue in chain.get_residues():
        if is_aa(residue):
            names_by_number[residue.get_id()[1]] = residue.get_resname()
    return names_by_number
Beispiel #34
0
def parse_structure(path):
    """
    Parse a PDB formatted structure and prepare it for the calculation.

    The structure is read with the module-level parser ``P``. Disordered
    atoms are replaced by their selected alternative, hetero residues and
    waters are removed, and a warning listing the peptide fragments is
    printed to stderr when the number of fragments differs from the number
    of chains (i.e. the structure has gaps).

    :param path: path to the structure file
    :returns: ``(structure, number of chains, number of residues)``; the
        residue count is taken after hetero/solvent removal
    :raises ValueError: if a non-standard amino acid is found
    :raises Exception: whatever the parser raised, re-raised unchanged
    """

    print('[+] Reading structure file: {0}'.format(path))
    fname = os.path.basename(path)
    sname = '.'.join(fname.split('.')[:-1])

    try:
        s = P.get_structure(sname, path)
    except Exception:
        print('[!] Structure \'{0}\' could not be parsed'.format(sname),
              file=sys.stderr)
        # Re-raise as is: wrapping in a new Exception discarded the original
        # exception type and traceback.
        raise

    # Double occupancy check: keep only the selected alternative of each
    # disordered atom and make it look like an ordinary atom.
    for atom in list(s.get_atoms()):
        if atom.is_disordered():
            residue = atom.parent
            sel_at = atom.selected_child
            sel_at.altloc = ' '
            sel_at.disordered_flag = 0
            residue.detach_child(atom.id)
            residue.add(sel_at)

    # Remove HETATMs and solvent (residue id flag starting with 'H' or 'W')
    for res in list(s.get_residues()):
        if res.id[0][0] in ('W', 'H'):
            res.parent.detach_child(res.id)
        elif not is_aa(res, standard=True):
            raise ValueError(
                'Unsupported non-standard amino acid found: {0}'.format(
                    res.resname))
    # Count after the clean-up so removed waters/ligands are not reported
    # as residues.
    n_res = len(list(s.get_residues()))

    # Detect gaps and compare with no. of chains
    peptides = PPBuilder().build_peptides(s)
    n_peptides = len(peptides)
    n_chains = len({c.id for c in s.get_chains()})

    if n_peptides != n_chains:
        print('[!] Structure contains gaps:', file=sys.stderr)
        for i_pp, pp in enumerate(peptides):
            print(
                '\t{1.parent.id} {1.resname}{1.id[1]} < Fragment {0} > {2.parent.id} {2.resname}{2.id[1]}'
                .format(i_pp, pp[0], pp[-1]),
                file=sys.stderr)

    return (s, n_chains, n_res)
Beispiel #35
0
 def _extract_residue(self, line):
     """Return ``(resseq, resname, hetero_flag)`` parsed from *line*.

     The number is read from columns 27-32 and the name from columns 20-24
     (0-based slices). The flag is " " for amino acids, "W" for HOH/WAT and
     "H" for anything else.
     """
     resseq = int(line[27:32].split()[0])
     resname = line[20:24].split()[0]
     if is_aa(resname):
         return resseq, resname, " "
     if resname in ("HOH", "WAT"):
         return resseq, resname, "W"
     return resseq, resname, "H"
Beispiel #36
0
def annotate(m, ss_seq):
    """Store secondary structure symbols on the residues of a model.

    The amino acids of the first chain of *m* receive, in order, the
    elements of *ss_seq* under ``xtra["SS_PSEA"]``. A ValueError is raised
    when the two lengths differ.
    """
    first_chain = m.get_list()[0]
    # Keep amino acids only (drops HOH etc.)
    residues = [res for res in first_chain.get_list() if is_aa(res)]
    count = len(residues)
    if count != len(ss_seq):
        raise ValueError("Length mismatch %i %i" % (count, len(ss_seq)))
    for index, res in enumerate(residues):
        res.xtra["SS_PSEA"] = ss_seq[index]
Beispiel #37
0
def getResidueStrings(structure):
    """Return one one-letter sequence string per chain of every model.

    Standard amino acids are translated with ``three_to_one``; the
    histidine variants HIE/HID map to 'H', the cysteine variants CYX/CYM
    map to 'C', and every other residue becomes 'X'.

    :param structure: iterable of models providing ``get_chains()``
    :returns: list of sequence strings, in model then chain order
    """
    seqs = []
    for model in structure:
        for ch in model.get_chains():
            letters = []
            # Walk the residues of this chain only; iterating the whole
            # model here gave every chain the full model sequence.
            for residue in ch.get_residues():
                resname = residue.get_resname()
                if is_aa(resname, standard=True):
                    letters.append(three_to_one(resname))
                elif resname in {'HIE', 'HID'}:
                    letters.append('H')
                elif resname in {'CYX', 'CYM'}:
                    letters.append('C')
                else:
                    letters.append('X')
            seqs.append(''.join(letters))
    return seqs
Beispiel #38
0
 def __init__(self, model, pdb_file):
     """Compute residue and CA depth for every amino acid of *model*.

     The surface is obtained from *pdb_file*; each amino acid also gets its
     depths stored in ``xtra`` under ``EXP_RD`` and ``EXP_RD_CA``.
     """
     by_key = {}
     entries = []
     keys = []
     all_residues = Selection.unfold_entities(model, "R")
     surface = get_surface(pdb_file)
     for res in all_residues:
         if is_aa(res):
             depths = (residue_depth(res, surface), ca_depth(res, surface))
             # Entries are keyed by (chain id, residue id)
             res_id = res.get_id()
             key = (res.get_parent().get_id(), res_id)
             by_key[key] = depths
             entries.append((res, depths))
             keys.append(key)
             # Mirror the values on the residue itself
             res.xtra["EXP_RD"], res.xtra["EXP_RD_CA"] = depths
     AbstractPropertyMap.__init__(self, by_key, keys, entries)
Beispiel #39
0
def get_residue_depth(pdb_fh,msms_fh):
    """
    Extract residue depth for chain "A" of a PDB structure.

    If the MSMS vertex file for this structure does not exist yet in the
    directory of ``msms_fh``, it is generated by running ``pdb_to_xyzr``
    and MSMS through the shell; both commands are appended to
    ``<msms_fh>.log``.

    :param pdb_fh: path to PDB structure file
    :param msms_fh: path to the MSMS executable; ``pdb_to_xyzr`` is expected
        in the same directory
    :returns: pandas table indexed by ``aasi`` (residue number) with the
        columns "Residue depth" and "Residue (C-alpha) depth"
    """
    from Bio.PDB import Selection,PDBParser
    from Bio.PDB.Polypeptide import is_aa
    from Bio.PDB.ResidueDepth import get_surface,_read_vertex_array,residue_depth,ca_depth,min_dist
    surface_fh="%s/%s.msms.vert" % (dirname(msms_fh),basename(pdb_fh))
    if not exists(surface_fh):
        pdb_to_xyzr_fh="%s/pdb_to_xyzr" % dirname(msms_fh)
        xyzr_fh="%s/%s.xyzr" % (dirname(msms_fh),basename(pdb_fh))
        pdb_to_xyzr_com="%s %s > %s" % (pdb_to_xyzr_fh,pdb_fh,xyzr_fh)
        msms_com="%s -probe_radius 1.5 -if %s -of %s > %s.log" % (msms_fh,xyzr_fh,splitext(surface_fh)[0],splitext(surface_fh)[0])
        log_fh="%s.log" % msms_fh
        # The context manager closes the log even if the call below raises.
        with open(log_fh,'a') as log_f:
            log_f.write("%s;\n%s\n" % (pdb_to_xyzr_com,msms_com))
            # Flush so the header lands before the child's output, which is
            # written straight to the same file descriptor.
            log_f.flush()
            # NOTE: the command line is built by string interpolation and
            # run through the shell; both paths must come from a trusted
            # source and must not contain shell metacharacters.
            subprocess.call("%s;%s" % (pdb_to_xyzr_com,msms_com) , shell=True,stdout=log_f, stderr=subprocess.STDOUT)

    surface =_read_vertex_array(surface_fh)

    pdb_parser=PDBParser()
    pdb_data=pdb_parser.get_structure("pdb_name",pdb_fh)
    model = pdb_data[0]
    residue_list = Selection.unfold_entities(model, 'R')

    depth_dict = {}
    depth_list = []
    depth_keys = []
    for residue in residue_list:
        if not is_aa(residue):
            continue
        rd = residue_depth(residue, surface)
        ca_rd = ca_depth(residue, surface)
        # Get the key
        res_id = residue.get_id()
        chain_id = residue.get_parent().get_id()
        if chain_id=="A":
            depth_dict[(chain_id, res_id)] = (rd, ca_rd)
            depth_list.append((residue, (rd, ca_rd)))
            depth_keys.append((chain_id, res_id))
            # Update xtra information
            residue.xtra['EXP_RD'] = rd
            residue.xtra['EXP_RD_CA'] = ca_rd
        else:
            # Stop at the first amino acid that is not in chain "A"
            break
    # Rows are keyed by (chain id, residue id); after reset_index these
    # become the columns level_0 and level_1.
    depth_df=pd.DataFrame(depth_dict).T.reset_index()
    depth_df=depth_df.drop("level_0",axis=1)
    # Copy the residue number (second field of the residue id) into "aasi",
    # but only on rows where it differs from the previous number seen, so
    # consecutive rows sharing a number keep just the first one.
    aasi_prev=0
    for i in range(len(depth_df)):
        if depth_df.loc[i,"level_1"][1]!=aasi_prev:
            depth_df.loc[i,"aasi"]=depth_df.loc[i,"level_1"][1]
            aasi_prev=depth_df.loc[i,"level_1"][1]

    depth_df=depth_df.drop("level_1",axis=1)
    depth_df=depth_df.loc[~pd.isnull(depth_df.loc[:,"aasi"]),:]
    depth_df=depth_df.set_index("aasi",drop=True)
    depth_df.columns=["Residue depth","Residue (C-alpha) depth"]
    return depth_df
def parse_structure(path):
    """
    Parse a PDB or mmCIF structure and prepare it for the calculation.

    The parser is chosen from the file extension ('pdb'/'ent' or 'cif').
    Only the first model is kept, disordered atoms are replaced by their
    selected alternative, hetero residues, waters and hydrogens are
    removed, and a warning listing the peptide fragments is printed to
    stderr when the number of fragments differs from the number of chains
    (i.e. the structure has gaps).

    :param path: path to the structure file
    :returns: ``(structure, number of chains, number of residues)``
    :raises IOError: if the file extension is not supported
    :raises ValueError: if a non-standard amino acid is found
    :raises Exception: whatever the parser raised, re-raised unchanged
    """

    print('[+] Reading structure file: {0}'.format(path))
    fname = os.path.basename(path)
    sname = '.'.join(fname.split('.')[:-1])
    s_ext = fname.split('.')[-1]

    if s_ext not in ('pdb', 'ent', 'cif'):
        raise IOError('[!] Structure format \'{0}\' is not supported. Use \'.pdb\' or \'.cif\'.'.format(s_ext))

    if s_ext == 'cif':
        sparser = MMCIFParser()
    else:
        sparser = PDBParser(QUIET=1)

    try:
        s = sparser.get_structure(sname, path)
    except Exception:
        print('[!] Structure \'{0}\' could not be parsed'.format(sname), file=sys.stderr)
        # Re-raise as is: wrapping in a new Exception discarded the original
        # exception type and traceback.
        raise

    # Keep first model only
    if len(s) > 1:
        print('[!] Structure contains more than one model. Only the first one will be kept')
        model_one = s[0].id
        # Iterate over a copy because detaching mutates child_list
        for m in s.child_list[:]:
            if m.id != model_one:
                s.detach_child(m.id)

    # Double occupancy check: keep only the selected alternative of each
    # disordered atom and make it look like an ordinary atom.
    for atom in list(s.get_atoms()):
        if atom.is_disordered():
            residue = atom.parent
            sel_at = atom.selected_child
            sel_at.altloc = ' '
            sel_at.disordered_flag = 0
            residue.detach_child(atom.id)
            residue.add(sel_at)

    # Remove HETATMs and solvent (residue id flag starting with 'H' or 'W')
    for res in list(s.get_residues()):
        if res.id[0][0] in ('W', 'H'):
            res.parent.detach_child(res.id)
        elif not is_aa(res, standard=True):
            raise ValueError('Unsupported non-standard amino acid found: {0}'.format(res.resname))
    n_res = len(list(s.get_residues()))

    # Remove Hydrogens
    for atom in list(s.get_atoms()):
        if atom.element == 'H':
            atom.parent.detach_child(atom.name)

    # Detect gaps and compare with no. of chains
    peptides = PPBuilder().build_peptides(s)
    n_peptides = len(peptides)
    n_chains = len({c.id for c in s.get_chains()})

    if n_peptides != n_chains:
        print('[!] Structure contains gaps:', file=sys.stderr)
        for i_pp, pp in enumerate(peptides):
            print('\t{1.parent.id} {1.resname}{1.id[1]} < Fragment {0} > {2.parent.id} {2.resname}{2.id[1]}'.format(i_pp, pp[0], pp[-1]), file=sys.stderr)

    return (s, n_chains, n_res)
Beispiel #41
0
def validate_structure(s, selection=None, clean=True):
    """Clean a parsed structure in place and warn about gaps.

    Only the first model is kept, chains outside ``selection`` are removed,
    and disordered atoms are replaced by their selected alternative. With
    ``clean`` set, hetero residues, waters and hydrogens are removed too.
    A warning listing the peptide fragments is logged on the 'Prodigy'
    logger when the number of fragments differs from the number of chains
    left in the structure.

    :param s: structure to validate (modified in place)
    :param selection: optional iterable of strings, each a comma-separated
        list of chain ids to keep
    :param clean: whether to strip hetero residues, solvent and hydrogens
    :returns: the same structure object
    :raises ValueError: if a selected chain is absent, or (with ``clean``)
        a non-standard amino acid is found
    """
    # setup logging
    logger = logging.getLogger('Prodigy')

    # Keep first model only
    if len(s) > 1:
        logger.warning('[!] Structure contains more than one model. Only the first one will be kept')
        model_one = s[0].id
        # Iterate over a copy because detaching mutates child_list
        for m in s.child_list[:]:
            if m.id != model_one:
                s.detach_child(m.id)

    # process selected chains
    chains = list(s.get_chains())
    chain_ids = {c.id for c in chains}

    if selection:
        sel_chains = []
        # Match selected chain with structure
        for sel in selection:
            for c in sel.split(','):
                sel_chains.append(c)
                if c not in chain_ids:
                    raise ValueError('Selected chain not present in provided structure: {0}'.format(c))

        # Remove unselected chains
        for c in chains:
            if c.id not in sel_chains:
                c.parent.detach_child(c.id)

    # Double occupancy check: keep only the selected alternative of each
    # disordered atom and make it look like an ordinary atom.
    for atom in list(s.get_atoms()):
        if atom.is_disordered():
            residue = atom.parent
            sel_at = atom.selected_child
            sel_at.altloc = ' '
            sel_at.disordered_flag = 0
            residue.detach_child(atom.id)
            residue.add(sel_at)

    if clean:
        # Remove HETATMs and solvent (residue id flag starting with 'H' or 'W')
        for res in list(s.get_residues()):
            if res.id[0][0] in ('W', 'H'):
                res.parent.detach_child(res.id)
            elif not is_aa(res, standard=True):
                raise ValueError('Unsupported non-standard amino acid found: {0}'.format(res.resname))

        # Remove Hydrogens
        for atom in list(s.get_atoms()):
            if atom.element == 'H':
                atom.parent.detach_child(atom.name)

    # Detect gaps and compare with no. of chains
    peptides = PPBuilder().build_peptides(s)
    n_peptides = len(peptides)
    # Count the chains still attached: chain_ids was collected before the
    # unselected chains were removed, so using it here mis-reported gaps
    # whenever a selection dropped chains.
    n_chains = len({c.id for c in s.get_chains()})

    if n_peptides != n_chains:
        message = '[!] Structure contains gaps:\n'
        for i_pp, pp in enumerate(peptides):
            message += '\t{1.parent.id} {1.resname}{1.id[1]} < Fragment {0} > ' \
                       '{2.parent.id} {2.resname}{2.id[1]}\n'.format(i_pp, pp[0], pp[-1])
        logger.warning(message)

    return s
Beispiel #42
0
    def __init__(self, model, radius, offset=0, hse_up_key='HSE_U', hse_down_key='HSE_D', angle_key=None, check_chain_breaks=False, 
                 check_knots=False, receptor=None, signprot=None):
        """
        Compute half-sphere exposure (HSE) for every residue of the
        polypeptides built from the model and, along the way, collect
        atom clashes and (optionally) chain breaks and possible knots.

        Results are stored on the instance as C{clash_pairs},
        C{chain_breaks} and, when C{check_knots} is set, C{remodel_resis};
        HSE values are also written to each residue's C{xtra} dictionary.

        @param model: model
        @type model: L{Model}

        @param radius: HSE radius
        @type radius: float

        @param offset: number of flanking residues that are ignored in the calculation
        of the number of neighbors
        @type offset: int

        @param hse_up_key: key used to store HSEup in the entity.xtra attribute
        @type hse_up_key: string

        @param hse_down_key: key used to store HSEdown in the entity.xtra attribute
        @type hse_down_key: string

        @param angle_key: key used to store the angle between CA-CB and CA-pCB in
        the entity.xtra attribute
        @type angle_key: string

        @param check_chain_breaks: if True, residue numbers of amino acids
        that are in the model but in none of the built polypeptides are
        stored in C{self.chain_breaks}
        @type check_chain_breaks: bool

        @param check_knots: if true, C{PossibleKnots(receptor, signprot)} is
        consulted and matching residue ranges are stored in
        C{self.remodel_resis}
        @type check_knots: bool

        @param receptor: passed to C{PossibleKnots}; only used with check_knots

        @param signprot: passed to C{PossibleKnots}; only used with check_knots
        """
        assert(offset>=0)
        # For PyMOL visualization
        self.ca_cb_list=[]
        ppb=CaPPBuilder()
        ppl=ppb.build_peptides(model)
        # NOTE(review): hse_map, hse_list and hse_keys are filled below but
        # are not handed to any base-class initializer within this method.
        hse_map={}
        hse_list=[]
        hse_keys=[]
        ### GP
        # NOTE(review): presumably this handles being given a structure
        # rather than a model; it runs after the polypeptides were already
        # built from the original argument - confirm.
        if model.get_id()!=0:
            model = model[0]
        # Residue numbers of all amino acids in the model vs. those that
        # ended up in a polypeptide; compared at the end for chain breaks.
        residues_in_pdb,residues_with_proper_CA=[],[]
        if check_chain_breaks==True:
            # for m in model:
                for chain in model:
                    for res in chain:
                        # try:
                            if is_aa(res):
                                residues_in_pdb.append(res.get_id()[1])
                        # except:
                        #     if is_aa(chain):
                        #         residues_in_pdb.append(chain.get_id()[1])
                        #         print('chain', chain, res)
                        #         break
        self.clash_pairs = []
        self.chain_breaks = []
        
        if check_knots:
            possible_knots = PossibleKnots(receptor, signprot)
            knot_resis = possible_knots.get_resnums()
            # chain id -> list of [first, last] residue number ranges
            self.remodel_resis = {}

        for pp1 in ppl:
            for i in range(0, len(pp1)):
                residues_with_proper_CA.append(pp1[i].get_id()[1])
                # r1/r3 are the previous/next residue in the polypeptide,
                # or None at its ends
                if i==0:
                    r1=None
                else:
                    r1=pp1[i-1]
                r2=pp1[i]
                if i==len(pp1)-1:
                    r3=None
                else:
                    r3=pp1[i+1]
                # This method is provided by the subclasses to calculate HSE
                result=self._get_cb(r1, r2, r3)
                if result is None:
                    # Missing atoms, or i==0, or i==len(pp1)-1
                    continue
                pcb, angle=result
                hse_u=0
                hse_d=0
                ca2=r2['CA'].get_vector()
                residue_up=[]   ### GP
                residue_down=[] ### GP
                for pp2 in ppl:
                    for j in range(0, len(pp2)):
                        # Exception-driven branch: the offset filter in the
                        # except clause is applied when r2 is numbered
                        # consecutively with both r1 and r3 (explicit raise)
                        # or when the comparison itself fails (e.g. r1 or r3
                        # is None). If the numbering is not consecutive on
                        # either side, the filter is skipped and flanking
                        # residues are counted as neighbors.
                        try:
                            if r2.get_id()[1]-1!=r1.get_id()[1] or r2.get_id()[1]+1!=r3.get_id()[1]:
                                pass
                            else:
                                raise Exception
                        except:
                            if pp1 is pp2 and abs(i-j)<=offset:
                            # neighboring residues in the chain are ignored
                                continue
                        ro=pp2[j]
                        if not is_aa(ro) or not ro.has_id('CA'):
                            continue
                        cao=ro['CA'].get_vector()
                        d=(cao-ca2)
                        if d.norm()<radius:
                            # Angle below 90 degrees to pcb: upper half
                            # sphere; otherwise lower half sphere
                            if d.angle(pcb)<(math.pi/2):
                                hse_u+=1
                                ### GP
                                # Collects the residues that were found in the upper half sphere
                                residue_up.append(ro)

                                ### end of GP code
                            else:
                                hse_d+=1
                                ### GP
                                # Collects the residues that were found in the lower half sphere
                                residue_down.append(ro)
                                ### end of GP code
                res_id=r2.get_id()
                chain_id=r2.get_parent().get_id()
                # Fill the 3 data structures
                hse_map[(chain_id, res_id)]=(hse_u, hse_d, angle)
                hse_list.append((r2, (residue_up, residue_down, hse_u, hse_d, angle)))
                ### GP residue_up and residue_down added to hse_list
                hse_keys.append((chain_id, res_id))
                # Add to xtra
                r2.xtra[hse_up_key]=hse_u
                r2.xtra[hse_down_key]=hse_d
                if angle_key:
                    r2.xtra[angle_key]=angle

                ### GP checking for knots
                # Each knot is indexed as knot[0] = (chain id, residue number)
                # and knot[1] = (chain id, residue numbers). When the current
                # residue matches knot[0] and a residue of its upper half
                # sphere lies in knot[1], the [first, last] range of knot[1]
                # is recorded once per chain.
                if check_knots:
                    for knot in knot_resis:
                        if knot[0][1]==pp1[i].get_id()[1] and knot[0][0]==pp1[i].get_parent().get_id():
                            print(pp1[i].get_parent().get_id(),pp1[i])
                            for r in residue_up:
                                if r.get_parent().get_id()==knot[1][0] and r.get_id()[1] in knot[1][1]:
                                    print('close: ', r.get_parent().get_id(),r)
                                    resi_range = [knot[1][1][0], knot[1][1][-1]]
                                    if knot[1][0] not in self.remodel_resis:
                                        self.remodel_resis[knot[1][0]] = [resi_range]
                                    else:
                                        if resi_range not in self.remodel_resis[knot[1][0]]:
                                            self.remodel_resis[knot[1][0]].append(resi_range)

                ### GP checking for atom clashes
                # include_prev/include_next become True when the adjacent
                # residue in the polypeptide is NOT numbered consecutively,
                # i.e. it is then eligible for the clash test below.
                # NOTE(review): for i==0, pp1[i-1] is pp1[-1] (the last
                # residue), not an error; for the last i, pp1[i+1] raises
                # and include_next stays False.
                include_prev, include_next = False, False
                try:
                    if pp1[i].get_id()[1]-1!=pp1[i-1].get_id()[1]:
                        include_prev = True
                except:
                    include_prev = False
                try:
                    if pp1[i].get_id()[1]+1!=pp1[i+1].get_id()[1]:
                        include_next = True
                except:
                    include_next = False
                # Only residues of the upper half sphere are tested
                for atom in pp1[i]:
                    ref_vector = atom.get_vector()
                    for other_res in residue_up:
                        # Exception-driven branch: consecutively numbered
                        # direct neighbors are skipped; every other residue
                        # (explicit raise, or an IndexError from pp1[i+1])
                        # is tested atom by atom in the except clause.
                        try:
                            if other_res==pp1[i-1] and include_prev==False:
                                continue
                            elif len(pp1)>=i+1 and other_res==pp1[i+1] and include_next==False:
                                continue
                            else:
                                raise Exception
                        except:
                            for other_atom in other_res:
                                other_vector = other_atom.get_vector()
                                d = other_vector-ref_vector
                                # Atoms closer than 2 (presumably angstrom -
                                # TODO confirm) count as a clash
                                if d.norm()<2:
                                    # Each side of the pair is identified by
                                    # (CA b-factor, residue number).
                                    # NOTE(review): appending '0' and
                                    # converting back with float() yields
                                    # the same numeric value.
                                    if len(str(pp1[i]['CA'].get_bfactor()).split('.')[1])==1:
                                        clash_res1 = float(str(pp1[i]['CA'].get_bfactor())+'0')
                                    else:
                                        clash_res1 = pp1[i]['CA'].get_bfactor()
                                    if len(str(other_res['CA'].get_bfactor()).split('.')[1])==1:
                                        clash_res2 = float(str(other_res['CA'].get_bfactor())+'0')
                                    else:
                                        clash_res2 = other_res['CA'].get_bfactor()
                                    self.clash_pairs.append([(clash_res1, pp1[i].get_id()[1]), (clash_res2, other_res.get_id()[1])])
        # Amino acids present in the model but absent from every built
        # polypeptide are reported as chain breaks
        if check_chain_breaks==True:
            for r in residues_in_pdb:
                if r not in residues_with_proper_CA:
                    self.chain_breaks.append(r)
Beispiel #43
0
    def __init__(self, model, radius, offset, hse_up_key, hse_down_key,
                 angle_key=None):
        """
        Count, for each residue, the CA atoms lying in the "up" and "down"
        half-spheres around it, and register the counts as a property map.

        Polypeptides are built from the model with CaPPBuilder. For every
        residue where self._get_cb() returns a result, each CA atom closer
        than radius is counted as "up" when the angle between the CA-CA
        vector and the vector returned by self._get_cb() is below pi/2, and
        as "down" otherwise.

        @param model: model the polypeptides are built from
        @type model: L{Model}

        @param radius: CA-CA distance below which a residue counts as a
            neighbor
        @type radius: float

        @param offset: residues of the same polypeptide whose index differs
            by at most this value are left out of the neighbor count
        @type offset: int

        @param hse_up_key: key under which the "up" count is stored in the
            residue's xtra attribute
        @type hse_up_key: string

        @param hse_down_key: key under which the "down" count is stored in
            the residue's xtra attribute
        @type hse_down_key: string

        @param angle_key: key under which the angle between CA-CB and CA-pCB
            is stored in the residue's xtra attribute; nothing is stored when
            it is empty or None
        @type angle_key: string
        """
        assert offset >= 0
        # For PyMOL visualization
        self.ca_cb_list = []
        peptides = CaPPBuilder().build_peptides(model)
        hse_map = {}
        hse_list = []
        hse_keys = []
        for peptide in peptides:
            size = len(peptide)
            for i in range(size):
                # Chain ends have no previous / next residue
                previous = peptide[i - 1] if i > 0 else None
                current = peptide[i]
                following = peptide[i + 1] if i < size - 1 else None
                # _get_cb is supplied by the subclasses
                cb_result = self._get_cb(previous, current, following)
                if cb_result is None:
                    # Missing atoms, or first / last residue of the peptide
                    continue
                pcb, angle = cb_result
                up = 0
                down = 0
                ca_vector = current['CA'].get_vector()
                for other_peptide in peptides:
                    same_peptide = other_peptide is peptide
                    for j in range(len(other_peptide)):
                        # Skip sequence neighbors within the same peptide
                        if same_peptide and abs(i - j) <= offset:
                            continue
                        candidate = other_peptide[j]
                        if not (is_aa(candidate) and candidate.has_id('CA')):
                            continue
                        separation = candidate['CA'].get_vector() - ca_vector
                        if not separation.norm() < radius:
                            continue
                        if separation.angle(pcb) < pi / 2:
                            up += 1
                        else:
                            down += 1
                res_id = current.get_id()
                chain_id = current.get_parent().get_id()
                key = (chain_id, res_id)
                values = (up, down, angle)
                # Record the result in the three containers handed to the
                # property map base class
                hse_map[key] = values
                hse_list.append((current, values))
                hse_keys.append(key)
                # Mirror the result on the residue itself
                current.xtra[hse_up_key] = up
                current.xtra[hse_down_key] = down
                if angle_key:
                    current.xtra[angle_key] = angle
        AbstractPropertyMap.__init__(self, hse_map, hse_keys, hse_list)