Example #1
0
 def add(self, residue):
     """Attach a copy of ``residue`` to this site.

     The argument is duplicated with ``residue.copy(include_structure=True)``
     and only the duplicate is stored:

     * an object whose exact type is ``PdbResidue`` is appended to
       ``self.residues`` and indexed in ``self.residues_dict`` by ``full_id``;
     * an object whose exact type is ``Het`` is appended to ``self.ligands``;
       a polymeric ``Het`` is merged into model 0 of ``self.structure`` and
       the method returns immediately;
     * in every other case, a non-empty ``structure`` on the duplicate is
       inserted into ``self.structure`` (which is created on demand).

     Always returns True.
     """
     entry = residue.copy(include_structure=True)
     kind = type(entry)

     if kind is PdbResidue:
         self.residues.append(entry)
         self.residues_dict[entry.full_id] = entry
         entry.parent_site = self

     if kind is Het:
         self.ligands.append(entry)
         entry.parent_site = self
         if entry.is_polymer:
             # NOTE(review): this path indexes self.structure before the
             # "create if None" step further down -- confirm callers always
             # add an ordinary residue first.
             model = self.structure[0]
             if entry.chain in model:
                 # Chain already present: move the members over one by one.
                 for member in entry.structure:
                     model[entry.chain].add(member)
             else:
                 model.add(entry.structure)
             return True

     if not entry.structure:
         return True

     # Lazily create the site structure with a single model 0.
     if self.structure is None:
         self.structure = Structure(self.id)
         self.structure.add(Model(0))

     chain_id = entry.structure.get_parent().get_id()
     model = self.structure[0]
     if chain_id not in model:
         model.add(Chain(chain_id))

     # Insert the residue structure unless its id is already taken.
     target_chain = model[chain_id]
     if entry.structure.get_id() not in target_chain:
         target_chain.add(entry.structure)
     return True
Example #2
0
def retrieve_sphere_model(structure):  #, score):
    """Return a coarse copy of ``structure`` with one pseudo-atom per chain.

    Every chain of the input becomes a chain with the same id holding a
    single residue with a single 'CA' atom.  The atom sits at the point
    returned by ``calculate_centre_of_complex(chain)`` (described by the
    original author as the centre of mass).  The result is a new Structure
    called 'clustering_model' with one model (id 0).
    """
    sphere_struct = Structure('clustering_model')
    sphere_struct.add(Model(0))
    model = sphere_struct[0]

    # Numbering is changed: the k-th chain (0-based, iteration order) gets
    # residue number k.
    for position, chain in enumerate(structure.get_chains()):
        bead_chain = Chain(chain.id)
        model.add(bead_chain)

        centre = calculate_centre_of_complex(chain)
        # The chain id doubles as the residue name of the bead.
        bead_residue = Residue((' ', position, ' '), chain.id, ' ')
        bead_atom = Atom('CA', array(centre, 'f'), 0, 0, ' ', 'CA', 1)

        bead_chain.add(bead_residue)
        bead_residue.add(bead_atom)

    return sphere_struct
Example #3
0
    def slice(cls, obj, selection, name='slice'):
        """Build a new instance of ``cls`` holding a subset of ``obj``.

        ``selection`` is iterated twice, so it must be a re-iterable
        collection.  Each item is indexed as ``item[1]`` (model id),
        ``item[2]`` (chain id) and ``item[3]`` (residue id).  The selected
        residues are copied from ``obj[model][chain][residue]`` into a fresh
        Biopython Structure named ``name``, which is then wrapped with
        ``cls(structure, name=name)``.
        """
        from Bio.PDB.Structure import Structure
        from Bio.PDB.Model import Model
        from Bio.PDB.Chain import Chain

        sliced = Structure(name)

        # Pass 1: work out which chains are needed inside which models.
        chains_by_model = {}
        for entry in selection:
            model_id, chain_id = entry[1], entry[2]
            chains_by_model.setdefault(model_id, set()).add(chain_id)

        # Build the empty model/chain skeleton.
        for model_id, chain_ids in chains_by_model.items():
            sliced.add(Model(model_id))
            container = sliced[model_id]
            for chain_id in chain_ids:
                container.add(Chain(chain_id))

        # Pass 2: copy the selected residues into the skeleton.
        for entry in selection:
            model_id, chain_id, residue_id = entry[1], entry[2], entry[3]
            residue_copy = obj[model_id][chain_id][residue_id].copy()
            sliced[model_id][chain_id].add(residue_copy)

        return cls(sliced, name=name)
    def getStructFromFasta(self, fname, chainType):
        """Build an atom-less Bio.PDB Structure from a fasta file.

        Only the Structure -> Model -> Chain -> Residue levels are created;
        residues carry no atoms and therefore no coordinates.

        :param fname: str. path to the fasta file
        :param chainType: str. "l" or "r"; its upper-cased form is the chain
            id, and it selects the ``inputNumber`` ("1" for "l", otherwise
            "2") passed to ``parseFasta`` for error reporting
        :return: Structure whose id is the file prefix, with model 0 and one
            chain; residue ``i`` (0-based) has id ``(' ', i, ' ')``
        """
        # inputNumber tells parseFasta which partner to blame on failure.
        partner_number = "1" if chainType == "l" else "2"
        seq = self.parseFasta(fname, inputNumber=partner_number)
        prefix = self.splitExtendedPrefix(self.getExtendedPrefix(fname))[0]
        chain_id = chainType.upper()

        struct = Structure(prefix)
        model = Model(0)
        struct.add(model)
        chain = Chain(chain_id)
        model.add(chain)

        for position, letter in enumerate(seq):
            try:
                three_letter = one_to_three(letter)
            except KeyError:
                # Letters without a three-letter code become unknown residues.
                three_letter = "UNK"
            chain.add(Residue((' ', position, ' '), three_letter, prefix))
        return struct
    def create_sphere_representation(self):
        """Replace the full-atom chain by a single pseudo-atom and save it.

        ``self.calculate_centre_of_complex(self.fa_struct.struct)`` supplies
        the bead position together with the values stored in
        ``self.molmass`` and ``self.radius``.  The one-atom structure is kept
        in ``self.cg_struct`` and written out through ``self.save_pdb`` under
        the name "dddd" + chain id.
        """
        coarse = Structure('sphrere')
        coarse.add(Model(0))

        chain_id = self.fa_struct.chain
        bead_chain = Chain(chain_id)
        coarse[0].add(bead_chain)

        centre, self.molmass, self.radius = self.calculate_centre_of_complex(
            self.fa_struct.struct)

        # One ALA residue (number 1) carrying one CA atom at the centre.
        bead_residue = Residue((' ', 1, ' '), "ALA", ' ')
        bead_atom = Atom('CA', array(centre, 'f'), 0, 0, ' ', ' CA', 1)
        bead_chain.add(bead_residue)
        bead_residue.add(bead_atom)

        self.cg_struct = coarse
        self.save_pdb(coarse, "dddd" + chain_id)
 def create_new_chain(self, old_struct):
     """Return an empty Structure -> Model(0) -> Chain skeleton.

     ``old_struct.chain`` is used both as the id of the new Structure and as
     the id of its only chain.
     """
     chain_id = old_struct.chain
     skeleton = Structure(chain_id)
     model = Model(0)
     skeleton.add(model)
     # Only one chain is created; multi-chain components are an open
     # question left by the original author.
     model.add(Chain(chain_id))
     return skeleton
Example #7
0
    def init_structure(self, structure_id):
        """Create an empty Structure and store it in ``self.structure``.

        Arguments:
        o structure_id - identifier passed to the Structure constructor
        """
        fresh_structure = Structure(structure_id)
        self.structure = fresh_structure
Example #8
0
def retrieve_ca_model(structure):
    """Return a reduced copy of model 0 that keeps one atom per residue.

    Only atoms named "CA", "C4'" or "C4*" are kept.  Each kept atom is
    re-created as an atom named 'CA' with the original coordinates and serial
    number, inside a new residue that keeps the original residue number and
    residue name.  Chains keep their ids.  The result is a Structure called
    'clustering_model' with a single model (id 0).
    """
    backbone_names = ("CA", "C4'", "C4*")

    reduced_struct = Structure('clustering_model')
    reduced_struct.add(Model(0))

    for source_chain in structure[0]:
        reduced_chain = Chain(source_chain.id)
        reduced_struct[0].add(reduced_chain)

        for source_residue in source_chain:
            for source_atom in source_residue:
                if source_atom.get_name() not in backbone_names:
                    continue
                stub_residue = Residue((' ', source_residue.id[1], ' '),
                                       source_residue.get_resname(), ' ')
                stub_atom = Atom('CA', source_atom.coord, 0, ' ', ' ', 'CA',
                                 source_atom.get_serial_number())
                reduced_chain.add(stub_residue)
                stub_residue.add(stub_atom)

    return reduced_struct
Example #9
0
    def renumber_windowed_model(self, structure: Structure, alphafold_mmCIF_dict: Dict) -> Structure:
        """Return a copy of ``structure`` renumbered to transcript positions.

        The first and last residue numbers come from the
        ``_ma_target_ref_db_details.seq_db_align_begin`` / ``..._end``
        entries of ``alphafold_mmCIF_dict``.  In every chain the residues are
        copied and numbered consecutively from the begin value, with blank
        hetero flag and insertion code.  All disorder flags on the copies are
        cleared because ``copy()`` does not carry disorder over correctly.

        Raises AssertionError when a chain does not end exactly on the
        declared end position.
        """
        # The AlphaFold dictionary entries describing the residue range.
        first_number = int(alphafold_mmCIF_dict['_ma_target_ref_db_details.seq_db_align_begin'][0])
        last_number = int(alphafold_mmCIF_dict['_ma_target_ref_db_details.seq_db_align_end'][0])

        result = Structure(structure.id)
        for model in structure:
            new_model = Model(model.id)
            for chain in model:
                new_chain = Chain(chain.id)
                residue_count = 0
                for residue_count, residue in enumerate(chain, start=1):
                    clone = residue.copy()
                    clone.id = (' ', first_number + residue_count - 1, ' ')
                    # Wipe every notion of disorder from the copy.
                    for atom in clone:
                        atom.disordered_flag = 0
                    clone.disordered = 0
                    new_chain.add(clone)

                # The number following the last residue must be end + 1.
                assert first_number + residue_count == last_number + 1
                new_model.add(new_chain)

            result.add(new_model)
        return result
Example #10
0
def save_chain_to(chain, filename: str):
    """Write a single chain to ``filename`` in PDB format.

    The chain is wrapped in a temporary Structure (id = ``filename``) with
    one model (id 0) before being handed to PDBIO.

    Args:
        chain: Bio.PDB chain entity to write.  Adding it to the temporary
            model re-parents it, as adding it to the Structure did before.
        filename: destination path.
    """
    from Bio.PDB.Model import Model
    from Bio.PDB.PDBIO import PDBIO

    # PDBIO.save walks Structure -> Model -> Chain -> Residue -> Atom.  A
    # chain added directly to the Structure would be treated as a model and
    # its residues as chains, so the Model level must be present.
    structure = Structure(filename)
    model = Model(0)
    structure.add(model)
    model.add(chain)

    io = PDBIO()
    io.set_structure(structure)
    io.save(filename)
 def create_new_chain(self, id):
     """Reset ``self.fragment_lattice`` to a structure with one empty chain.

     ``id`` names both the new Structure and its only Chain, which is placed
     in model 0.
     """
     self.fragment_lattice = lattice = Structure(id)
     model = Model(0)
     lattice.add(model)
     # Only one chain is created; multi-chain components are an open
     # question left by the original author.
     model.add(Chain(id))
Example #12
0
def initialize_res(residue: Union[Geo, str]) -> Structure:
    """Create a structure that contains exactly one amino acid.

    ``residue`` is either a geometry object (``Geo``) or a string that is
    turned into one with ``geometry(residue)``.  Anything else raises
    ValueError.  The residue ends up in chain "A" of model 0 of a Structure
    with id "X".

    CA is placed at the origin, C on the x axis at the CA-C bond length and
    N in the xy plane at the N-CA-C angle.  For an "ACE" cap the central
    carbon is named CH3 instead of CA.
    """
    if not isinstance(residue, (Geo, str)):
        raise ValueError("Invalid residue argument:", residue)
    geo = residue if isinstance(residue, Geo) else geometry(residue)

    def backbone_atom(name, coord, element):
        # Shared atom settings: b-factor 0.0, occupancy 1.0, blank altloc,
        # serial number 0, full name = name prefixed with one space.
        return Atom(name, coord, 0.0, 1.0, " ", " " + name, 0, element)

    seg_id = 1
    residue_name = geo.residue_name
    ca_n_length = geo.CA_N_length
    ca_c_length = geo.CA_C_length
    n_ca_c_angle = geo.N_CA_C_angle

    ca_position = np.array([0.0, 0.0, 0.0])
    c_position = np.array([ca_c_length, 0, 0])
    n_position = np.array([
        ca_n_length * math.cos(n_ca_c_angle * (math.pi / 180.0)),
        ca_n_length * math.sin(n_ca_c_angle * (math.pi / 180.0)),
        0,
    ])

    N = backbone_atom("N", n_position, "N")
    # A capping group has a methyl carbon where a residue has its CA.
    central_name = "CH3" if residue_name == "ACE" else "CA"
    CA = backbone_atom(central_name, ca_position, "C")
    C = backbone_atom("C", c_position, "C")

    # Carbonyl oxygen (to be moved later).
    carbonyl = calculateCoordinates(N, CA, C, geo.C_O_length,
                                    geo.CA_C_O_angle, geo.N_CA_C_O_diangle)
    O = backbone_atom("O", carbonyl, "O")

    res = make_res_of_type(seg_id, N, CA, C, O, geo)

    cha = Chain("A")
    cha.add(res)
    mod = Model(0)
    mod.add(cha)
    struc = Structure("X")
    struc.add(mod)
    return struc
def single_chain_structure(chain, name='superposition'):
    """Wrap ``chain`` in a new Structure called ``name`` with one model (id 0).

    The chain object itself (not a copy) is added to the model.
    """
    from Bio.PDB.Structure import Structure
    from Bio.PDB.Model import Model

    only_model = Model(0)
    only_model.add(chain)

    wrapper = Structure(name)
    wrapper.add(only_model)
    return wrapper
Example #14
0
    def get_structure(self, name='RNA chain'):
        """Return the residues of this object as a PDB.Structure.

        The structure is called ``name`` and holds model 0 with a single
        chain whose id is ``self.chain_name``.  Every item yielded by
        iterating ``self`` is added to that chain.
        """
        struc = Structure(name)
        struc.add(Model(0))

        container = Chain(self.chain_name)
        struc[0].add(container)
        for resi in self:
            container.add(resi)
        return struc
Example #15
0
def complex_save(given_complex, i, path):
    """Write all components of ``given_complex`` into one PDB file.

    For every component the chain
    ``component.pyrystruct.struct[0][component.pyrystruct.chain]`` is added
    to model 0 of a temporary Structure with id ``i``, which is then saved to
    ``path``.  Returns ``path``.
    """
    assembly = Structure(i)
    model = Model(0)
    assembly.add(model)

    for component in given_complex.components:
        source = component.pyrystruct
        model.add(source.struct[0][source.chain])

    writer = PDBIO()
    writer.set_structure(assembly)
    writer.save(path)
    return path
    def saveStruct(self, fname, desiredOrder):
        """Write ``self.structure`` to ``fname``, optionally reordering models.

        Args:
            fname: destination path handed to ``PDBIO.save``.
            desiredOrder: iterable of model ids giving the order in which
                the models are written, or None to write the structure
                unchanged.  Models whose id is not listed are dropped.

        When an order is given, ``self.structure`` is replaced by a new
        Structure (id ``self.structId``) that holds the listed models in
        that order.

        Raises:
            IndexError: if ``desiredOrder`` names a model id that the current
                structure does not contain.  ``self.structure`` is left
                untouched in that case.
        """
        io = PDBIO(use_model_flag=True)

        if desiredOrder is not None:
            wanted = list(desiredOrder)

            # First model wins for a repeated id, like the original list scan.
            models_by_id = {}
            for model in self.structure.child_list:
                models_by_id.setdefault(model.id, model)

            # Validate before replacing self.structure, so a bad id cannot
            # leave this object holding a half-built structure.
            missing = [model_id for model_id in wanted
                       if model_id not in models_by_id]
            if missing:
                raise IndexError("model id(s) not found in structure: %r"
                                 % (missing,))

            reordered = Structure(self.structId)
            for model_id in wanted:
                model = models_by_id[model_id]
                model.detach_parent()
                reordered.add(model)
            self.structure = reordered

        io.set_structure(self.structure)
        io.save(fname)  #,  preserve_atom_numbering=True)
Example #17
0
def extract_model(pdb_struct, k):
    """Return a new single-model structure holding a copy of model ``k``.

    The copy is renumbered to id 0 / serial number 1 and placed in a new
    Structure that reuses the id of ``pdb_struct``.  An AssertionError is
    raised unless ``k < len(pdb_struct)``.
    """
    assert k < len(pdb_struct), 'missing specified model'

    chosen = pdb_struct[k].copy()
    chosen.id = 0
    chosen.serial_num = 1

    new_struct = Structure(pdb_struct.id)
    new_struct.add(chosen)
    return new_struct
Example #18
0
def get_chain(structure: Structure, return_a_chain: bool = False) -> ChainDesc:
    """Pick the chain of ``structure`` to work with.

    Selection rules, in order:

    1. With ``return_a_chain`` set, the raw chain whose id is 'A' is returned
       if there is one (note: the chain itself, not a ``ChainDesc``).
    2. A structure with a single chain returns that chain.
    3. Otherwise only chains that have ligands are considered.  Their
       shortened sequences are aligned pairwise; as soon as one pair scores
       below the threshold the user is asked for a chain letter, and the
       chain at that alphabetical position ('A' -> first chain) is returned.
    4. If all pairs are similar, the chain with the longest shortened
       sequence is returned (the first one on ties).

    Raises:
        ValueError: the user entered nothing, or a letter that does not map
            onto one of the chains.
        IndexError: no chain with ligands is available to choose from.
    """
    if return_a_chain:
        for chain in structure.get_chains():
            if chain.get_id() == 'A':
                return chain
    chains = [ChainDesc(chain=c) for c in structure.get_chains()]

    # fast return if structure contains a single chain
    if len(chains) == 1:
        print('structure contains a single chain')
        return chains[0]
    print(f'structure contains {len(chains)} chains')

    chains_with_ligands = [c for c in chains if c.if_has_ligands()]
    print(f'chains with ligands: {len(chains_with_ligands)}')

    shortened_chain_sequences = [c.get_shortened_seq() for c in chains_with_ligands]

    # NOTE(review): the threshold looks like a similarity fraction, but it is
    # compared with the alignment score as returned by pairwise2 -- confirm
    # whether the score should be normalised first.
    equality_threshold = 0.95

    for first_index, first_sequence in enumerate(shortened_chain_sequences):
        for second_sequence in shortened_chain_sequences[first_index + 1:]:
            alignments = pairwise2.align.globalds(first_sequence, second_sequence, blosum62, -10, -0.5)
            score = alignments[0][2]  # (seqA, seqB, score, begin, end)
            if score < equality_threshold:
                print(f'two different sequences found (score={score})')
                print('please enter a letter of chain to work with: ', end='')
                answer = input().strip()
                if not answer:
                    raise ValueError('no chain letter entered')
                # Letters map onto positions: 'A' -> 0, 'B' -> 1, ...
                # (int('A') used here before always raised ValueError).
                chain_index = ord(answer[0]) - ord('A')
                if not 0 <= chain_index < len(chains):
                    raise ValueError(
                        f'chain letter {answer[0]!r} does not match any of '
                        f'the {len(chains)} chains')
                return chains[chain_index]

    # No differing pair was found, so choose the longest chain.
    if not chains_with_ligands:
        raise IndexError('no chain with ligands to choose from')
    longest_chain = max(chains_with_ligands,
                        key=lambda c: len(c.get_shortened_seq()))
    print(f'chain selected: {longest_chain.chain.get_id()}')

    return longest_chain
Example #19
0
def splitOnePDB(fname, outPath):
    """Split a PDB file into ligand/receptor file pairs, one per chain.

    Steps:
      1. Parse ``fname`` with the module-level ``parser``.
      2. In model 0, drop every chain whose length (residues containing a CA
         atom minus residues that are not standard amino acids) lies outside
         ``[MIN_SEQ_LEN, MAX_SEQ_LEN]``.
      3. For each remaining chain taken as ligand, use all other chains as
         the receptor.  With exactly two chains the mirrored pair is skipped.
      4. Write ``<prefix>-<i>_l_u.pdb`` and ``<prefix>-<i>_r_u.pdb`` into
         ``outPath``.

    Returns 0 when the file cannot be parsed, when model 0 cannot be
    evaluated, or when fewer than two chains remain; otherwise None.
    """
    try:
        s = parser.get_structure(fname, fname)
    except Exception:
        print("Error loading pdb")
        return 0

    banLenChains = []
    try:
        for chain in s[0]:
            n_nonstandard = sum(
                1 for res in chain.get_list() if not is_aa(res, standard=True))
            n_with_ca = sum(1 for res in chain if "CA" in res)
            chainLen = n_with_ca - n_nonstandard
            if chainLen < MIN_SEQ_LEN or chainLen > MAX_SEQ_LEN:
                print(chainLen)
                banLenChains.append(chain.get_id())
    except KeyError:
        print("Not good model")
        return 0

    for badChainId in banLenChains:
        s[0].detach_child(badChainId)

    if len(s[0].get_list()) < 2:
        print(s)
        print(s[0].get_list())
        print("Not enough good chains")
        return 0

    ligandChainList = []
    receptorChainList = []
    for candidate in s[0]:
        partners = [other for other in s[0] if candidate != other]
        # Skip the mirrored pair when the only partner is already a ligand.
        if len(partners) > 1 or partners[0] not in ligandChainList:
            ligandChainList.append(candidate)
            receptorChainList.append(partners)

    def write_chains(struct_id, chains, file_name):
        # Put the given chains into a fresh one-model structure and save it.
        io = PDBIO()
        holder = Structure(struct_id)
        holder.add(Model(0))
        for member in chains:
            member.set_parent(holder[0])
            holder[0].add(member)
        io.set_structure(holder)
        io.save(os.path.join(outPath, file_name))

    prefix = os.path.basename(fname).split(".")[0]
    pairs = zip(ligandChainList, receptorChainList)
    for i, (ligandChain, receptorChains) in enumerate(pairs):
        write_chains(prefix + "ligand", [ligandChain],
                     prefix + "-" + str(i) + "_l_u.pdb")
        write_chains(prefix + "receptor", receptorChains,
                     prefix + "-" + str(i) + "_r_u.pdb")
        print("ligand:", ligandChain, "receptor:", receptorChains)
Example #20
0
 def create_new_structure(self, name, chain_id):
     """
         Creates a new Bio.PDB structure object and stores it in self.struct
     Parameters:
     -----------
         name        :   structure name
         chain_id    :   chain name (e.g. A, B, C)
     Side effects:
     -------------
         self.struct :   set to a Structure holding model 0 with one empty
                         chain; nothing is returned
     """
     self.struct = Structure(name)
     self.struct.add(Model(0))
     self.struct[0].add(Chain(chain_id))
Example #21
0
    def init_structure(self, structure_id):
        """Create an empty Structure and store it in ``self.structure``.

        Arguments:
        o structure_id - identifier passed to the Structure constructor
        """
        fresh_structure = Structure(structure_id)
        self.structure = fresh_structure
Example #22
0
def featurize(structure: Structure) -> list[Any]:
    """
    Calculates 3D ML features from the `structure`.

    Returns ``[total_area, polar_apolar, protein_length, seq_length, angle]``:
    a one-item list with the freesasa total area, the list of per-class
    areas, the value of CA(second residue) - CA(second-to-last residue), a
    one-item list with the residue count, and the dihedral over the CA atoms
    of residues 1, 2, -3 and -2.
    """
    # NOTE(review): `pdbpath` is not a parameter of this function, so the
    # surface calculation reads whatever file a variable of that name points
    # to in the enclosing scope -- confirm it matches `structure`.
    sasa_structure = freesasa.Structure(pdbpath)
    sasa_result = freesasa.calc(sasa_structure)
    area_by_class = freesasa.classifyResults(sasa_result, sasa_structure)

    total_area = [sasa_result.totalArea()]
    polar_apolar = [area_by_class[key] for key in area_by_class]

    residues = list(structure.get_residues())
    seq_length = [len(residues)]

    # The very first and very last residues are deliberately left out.
    ca_second = residues[1]["CA"]
    ca_penultimate = residues[-2]["CA"]
    protein_length = ca_second - ca_penultimate
    angle = calc_dihedral(
        ca_second.get_vector(),
        residues[2]["CA"].get_vector(),
        residues[-3]["CA"].get_vector(),
        ca_penultimate.get_vector(),
    )

    return [total_area, polar_apolar, protein_length, seq_length, angle]
Example #23
0
def getLigandNbrs(resids: List[Residue],
                  struct: Structure) -> List[ResidueDict]:
    """Return the unique residues that lie close to a ligand.

    ``resids`` are the residues that make up the ligand.  For each of them a
    neighbour search (radius 5, residue level) is run around its first atom
    over all atoms of ``struct``.  Hits that are themselves part of the
    ligand are discarded; the rest are de-duplicated as ``ResidueDict``
    objects and passed through ``addBanClass``.
    """
    searcher = NeighborSearch(list(struct.get_atoms()))

    nearby = []
    for ligand_residue in resids:
        # Only the first atom of each ligand residue is used as probe point.
        probe_atom = ligand_residue.child_list[0]
        nearby.extend(searcher.search(probe_atom.get_coord(), 5, level='R'))

    # Keep only neighbours that are not part of the ligand itself.
    outsiders = []
    for candidate in nearby:
        is_ligand_part = [ResidueDict(member) == ResidueDict(candidate)
                          for member in resids]
        if not any(is_ligand_part):
            outsiders.append(ResidueDict(candidate))

    return [addBanClass(entry) for entry in set(outsiders)]
Example #24
0
def getLigandResIds(ligchemid: str, struct: Structure) -> List[Residue]:
    """Return every residue of ``struct`` whose residue name is ``ligchemid``.

    ``ligchemid`` is a ligand chemical id as listed at
    https://www.rcsb.org/ligand/IDS.  The residues are returned in the order
    in which ``struct.get_residues()`` yields them.
    """
    return [
        residue for residue in struct.get_residues()
        if residue.get_resname() == ligchemid
    ]
def splitOnePDB(fname, chainIdL, chainIdR, outPath):
    """Write the ligand and receptor chains of one PDB file to two files.

    The file is parsed with the module-level ``parser``.  In model 0 a chain
    is rejected when its length (residues containing a CA atom minus
    residues that are neither standard amino acids nor HOH) lies outside
    ``[MIN_SEQ_LEN, MAX_SEQ_LEN]``.  At least two chains must pass.
    ``findNeigChains(s, chainIdL, chainIdR)`` then supplies the ligand and
    receptor chains, which are saved in ``outPath`` as
    ``<prefix>-<chainIdL><chainIdR>_l_u.pdb`` and ``..._r_u.pdb``.

    Returns 0 when the file cannot be parsed, when model 0 cannot be
    evaluated, or when too few chains pass; otherwise None.
    """
    base_name = os.path.basename(fname)
    print(base_name)
    try:
        s = parser.get_structure(base_name, fname)
    except Exception:
        print("Error loading pdb")
        return 0

    rejected_ids = []
    try:
        for chain in s[0]:
            n_bad = sum(
                1 for res in chain.get_list()
                if not is_aa(res, standard=True) and res.resname != "HOH")
            chainLen = sum(1 for res in chain if "CA" in res) - n_bad
            if chainLen < MIN_SEQ_LEN or chainLen > MAX_SEQ_LEN:
                print(chain, chainLen)
                rejected_ids.append(chain.get_id())
    except KeyError:
        print("Not good model")
        return 0

    # Rejected chains are only counted here, not removed from the model.
    if len(s[0].get_list()) - len(rejected_ids) < 2:
        print(s)
        print(s[0].get_list())
        print("Not enough good chains")
        return 0

    ligandChains, receptorChains = findNeigChains(s, chainIdL, chainIdR)
    print("ligand:", ligandChains, "receptor:", receptorChains)

    prefix = os.path.basename(fname).split(".")[0]

    def write_chains(struct_id, chains, suffix):
        # Put the given chains into a fresh one-model structure and save it.
        io = PDBIO()
        holder = Structure(struct_id)
        holder.add(Model(0))
        for member in chains:
            member.set_parent(holder[0])
            holder[0].add(member)
        io.set_structure(holder)
        io.save(
            os.path.join(outPath, prefix + "-" + chainIdL + chainIdR + suffix))

    write_chains(prefix + "ligand", ligandChains, "_l_u.pdb")
    write_chains(prefix + "receptor", receptorChains, "_r_u.pdb")
Example #26
0
def multiply_model(pdb_struct, num_models):
    """Return a structure holding ``num_models`` copies of the only model.

    ``pdb_struct`` must contain exactly one model (AssertionError
    otherwise).  Copy ``i`` gets id ``i`` and serial number ``i + 1``.  The
    result is a new Structure that reuses the id of ``pdb_struct``.
    """
    assert len(pdb_struct) == 1, 'single-model PDB file required'

    new_struct = Structure(pdb_struct.id)
    for model_id in range(num_models):
        clone = pdb_struct[0].copy()
        # Make sure the copy is unattached before it is renumbered.
        clone.detach_parent()
        clone.id = model_id
        clone.serial_num = model_id + 1
        new_struct.add(clone)
        clone.set_parent(new_struct)

    return new_struct
Example #27
0
def initialize_res(residue):
    """Create a structure that contains exactly one amino acid.

    ``residue`` is either a geometry object (``Geo``) or a value that is
    turned into one with ``Geo(residue)``.  The residue ends up in chain 'A'
    of model 0 of a Structure with id 'X'.

    CA is placed at the origin, C on the x axis at the CA-C bond length and
    N in the xy plane at the N-CA-C angle.
    """
    geo = residue if isinstance(residue, Geo) else Geo(residue)

    def backbone_atom(name, coord, element):
        # Shared atom settings: b-factor 0.0, occupancy 1.0, blank altloc,
        # serial number 0, full name = name prefixed with one space.
        return Atom(name, coord, 0.0, 1.0, " ", " " + name, 0, element)

    seg_id = 1
    # Read up front as in the original; the value is not needed below.
    residue_name = geo.residue_name
    ca_n_length = geo.CA_N_length
    ca_c_length = geo.CA_C_length
    n_ca_c_angle = geo.N_CA_C_angle

    ca_position = np.array([0., 0., 0.])
    c_position = np.array([ca_c_length, 0, 0])
    n_position = np.array([
        ca_n_length * math.cos(n_ca_c_angle * (math.pi / 180.0)),
        ca_n_length * math.sin(n_ca_c_angle * (math.pi / 180.0)),
        0,
    ])

    N = backbone_atom("N", n_position, "N")
    CA = backbone_atom("CA", ca_position, "C")
    C = backbone_atom("C", c_position, "C")

    # Carbonyl oxygen (to be moved later).
    carbonyl = calculateCoordinates(N, CA, C, geo.C_O_length,
                                    geo.CA_C_O_angle, geo.N_CA_C_O_diangle)
    O = backbone_atom("O", carbonyl, "O")

    res = makeRes(seg_id, N, CA, C, O, geo)

    cha = Chain('A')
    cha.add(res)
    mod = Model(0)
    mod.add(cha)
    struc = Structure('X')
    struc.add(mod)
    return struc
Example #28
0
    def save_pdb(self, complex_id, temp = "", name = ""):
        """
        Gets the chains of all complex components and writes them into one
        PDB file (all chains go into model 0 of a temporary structure).

        Parameters:
        ------------
            complex_id  : number of complex from simulation; also used as the
                          id of the temporary structure
            temp        : label for the file name; rounded to one decimal
                          when it parses as a number, used verbatim otherwise
            name        : file name prefix; when empty, the last path segment
                          of ``outfolder.outdirname`` is used instead
        Returns:
        --------
            path of the written file:
            <outdirname>/<prefix>_<score>_<complex_id>_<temp>.pdb
        """
        score = round(self.simulation_score, 4)

        s = Structure(complex_id)
        my_model = Model(0)
        s.add(my_model)
        for component in self.components:
            # TODO: what if there are more chains in one component?
            my_model.add(component.pyrystruct.struct[0][component.pyrystruct.chain])
        out = PDBIO()
        out.set_structure(s)
        outname = outfolder.outdirname.split("/")[-1]

        temp = str(temp)
        try:
            temp = round(float(temp), 1)
        except (ValueError, OverflowError):
            # Not a number (e.g. the default ""): keep the text as it is.
            pass

        prefix = name if name else str(outname)
        fi_name = (str(outfolder.outdirname) + '/' + prefix + '_' + str(score)
                   + '_' + str(complex_id) + "_" + str(temp) + '.pdb')

        try:
            out.save(fi_name)
        finally:
            # Release the chains from the temporary model even when writing
            # fails, so the components are left as after a successful save.
            for comp in self.components:
                comp.pyrystruct.struct[0][comp.pyrystruct.chain].detach_parent()

        return fi_name
    def __make_structure_from_residues__(self, residues):
        """
        Makes a Structure object from a list of residues.

        Residues are packed into chains named 'c1', 'c2', ...  A new chain is
        started whenever the current one already holds a residue with the
        same id.  All chains go into one model 'm' of a structure 's'.
        """
        # KR: this probably can be outsourced to another module.
        model = Model('m')
        chain_number = 1
        current = Chain('c%i' % chain_number)

        for residue in residues:
            if current.has_id(residue.id):
                # Id clash: close the current chain and open the next one.
                model.add(current)
                chain_number += 1
                current = Chain('c%i' % chain_number)
            current.add(residue)
        model.add(current)

        struct = Structure('s')
        struct.add(model)
        return struct
Example #30
0
def get_ptc_residues(struct: Structure, pdbid: str,
                     conserved_nucleotides: List[int]) -> List[Residue]:
    """Return the residues whose sequence number is a conserved position.

    A residue is kept when ``int(residue.get_id()[1])`` occurs in
    ``conserved_nucleotides``.  ``pdbid`` is accepted but not used.
    """
    # Not used by the selection below.
    ECOLI_PTC_CONSERVED_NUCLEOTIDES = [
        '2055', '2451', '2452', '2504', '2505', '2506', '2507'
    ]

    return [
        residue for residue in struct.get_residues()
        if int(residue.get_id()[1]) in conserved_nucleotides
    ]
Example #31
0
    def calculate_BSA(self):
        """Use the NACCESS module to calculate the Buried Surface Area.

        The first two entries of ``self.get_chains()`` are used to index
        ``self.model``; each of the two chains is wrapped in its own
        temporary single-model structure.

        Returns ``[bsa, sas_A, sas_B, sas_tot]`` where
        ``bsa = sas_A + sas_B - sas_tot``.
        """
        chain_ids = list(self.get_chains())

        # Temporary single-chain structures to feed NACCESS.
        model_a = Model(0)
        model_b = Model(0)
        model_a.add(self.model[chain_ids[0]])
        model_b.add(self.model[chain_ids[1]])
        structure_a = Structure("chainA")
        structure_b = Structure("chainB")
        structure_a.add(model_a)
        structure_b.add(model_b)

        # NOTE(review): the isolated structures hold the same chain objects
        # as self.model; if NACCESS_atomic stores its result on the atoms,
        # the later runs may overwrite the values of the first -- confirm
        # that sas_tot still reflects the complex.
        NACCESS_atomic(self.model)
        NACCESS_atomic(structure_a[0])
        NACCESS_atomic(structure_b[0])

        sas_tot = _get_atomic_SASA(self.model)
        sas_a = _get_atomic_SASA(structure_a)
        sas_b = _get_atomic_SASA(structure_b)

        buried = sas_a + sas_b - sas_tot
        return [buried, sas_a, sas_b, sas_tot]
Example #32
0
    def _rsa_calculation(self, model, chain_list, rsa_threshold):
        """Use NACCESS to find residues that become exposed in an isolated chain.

        ``chain_list[0]`` and ``chain_list[1]`` index ``model``; each of the
        two chains is wrapped in its own temporary single-model structure.
        The 'all_atoms_rel' value of every residue is read once after running
        NACCESS on ``model`` and once after running it on the isolated
        chains.  A residue is selected when the isolated value exceeds the
        value in ``model`` by more than ``rsa_threshold``.

        Side effect: ``self.interface.accessibility`` is set to
        ``[bsa, sas_A, sas_B, sas_tot]`` with ``bsa = sas_A + sas_B - sas_tot``.

        Returns the list of selected residues.
        """
        pairs=[]
        # Create temporary structures to feed NACCESS
        # (the chain objects themselves are added, not copies, so the
        # temporary structures share their residues with `model`)
        structure_A=Structure("chainA")
        structure_B=Structure("chainB")
        mA = Model(0)
        mB = Model(0)
        mA.add(model[chain_list[0]])
        mB.add(model[chain_list[1]])
        structure_A.add(mA)
        structure_B.add(mB)
        # Calculate SASAs
        # presumably NACCESS(...) fills res.xtra['EXP_NACCESS'] on the
        # residues it is given -- TODO confirm; the return value is unused
        nacc_at=NACCESS(model)
        model_values=[]

        # Only residues with a blank hetero flag are considered.
        res_list = [r for r in model.get_residues() if r.id[0] == ' ']
        structure_A_reslist =[r for r in structure_A[0].get_residues() if r.id[0] == ' ']
        structure_B_reslist =[r for r in structure_B[0].get_residues() if r.id[0] == ' ']

        # Values for the whole model are captured here, before NACCESS is
        # run again on the isolated chains (same residue objects).
        for res in res_list:
            model_values.append(float(res.xtra['EXP_NACCESS']['all_atoms_rel']))


        sas_tot= self._get_residue_SASA(model)
        #print 'Accessible surface area, complex:', sas_tot

        nacc_at=NACCESS(structure_A[0])
        nacc_at=NACCESS(structure_B[0])
        submodel_values=[]

        # Isolated-chain values: all of chain A first, then all of chain B.
        for res in structure_A_reslist:
            if res.id[0]==' ':
                submodel_values.append(float(res.xtra['EXP_NACCESS']['all_atoms_rel']))

        for res in structure_B_reslist:
            if res.id[0]==' ':
                submodel_values.append(float(res.xtra['EXP_NACCESS']['all_atoms_rel']))

        # NOTE(review): submodel_values is indexed with the position in
        # res_list, which assumes `model` yields exactly the residues of
        # chain A followed by those of chain B -- verify for models with
        # other chains or a different chain order.
        count=0
        for res in res_list:
            if res in structure_A_reslist and ((submodel_values[count] - model_values[count]) > rsa_threshold):
                pairs.append(res)
            elif res in structure_B_reslist and ((submodel_values[count] - model_values[count]) > rsa_threshold):
                pairs.append(res)
            count=count+1


        sas_A= self._get_residue_SASA(structure_A)
        #print 'Accessible surface aream CHAIN A :', sas_A
        sas_B= self._get_residue_SASA(structure_B)
        #print 'Accessible surface aream CHAIN B :',sas_B

        # Calculate BSA
        bsa = sas_A+sas_B-sas_tot

        self.interface.accessibility=[bsa, sas_A, sas_B, sas_tot]

        return pairs
    def createPDBFile(self):
        """Create a test mmCIF file with one CA atom per icosahedron vertex.

        The vertices come from ``Icosahedron(circumscribed_radius=100,
        orientation='222r').getVertices()``.  Each vertex becomes an ALA
        residue (numbered from 1) with a single CA atom in chain 'A' of
        model 1.  The file is written to /tmp/out.cif, replacing any existing
        file, and that path is returned.
        """
        from Bio.PDB.Structure import Structure
        from Bio.PDB.Model import Model
        from Bio.PDB.Chain import Chain
        from Bio.PDB.Residue import Residue
        from Bio.PDB.Atom import Atom
        from Bio.PDB.mmcifio import MMCIFIO
        import os
        CIFFILENAME = "/tmp/out.cif"

        # Vertex coordinates with icosahedral symmetry (i222r).
        icosahedron = Icosahedron(circumscribed_radius=100, orientation='222r')
        vertices = icosahedron.getVertices()

        # Biopython hierarchy: structure 'result' -> model 1 -> chain 'A'.
        structure = Structure('result')
        model = Model(1, 1)  # model_id, serial_num
        structure.add(model)
        chain = Chain('A')
        model.add(chain)

        for serial, vertex in enumerate(vertices, 1):
            # Blank first id field marks an ATOM record rather than a hetero atom.
            residue = Residue((' ', serial, ' '), "ALA", '    ')
            chain.add(residue)
            # name, coord, bfactor, occupancy, altloc, fullname, serial, element
            residue.add(Atom('CA', vertex, 0., 1., " ", " CA ", serial, "C"))

        writer = MMCIFIO()
        writer.set_structure(structure)
        # Remove a stale file first.
        if os.path.exists(CIFFILENAME):
            os.remove(CIFFILENAME)
        writer.save(CIFFILENAME)
        return CIFFILENAME
Example #34
0
class StructureBuilder(object):
    """
    Deals with constructing the Structure object. The StructureBuilder class is used
    by the PDBParser classes to translate a file to a Structure object.

    The builder is stateful: each init_* call creates (or selects) an object,
    attaches it to the object created by the previous level and remembers it
    as the "current" structure / model / chain / residue / atom. Only
    line_counter and header are set in __init__; every other attribute
    (structure, model, chain, segid, residue, atom) exists only after the
    corresponding init_* method has been called.
    """
    def __init__(self):
        """Start with line counter 0 and an empty header dict."""
        self.line_counter=0
        self.header={}

    def _is_completely_disordered(self, residue):
        "Return 1 if all atoms in the residue have a non blank altloc."
        atom_list=residue.get_unpacked_list()
        for atom in atom_list:
            altloc=atom.get_altloc()
            if altloc==" ":
                # A single blank altloc is enough to answer "no"
                return 0
        return 1

    # Public methods called by the Parser classes

    def set_header(self, header):
        """Store the header; get_structure() later attaches it to the structure."""
        self.header=header

    def set_line_counter(self, line_counter):
        """
        The line counter keeps track of the line in the PDB file that
        is being parsed. It is only used in warning messages.

        Arguments:
        o line_counter - int
        """
        self.line_counter=line_counter

    def init_structure(self, structure_id):
        """Initiate a new Structure object with given id.

        The new object replaces any previously stored self.structure.

        Arguments:
        o id - string
        """
        self.structure=Structure(structure_id)

    def init_model(self, model_id, serial_num = None):
        """Initiate a new Model object with given id.

        The model becomes the current model and is added to the current
        structure, so init_structure() must have been called first.

        Arguments:
        o id - int
        o serial_num - int
        """
        self.model=Model(model_id,serial_num)
        self.structure.add(self.model)

    def init_chain(self, chain_id):
        """Initiate a new Chain object with given id.

        If the current model already holds a chain with this id, that chain
        becomes the current chain again and a PDBConstructionWarning about a
        discontinuous chain is issued; otherwise a new Chain is created and
        added to the current model.

        Arguments:
        o chain_id - string
        """
        if self.model.has_id(chain_id):
            self.chain=self.model[chain_id]
            warnings.warn("WARNING: Chain %s is discontinuous at line %i."
                          % (chain_id, self.line_counter),
                          PDBConstructionWarning)
        else:
            self.chain=Chain(chain_id)
            self.model.add(self.chain)

    def init_seg(self, segid):
        """Flag a change in segid.

        The stored value is passed to every Residue created afterwards by
        init_residue().

        Arguments:
        o segid - string
        """
        self.segid=segid

    def init_residue(self, resname, field, resseq, icode):
        """
        Initiate a new Residue object.

        The residue id is the tuple (field, resseq, icode), where a field of
        "H" is first expanded to "H_" + resname. For ids with a blank field
        that already exist in the current chain, the residues sharing that id
        are gathered in a DisorderedResidue instead of a plain Residue being
        added to the chain.

        Arguments:
        o resname - string, e.g. "ASN"
        o field - hetero flag, "W" for waters, "H" for
            hetero residues, otherwise blank.
        o resseq - int, sequence identifier
        o icode - string, insertion code

        Raises:
        o PDBConstructionException - the id already exists as a plain residue
            and that residue has at least one atom with a blank altloc.
            self.residue is set to None before raising.
        """
        # NOTE(review): self.segid is only assigned in init_seg(); calling
        # this method before init_seg() would fail on the attribute lookup.
        if field!=" ":
            if field=="H":
                # The hetero field consists of H_ + the residue name (e.g. H_FUC)
                field="H_"+resname
        res_id=(field, resseq, icode)
        # Duplicate handling is only attempted for residues with a blank
        # hetero field; all other residues go straight to the plain creation
        # at the bottom of this method.
        if field==" ":
            if self.chain.has_id(res_id):
                # There already is a residue with the id (field, resseq, icode).
                # This only makes sense in the case of a point mutation.
                warnings.warn("WARNING: Residue ('%s', %i, '%s') "
                              "redefined at line %i."
                              % (field, resseq, icode, self.line_counter),
                              PDBConstructionWarning)
                duplicate_residue=self.chain[res_id]
                if duplicate_residue.is_disordered()==2:
                    # The residue in the chain is a DisorderedResidue object.
                    # So just add the last Residue object.
                    if duplicate_residue.disordered_has_id(resname):
                        # The residue was already made
                        self.residue=duplicate_residue
                        duplicate_residue.disordered_select(resname)
                        # NOTE(review): there is no return here, so control
                        # falls through to the plain creation below, which
                        # calls self.chain.add() with an id the chain already
                        # holds - confirm this fall-through is intended.
                    else:
                        # Make a new residue and add it to the already
                        # present DisorderedResidue
                        new_residue=Residue(res_id, resname, self.segid)
                        duplicate_residue.disordered_add(new_residue)
                        self.residue=duplicate_residue
                        return
                else:
                    # Make a new DisorderedResidue object and put all
                    # the Residue objects with the id (field, resseq, icode) in it.
                    # These residues each should have non-blank altlocs for all their atoms.
                    # If not, the PDB file probably contains an error.
                    if not self._is_completely_disordered(duplicate_residue):
                        # if this exception is ignored, a residue will be missing
                        self.residue=None
                        raise PDBConstructionException(
                            "Blank altlocs in duplicate residue %s ('%s', %i, '%s')"
                            % (resname, field, resseq, icode))
                    # Swap the plain residue in the chain for a
                    # DisorderedResidue that holds both the old and new one.
                    self.chain.detach_child(res_id)
                    new_residue=Residue(res_id, resname, self.segid)
                    disordered_residue=DisorderedResidue(res_id)
                    self.chain.add(disordered_residue)
                    disordered_residue.disordered_add(duplicate_residue)
                    disordered_residue.disordered_add(new_residue)
                    self.residue=disordered_residue
                    return
        # Plain case: create the residue, add it to the current chain and
        # make it the current residue.
        residue=Residue(res_id, resname, self.segid)
        self.chain.add(residue)
        self.residue=residue

    def init_atom(self, name, coord, b_factor, occupancy, altloc, fullname,
                  serial_number=None, element=None):
        """
        Initiate a new Atom object.

        The atom is created, stored as the current atom (self.atom) and added
        to the current residue. Atoms with a non-blank altloc are wrapped in
        a DisorderedAtom stored under the atom name. Nothing happens when the
        current residue is None.

        Arguments:
        o name - string, atom name, e.g. CA, spaces should be stripped
        o coord - Numeric array (Float0, size 3), atomic coordinates
        o b_factor - float, B factor
        o occupancy - float
        o altloc - string, alternative location specifier
        o fullname - string, atom name including spaces, e.g. " CA "
        o serial_number - passed through to Atom unchanged (default None)
        o element - string, upper case, e.g. "HG" for mercury
        """
        residue=self.residue
        # if residue is None, an exception was generated during
        # the construction of the residue
        if residue is None:
            # self.atom is left untouched, so it still refers to the atom
            # created by the previous successful call (if any).
            return
        # First check if this atom is already present in the residue.
        # If it is, it might be due to the fact that the two atoms have atom
        # names that differ only in spaces (e.g. "CA.." and ".CA.",
        # where the dots are spaces). If that is so, use all spaces
        # in the atom name of the current atom.
        if residue.has_id(name):
                duplicate_atom=residue[name]
                # atom name with spaces of duplicate atom
                duplicate_fullname=duplicate_atom.get_fullname()
                if duplicate_fullname!=fullname:
                    # name of current atom now includes spaces
                    name=fullname
                    warnings.warn("Atom names %r and %r differ "
                                  "only in spaces at line %i."
                                  % (duplicate_fullname, fullname,
                                     self.line_counter),
                                  PDBConstructionWarning)
        # From here on `name` may be the space-padded fullname (see above),
        # and all lookups below use that possibly updated name.
        atom=self.atom=Atom(name, coord, b_factor, occupancy, altloc,
                            fullname, serial_number, element)
        if altloc!=" ":
            # The atom is disordered
            if residue.has_id(name):
                # Residue already contains this atom
                duplicate_atom=residue[name]
                if duplicate_atom.is_disordered()==2:
                    # Existing entry is treated as a DisorderedAtom:
                    # just add the new location to it.
                    duplicate_atom.disordered_add(atom)
                else:
                    # This is an error in the PDB file:
                    # a disordered atom is found with a blank altloc
                    # Detach the duplicate atom, and put it in a
                    # DisorderedAtom object together with the current
                    # atom.
                    residue.detach_child(name)
                    disordered_atom=DisorderedAtom(name)
                    residue.add(disordered_atom)
                    disordered_atom.disordered_add(atom)
                    disordered_atom.disordered_add(duplicate_atom)
                    residue.flag_disordered()
                    warnings.warn("WARNING: disordered atom found "
                                  "with blank altloc before line %i.\n"
                                  % self.line_counter,
                                  PDBConstructionWarning)
            else:
                # The residue does not contain this disordered atom
                # so we create a new one.
                disordered_atom=DisorderedAtom(name)
                residue.add(disordered_atom)
                # Add the real atom to the disordered atom, and the
                # disordered atom to the residue
                disordered_atom.disordered_add(atom)
                residue.flag_disordered()
        else:
            # The atom is not disordered
            residue.add(atom)

    def set_anisou(self, anisou_array):
        "Set anisotropic B factor of current Atom."
        self.atom.set_anisou(anisou_array)

    def set_siguij(self, siguij_array):
        "Set standard deviation of anisotropic B factor of current Atom."
        self.atom.set_siguij(siguij_array)

    def set_sigatm(self, sigatm_array):
        "Set standard deviation of atom position of current Atom."
        self.atom.set_sigatm(sigatm_array)

    def get_structure(self):
        "Return the structure, with the stored header attached as .header."
        # first sort everything
        # self.structure.sort()
        # Add the header dict
        self.structure.header=self.header
        return self.structure

    def set_symmetry(self, spacegroup, cell):
        """Accept space group and cell information; this builder ignores both."""
        pass
					if atom.coord[0] < atom2.coord[0]:
						atom3.coord[0] += xDistancePerStep
					elif atom.coord[0] > atom2.coord[0]:
						atom3.coord[0] -= xDistancePerStep

					if atom.coord[1] < atom2.coord[1]:
						atom3.coord[1] += yDistancePerStep
					elif atom.coord[1] > atom2.coord[1]:
						atom3.coord[1] -= yDistancePerStep	

					if atom.coord[2] < atom2.coord[2]:
						atom3.coord[2] += zDistancePerStep
					elif atom.coord[2] > atom2.coord[2]:
						atom3.coord[2] -= zDistancePerStep

		yield newModel

	if startEndInclusive:
		final.id = steps + 1

		yield final

# Write every interpolated model to its own PDB file, numbered from 0.
# The 'frames' directory is not created here.
modelFrame = 0
for model in interpolate(structure[0], structure[1], 10, True):
	# Each model is wrapped in a fresh single-model Structure for saving
	result = Structure('result')
	result.add(model)

	io = PDBIO()
	io.set_structure(result)
	io.save('frames/out_' + str(modelFrame) + '.pdb')

	modelFrame = modelFrame + 1
Example #36
0
    for line in open(filename).readlines():
        if not line.startswith('#'):
            bfactors[ind] = array((line.split())[column]) 
            ind = ind+1
    return bfactors	
#--------------------------------------------------------------------
# Read the point coordinates and, when a B-factor file was supplied, one
# B-factor value per point from the requested column; otherwise use zeros.
points = ReadXYZ(args['src'], args['scale'])
if args['bfactor'] is not None:
    # Parenthesised single-argument form: prints the same text under
    # Python 2 and is valid syntax under Python 3.
    print("read bfactor file column %d" % args['column'])
    bfactors = ReadBfactor(args['bfactor'], args['column'])
else:
    bfactors = zeros(len(points))

model = Model(1)
chain = Chain("A")
structure = Structure("ref")

# One ALA residue with a single CA atom per point; residue numbers and atom
# serial numbers both start at 1.
num_count = 0
for i in range(0, shape(points)[0]):
    num_count += 1
    res_id = (' ', num_count, ' ')
    residue = Residue(res_id, 'ALA', ' ')
    cur_coord = tuple(points[i])
    bfactor = bfactors[i]
    # Atom(name, coord, bfactor, occupancy, altloc, fullname,
    #      serial_number, element)
    atom = Atom('CA', cur_coord, bfactor, 0, ' ', 'CA', num_count, 'C')
    residue.add(atom)
    chain.add(residue)

model.add(chain)
structure.add(model)
# --------------------------------------------------------------------
from Bio.PDB import PDBParser, PDBIO, Superimposer
from Bio.PDB.Structure import Structure
from copy import deepcopy

# Load the two pre-aligned conformations that will be interpolated.
parser = PDBParser()

initial = parser.get_structure(
    "initial", "../PDBFiles/1N3W_ALIGNED_1PEB.pdb")
final = parser.get_structure(
    "final", "../PDBFiles/1PEB_ALIGNED_1N3W.pdb")

result = Structure("result")

# Fit `final` onto `initial` using all atoms of both structures, then apply
# the resulting transformation to the atoms of `final`.
sup = Superimposer()
sup.set_atoms(list(initial.get_atoms()), list(final.get_atoms()))
sup.apply(list(final.get_atoms()))


def interpolate(initial, final, steps, startEndInclusive):
    if startEndInclusive:
        yield initial

    newModel = deepcopy(initial)

    for index in range(1, steps + 1):
        newModel.id = index

        for chain, chain2, chain3 in zip(initial, final, newModel):
            for residue, residue2, residue3 in zip(chain, chain2, chain3):
                for atom, atom2, atom3 in zip(residue, residue2, residue3):

                    # The distances between each point's coordinates in cartasian space
                    xDistance = abs(atom.coord[0] - atom2.coord[0])
Example #38
0
	"""
	num_lines = sum(1 for line in open(filename))
	points = zeros(shape=(num_lines,3))
	ind = 0
	for line in open(filename).readlines():
		points[ind] = array((line.split()[0:3])) 
		points[ind] = points[ind] * scale
		ind = ind+1
	return points	



#--------------------------------------------------------------------
# Build a single-chain reference structure from the points selected by
# IndexList in the reference points file.
ref_ptsfilename = "K562.pts"
refid = "ref"
structure = Structure(refid)
model_ref = Model(1)
chain_ref = Chain("A")
points_ref = ReadXYZ(ref_ptsfilename,scale)

# One ALA residue with a single CA atom per selected point; residue numbers
# and atom serial numbers both start at 1.
num_count = 0
for i in range(0,shape(points_ref[IndexList])[0]):
	num_count = num_count +1
	res_id = (' ',num_count,' ')
	residue = Residue(res_id,'ALA',' ')
	cur_coord = tuple(points_ref[IndexList[i]])
	# Atom(name, coord, bfactor, occupancy, altloc, fullname,
	#      serial_number, element)
	# fullname must be the atom name as a string; the running integer
	# counter was previously passed in that position by mistake.
	atom = Atom('CA',cur_coord,0,0,' ',' CA ',num_count,'C')
	residue.add(atom)
	chain_ref.add(residue)
model_ref.add(chain_ref)
structure.add(model_ref)