def getStructFromFasta(self, fname, chainType):
    '''
    Create a Bio.PDB.Structure object from the sequence in a fasta file.

    Only the Structure -> Model -> Chain -> Residue hierarchy is built.
    No atoms are added, so no coordinates are available.

    :param fname: str. Path to the fasta file.
    :param chainType: str. "l" or "r". Its upper-cased value becomes the
        chain id; "l" is reported as input "1", anything else as input "2".
    :return: the Structure, holding one model (id 0) with one chain whose
        residues are numbered from 0 in sequence order.
    '''
    # inputNumber is only used to report which partner failed on error.
    seq = self.parseFasta(
        fname, inputNumber="1" if chainType == "l" else "2")
    prefix = self.splitExtendedPrefix(self.getExtendedPrefix(fname))[0]
    chainId = chainType.upper()

    struct = Structure(prefix)
    model = Model(0)
    struct.add(model)
    chain = Chain(chainId)
    model.add(chain)

    for i, aa in enumerate(seq):
        try:
            resname = one_to_three(aa)
        except KeyError:
            # Letters without a three-letter code become unknown residues.
            resname = "UNK"
        # The file prefix is passed as the third Residue argument (segid).
        chain.add(Residue((' ', i, ' '), resname, prefix))
    return struct
def retrieve_ca_model(structure):
    """
    Build a reduced copy of the first model of ``structure`` in which
    chains are represented only by main chain atoms (C-alphas or C4').

    Every atom named "CA", "C4'" or "C4*" yields one new residue holding a
    single atom named 'CA' at the same coordinates. The new residue keeps
    the original residue number and name, but gets a blank hetero flag and
    a blank insertion code.

    :param structure: structure whose model ``structure[0]`` is reduced.
    :return: a new Structure with id 'clustering_model' and one model (0).
    """
    main_chain_names = ("CA", "C4'", "C4*")

    reduced_struct = Structure('clustering_model')
    reduced_struct.add(Model(0))

    for ch in structure[0]:
        my_chain = Chain(ch.id)
        reduced_struct[0].add(my_chain)
        for resi in ch:
            for atom in resi:
                if atom.get_name() not in main_chain_names:
                    continue
                my_residue = Residue((' ', resi.id[1], ' '),
                                     resi.get_resname(), ' ')
                # NOTE(review): in Bio.PDB's Atom signature the 4th
                # positional argument is the occupancy; a blank string is
                # passed here, as in the original code - confirm intended.
                ca_atom = Atom('CA', atom.coord, 0, ' ', ' ', 'CA',
                               atom.get_serial_number())
                my_chain.add(my_residue)
                my_residue.add(ca_atom)
    return reduced_struct
def renumber_windowed_model(self, structure: Structure, alphafold_mmCIF_dict: Dict) -> Structure:
    """Return a copy of ``structure`` renumbered to the aligned residue range.

    The first and last residue numbers are read from the
    ``_ma_target_ref_db_details.seq_db_align_begin`` / ``..._end`` entries
    of ``alphafold_mmCIF_dict``. Residues of every chain are copied and
    numbered consecutively from the first number, with blank hetero flag
    and insertion code. An ``assert`` checks that each chain ends exactly
    on the last number.
    """
    details_key = '_ma_target_ref_db_details.seq_db_align_%s'
    first_number = int(alphafold_mmCIF_dict[details_key % 'begin'][0])
    last_number = int(alphafold_mmCIF_dict[details_key % 'end'][0])

    result = Structure(structure.id)
    for source_model in structure:
        new_model = Model(source_model.id)
        for source_chain in source_model:
            new_chain = Chain(source_chain.id)
            # Stays at first_number - 1 when the chain has no residues.
            number = first_number - 1
            for number, source_residue in enumerate(source_chain, start=first_number):
                residue_copy = source_residue.copy()
                residue_copy.id = (' ', number, ' ')
                # The copy does not carry disorder over properly, so all
                # notion of disorder is wiped from the copied residue.
                for copied_atom in residue_copy:
                    copied_atom.disordered_flag = 0
                residue_copy.disordered = 0
                new_chain.add(residue_copy)
            assert number + 1 == last_number + 1
            new_model.add(new_chain)
        result.add(new_model)
    return result
def retrieve_sphere_model(structure):  # , score):
    """
    Build a structure in which each chain is represented by its centre of
    mass only.

    For every chain returned by ``structure.get_chains()`` a chain with the
    same id is added to model 0 of the result. It holds one residue (named
    after the chain id, numbered by the chain's position starting at 0)
    with a single 'CA' atom placed at ``calculate_centre_of_complex(chain)``.

    :param structure: source structure; it is not modified.
    :return: a new Structure with id 'clustering_model'.
    """
    sphere_struct = Structure('clustering_model')
    sphere_struct.add(Model(0))

    # The numbering is changed: residues are numbered by chain position.
    for index, chain in enumerate(structure.get_chains()):
        my_chain = Chain(chain.id)
        sphere_struct[0].add(my_chain)

        coords = array(calculate_centre_of_complex(chain), 'f')
        my_residue = Residue((' ', index, ' '), chain.id, ' ')
        atom = Atom('CA', coords, 0, 0, ' ', 'CA', 1)

        my_chain.add(my_residue)
        my_residue.add(atom)
    return sphere_struct
def renumber(chain, new_id=" "):
    """
    Renumber a chain from 1, stripping insertion codes.

    Each residue is rebuilt with its original hetero flag, name and segid,
    and receives copies of its atoms.

    :param `Bio.PDB.Chain` chain: structure to sanitise.
    :param str new_id: ID of the new chain.

    :return: A 2-tuple ``(mapping, sanitised_chain)`` containing, in this
        order:

        1. A list of the old residue IDs, as returned by
           :py:meth:`Bio.PDB.Chain.Chain.get_id`, in chain order.
        2. The new :py:class:`Bio.PDB.Chain.Chain` object.
    """
    old_ids = []
    clean_chain = Chain(new_id)
    for new_number, old_res in enumerate(chain, start=1):
        old_id = old_res.get_id()
        hetero_flag = old_id[0]
        clean_res = Residue((hetero_flag, new_number, ' '),
                            old_res.get_resname(),
                            old_res.get_segid())
        old_ids.append(old_res.get_id())
        for old_atom in old_res:
            clean_res.add(old_atom.copy())
        clean_chain.add(clean_res)
    return old_ids, clean_chain
def create_sphere_representation(self):
    """
    Represent the full-atom chain by its centre of mass only.

    Builds a structure with one model (0), one chain (id taken from
    ``self.fa_struct.chain``) and one "ALA" residue numbered 1 whose single
    'CA' atom sits at the centre returned by
    ``self.calculate_centre_of_complex(self.fa_struct.struct)``.

    Side effects: sets ``self.molmass``, ``self.radius`` and
    ``self.cg_struct``, and passes the new structure to ``self.save_pdb``
    under the name "dddd" + chain id.
    """
    new_struct = Structure('sphrere')
    new_struct.add(Model(0))

    my_chain = Chain(self.fa_struct.chain)
    new_struct[0].add(my_chain)

    coord, self.molmass, self.radius = self.calculate_centre_of_complex(
        self.fa_struct.struct)

    my_residue = Residue((' ', 1, ' '), "ALA", ' ')
    atom = Atom('CA', array(coord, 'f'), 0, 0, ' ', ' CA', 1)
    my_chain.add(my_residue)
    my_residue.add(atom)

    self.cg_struct = new_struct
    self.save_pdb(new_struct, "dddd" + self.fa_struct.chain)
def _align(self):
    """Renumber both proteins and write them to temporary PDB files.

    The residue collections returned by ``self._pp`` for ``protein_A``
    (chain 'A') and ``protein_B`` (chain ' ') are renumbered in place to
    1000, 1001, ... keeping each residue's hetero flag and insertion code.
    Each collection is then moved into a fresh chain with id ' ' and saved
    as '.tmp.protein_a.pdb' / '.tmp.protein_b.pdb' in the current directory.
    """
    plus = 1000  # first residue number assigned after renumbering

    pp_a = self._pp(self.protein_A, 'A')
    pp_b = self._pp(self.protein_B, ' ')
    polypeptides = (pp_a, pp_b)

    # Renumber both proteins before any residue is moved to a new chain.
    for polypeptide in polypeptides:
        for offset, residue in enumerate(polypeptide):
            residue.id = (residue.id[0], plus + offset, residue.id[2])

    new_chains = []
    for polypeptide in polypeptides:
        new_chain = Chain(' ')
        for residue in polypeptide:
            new_chain.add(residue)
        new_chains.append(new_chain)

    out_paths = ('.tmp.protein_a.pdb', '.tmp.protein_b.pdb')
    for new_chain, out_path in zip(new_chains, out_paths):
        io = PDBIO()
        io.set_structure(new_chain)
        io.save(out_path)
def normalize_chain(chain: Chain) -> Chain:
    """Return a new chain holding the normalized residues of ``chain``.

    The new chain has the same id. Residues for which an
    ``UnknownResidueError`` is raised are left out.
    """
    normalized = Chain(chain.id)
    for original_residue in chain:
        try:
            normalized.add(normalize_residue(original_residue))
        except UnknownResidueError:
            # Unknown residues are dropped on purpose.
            continue
    return normalized
def initialize_res(residue: Union[Geo, str]) -> Structure:
    """Creates a new structure containing a single amino acid. The type and
    geometry of the amino acid are determined by the argument, which has to be
    either a geometry object or a single-letter amino acid code.
    The amino acid will be placed into chain A of model 0.

    The alpha carbon is put at the origin, C on the x axis at the CA-C bond
    length, and N in the xy plane at the N-CA-C angle. The carbonyl oxygen
    is positioned by ``calculateCoordinates``.

    Raises ValueError if ``residue`` is neither a Geo nor a str.
    """
    if isinstance(residue, Geo):
        geo = residue
    elif isinstance(residue, str):
        geo = geometry(residue)
    else:
        raise ValueError("Invalid residue argument:", residue)

    segID = 1
    CA_N_length = geo.CA_N_length
    CA_C_length = geo.CA_C_length
    # The angle is converted to radians once for the math functions below.
    N_CA_C_angle_rad = geo.N_CA_C_angle * (math.pi / 180.0)

    CA_coord = np.array([0.0, 0.0, 0.0])
    C_coord = np.array([CA_C_length, 0, 0])
    N_coord = np.array([
        CA_N_length * math.cos(N_CA_C_angle_rad),
        CA_N_length * math.sin(N_CA_C_angle_rad),
        0,
    ])

    N = Atom("N", N_coord, 0.0, 1.0, " ", " N", 0, "N")

    # Check if the peptide is capped or not: an ACE cap carries a CH3
    # atom in place of the alpha carbon.
    if geo.residue_name == "ACE":
        CA = Atom("CH3", CA_coord, 0.0, 1.0, " ", " CH3", 0, "C")
    else:
        CA = Atom("CA", CA_coord, 0.0, 1.0, " ", " CA", 0, "C")

    C = Atom("C", C_coord, 0.0, 1.0, " ", " C", 0, "C")

    # Create carbonyl atom (to be moved later)
    carbonyl = calculateCoordinates(
        N, CA, C, geo.C_O_length, geo.CA_C_O_angle, geo.N_CA_C_O_diangle
    )
    O = Atom("O", carbonyl, 0.0, 1.0, " ", " O", 0, "O")

    res = make_res_of_type(segID, N, CA, C, O, geo)

    cha = Chain("A")
    cha.add(res)

    mod = Model(0)
    mod.add(cha)

    struc = Structure("X")
    struc.add(mod)
    return struc
def add_dummy_structure(self):
    """Create a dummy atom at zero coordinates to mark a gap in
    visualisation software.

    The atom is wrapped in a 'DUM' residue numbered ``-1 * self.chiral_id``
    inside a chain with id '?'. The residue (not the chain) is stored in
    ``self.dummy_structure``. Always returns True.
    """
    placeholder_chain = Chain('?')
    placeholder_residue = Residue(
        (' ', -1 * self.chiral_id, ' '), 'DUM', '?')
    placeholder_residue.add(
        Atom('DUM', np.zeros(3), 0, 1, ' ', 'DUM', -999))
    placeholder_chain.add(placeholder_residue)

    self.dummy_structure = placeholder_residue
    return True
def initialize_res(residue):
    '''Creates a new structure containing a single amino acid. The type and
    geometry of the amino acid are determined by the argument, which has to be
    either a geometry object or a single-letter amino acid code.
    The amino acid will be placed into chain A of model 0.

    The alpha carbon is put at the origin, C on the x axis at the CA-C bond
    length, and N in the xy plane at the N-CA-C angle. The carbonyl oxygen
    is positioned by calculateCoordinates.'''
    if isinstance(residue, Geo):
        geo = residue
    else:
        geo = Geo(residue)

    segID = 1
    CA_N_length = geo.CA_N_length
    CA_C_length = geo.CA_C_length
    # The angle is converted to radians once for the math functions below.
    N_CA_C_angle_rad = geo.N_CA_C_angle * (math.pi / 180.0)

    CA_coord = np.array([0., 0., 0.])
    C_coord = np.array([CA_C_length, 0, 0])
    N_coord = np.array([CA_N_length * math.cos(N_CA_C_angle_rad),
                        CA_N_length * math.sin(N_CA_C_angle_rad),
                        0])

    N = Atom("N", N_coord, 0.0, 1.0, " ", " N", 0, "N")
    CA = Atom("CA", CA_coord, 0.0, 1.0, " ", " CA", 0, "C")
    C = Atom("C", C_coord, 0.0, 1.0, " ", " C", 0, "C")

    # Create carbonyl atom (to be moved later)
    carbonyl = calculateCoordinates(N, CA, C, geo.C_O_length,
                                    geo.CA_C_O_angle, geo.N_CA_C_O_diangle)
    O = Atom("O", carbonyl, 0.0, 1.0, " ", " O", 0, "O")

    res = makeRes(segID, N, CA, C, O, geo)

    cha = Chain('A')
    cha.add(res)

    mod = Model(0)
    mod.add(cha)

    struc = Structure('X')
    struc.add(mod)
    return struc
def __make_structure_from_residues__(self, residues):
    """
    Makes a Structure object from a list of residues.

    Residues are placed into chains named 'c1', 'c2', ... in input order.
    A new chain is started whenever the current chain already contains a
    residue with the same id. All chains go into one model 'm' of a
    structure 's'.
    """
    # KR: this probably can be outsourced to another module.
    chain_template = 'c%i'
    chain_number = 1

    target_model = Model('m')
    current_chain = Chain(chain_template % chain_number)
    for res in residues:
        if current_chain.has_id(res.id):
            # Residue id clash: close this chain and open the next one.
            target_model.add(current_chain)
            chain_number += 1
            current_chain = Chain(chain_template % chain_number)
        current_chain.add(res)
    target_model.add(current_chain)

    result = Structure('s')
    result.add(target_model)
    return result
def select_structure(selector, structure):
    """Return a new structure with only the entities ``selector`` accepts.

    ``selector`` must provide ``accept_model``, ``accept_chain``,
    ``accept_residue`` and ``accept_atom``. Models, chains and residues are
    rebuilt as new objects. Accepted atoms are added to the new residues
    as the same objects, not as copies. Children of a rejected entity are
    never offered to the selector.
    """
    filtered = Structure(structure.id)
    for src_model in filter(selector.accept_model, structure):
        dst_model = Model(src_model.id, src_model.serial_num)
        filtered.add(dst_model)
        for src_chain in filter(selector.accept_chain, src_model):
            dst_chain = Chain(src_chain.id)
            dst_model.add(dst_chain)
            for src_residue in filter(selector.accept_residue, src_chain):
                dst_residue = Residue(src_residue.id, src_residue.resname,
                                      src_residue.segid)
                dst_chain.add(dst_residue)
                for kept_atom in filter(selector.accept_atom, src_residue):
                    dst_residue.add(kept_atom)
    return filtered
def createPDBFile(self):
    "Create test CIF file with 12 Atoms in icosahedron vertexes"
    from Bio.PDB.Structure import Structure
    from Bio.PDB.Model import Model
    from Bio.PDB.Chain import Chain
    from Bio.PDB.Residue import Residue
    from Bio.PDB.Atom import Atom
    from Bio.PDB.mmcifio import MMCIFIO
    import os

    CIFFILENAME = "/tmp/out.cif"

    # Vertices of an icosahedron with i222r symmetry orientation.
    vertices = Icosahedron(circumscribed_radius=100,
                           orientation='222r').getVertices()

    # Biopython hierarchy: structure 'result' -> model (id 1, serial 1)
    # -> chain 'A'.
    chain = Chain('A')
    model = Model(1, 1)
    structure = Structure('result')
    structure.add(model)
    model.add(chain)

    # One ALA residue with a single CA atom per vertex, numbered from 1.
    for number, vertex in enumerate(pentom for pentom in vertices):
        number += 1
        # A blank first field in the residue id marks an ATOM record
        # rather than a heteroatom.
        residue = Residue((' ', number, ' '), "ALA", ' ')
        chain.add(residue)
        # Atom(name, coord, bfactor, occupancy, altloc, fullname,
        #      serial_number, element)
        residue.add(Atom('CA', vertex, 0., 1., " ", " CA ", number, "C"))

    writer = MMCIFIO()
    writer.set_structure(structure)
    # Delete a previous output file, if any, before saving.
    if os.path.exists(CIFFILENAME):
        os.remove(CIFFILENAME)
    writer.save(CIFFILENAME)
    return CIFFILENAME
def create_structure(coords, pdb_type, remove_masked):
    """Create the structure and save it as a PDB file.

    The sequence, mask and id are read from the enclosing-scope ``protein``
    object. The file is written to ``save_dir + name + '_' + pdb_type +
    '.pdb'``.

    Args:
        coords: 3D coordinates of structure. Rows 3*i, 3*i+1 and 3*i+2 are
            used for the N, CA and CB atoms of residue i.
        pdb_type: predict or actual structure; used in the file name.
        remove_masked: when it equals False, residues whose mask is not 1
            are written as well; otherwise they are skipped.

    Returns:
        structure
    """
    backbone_atoms = ['N', 'CA', 'CB']
    name = protein.id_

    chain = Chain('A')
    for position, letter in enumerate(protein.primary):
        resname = AA_LETTERS[letter]
        # "== False" is kept on purpose: values such as None must not
        # count as "keep masked residues".
        keep = int(protein.mask[position]) == 1 or remove_masked == False
        if not keep:
            continue
        new_residue = Residue((' ', position + 1, ' '), resname, ' ')
        first_row = 3 * position
        for shift, atom_name in enumerate(backbone_atoms):
            new_residue.add(Atom(name=atom_name,
                                 coord=coords[first_row + shift, :],
                                 bfactor=0,
                                 occupancy=1,
                                 altloc=' ',
                                 fullname=" {} ".format(atom_name),
                                 serial_number=0))
        chain.add(new_residue)

    model = Model(0)
    model.add(chain)
    structure = Structure(name)
    structure.add(model)

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(save_dir + name + '_' + pdb_type + '.pdb')
    return structure
def renumberChain(self, chainID, offset=0, modelID='0', filename="output.mmcif"):
    """Shift all residue numbers of one chain by ``offset`` and write the result.

    The chain is replaced in its model by a new chain with the same id
    that holds the renumbered residues. The structure is then written with
    ``self.write(filename)``.

    All new residue numbers are computed and checked before the structure
    is modified, so a failing check leaves the structure untouched.

    :param chainID: id of the chain to renumber.
    :param offset: value added to every residue number.
    :param modelID: key of the model in ``self.structure``.
    :param filename: output file passed to ``self.write``.
    :raises ValueError: if any resulting residue number is negative.
    """
    from Bio.PDB.Chain import Chain

    model = self.structure[modelID]
    chain = model[chainID]

    # Validate first: raising in the middle of the rebuild would leave the
    # chain detached from the model and only partly renumbered.
    new_ids = []
    for residue in chain:
        res_id = list(residue.id)
        res_id[1] = res_id[1] + offset
        if res_id[1] < 0:
            raise ValueError("Residue number can't be < 0")
        new_ids.append(tuple(res_id))

    # Remove the chain from the model and rebuild it under the same id.
    model.detach_child(chainID)
    newChain = Chain(chainID)
    for residue, new_id in zip(chain, new_ids):
        # Detach the residue, otherwise it cannot be renumbered.
        residue.detach_parent()
        residue.id = new_id
        newChain.add(residue)

    model.add(newChain)
    self.write(filename)
class StructureBuilder:
    """Deals with constructing the Structure object.

    The StructureBuilder class is used by the PDBParser classes to
    translate a file to a Structure object.

    The builder keeps a "current" structure, model, chain, segid, residue
    and atom. Each ``init_*`` call creates (or re-selects) the entity at
    that level and attaches it to the current entity one level up, so the
    calls are expected in hierarchy order.
    """

    def __init__(self):
        """Initialize the class."""
        # Line of the input file being parsed; only used in messages.
        self.line_counter = 0
        # Header dict attached to the structure by get_structure().
        self.header = {}
        # Note: self.structure, self.model, self.chain, self.segid,
        # self.residue and self.atom are only created by the init_* methods.

    def _is_completely_disordered(self, residue):
        """Return 1 if all atoms in the residue have a non blank altloc (PRIVATE)."""
        atom_list = residue.get_unpacked_list()

        for atom in atom_list:
            altloc = atom.get_altloc()
            if altloc == " ":
                return 0
        return 1

    # Public methods called by the Parser classes

    def set_header(self, header):
        """Set header."""
        self.header = header

    def set_line_counter(self, line_counter):
        """Tracks line in the PDB file that is being parsed.

        Arguments:
         - line_counter - int

        """
        self.line_counter = line_counter

    def init_structure(self, structure_id):
        """Initialize a new Structure object with given id.

        Arguments:
         - id - string

        """
        self.structure = Structure(structure_id)

    def init_model(self, model_id, serial_num=None):
        """Create a new Model object with given id.

        The model is added to the current structure and becomes the
        current model.

        Arguments:
         - id - int
         - serial_num - int

        """
        self.model = Model(model_id, serial_num)
        self.structure.add(self.model)

    def init_chain(self, chain_id):
        """Create a new Chain object with given id.

        If the current model already has a chain with this id, that chain
        is reused as the current chain and a PDBConstructionWarning about
        a discontinuous chain is issued.

        Arguments:
         - chain_id - string

        """
        if self.model.has_id(chain_id):
            self.chain = self.model[chain_id]
            warnings.warn(
                "WARNING: Chain %s is discontinuous at line %i."
                % (chain_id, self.line_counter),
                PDBConstructionWarning,
            )
        else:
            self.chain = Chain(chain_id)
            self.model.add(self.chain)

    def init_seg(self, segid):
        """Flag a change in segid.

        The stored segid is passed to every Residue created afterwards.

        Arguments:
         - segid - string

        """
        self.segid = segid

    def init_residue(self, resname, field, resseq, icode):
        """Create a new Residue object.

        For a blank hetero field, a residue id that already exists in the
        current chain is handled as a point mutation: the residues sharing
        the id are collected in a DisorderedResidue. If the existing
        residue has the same name, it is simply reused.

        Raises PDBConstructionException if the existing residue has atoms
        with blank altloc; ``self.residue`` is set to None in that case so
        that following init_atom calls are ignored.

        Arguments:
         - resname - string, e.g. "ASN"
         - field - hetero flag, "W" for waters, "H" for hetero residues,
           otherwise blank.
         - resseq - int, sequence identifier
         - icode - string, insertion code

        """
        if field != " ":
            if field == "H":
                # The hetero field consists of H_ + the residue name (e.g. H_FUC)
                field = "H_" + resname
        res_id = (field, resseq, icode)
        if field == " ":
            if self.chain.has_id(res_id):
                # There already is a residue with the id (field, resseq, icode).
                # This only makes sense in the case of a point mutation.
                warnings.warn(
                    "WARNING: Residue ('%s', %i, '%s') redefined at line %i."
                    % (field, resseq, icode, self.line_counter),
                    PDBConstructionWarning,
                )
                duplicate_residue = self.chain[res_id]
                if duplicate_residue.is_disordered() == 2:
                    # The residue in the chain is a DisorderedResidue object.
                    # So just add the last Residue object.
                    if duplicate_residue.disordered_has_id(resname):
                        # The residue was already made
                        self.residue = duplicate_residue
                        duplicate_residue.disordered_select(resname)
                        # NOTE(review): there is no return on this path, so
                        # execution continues to the Residue creation at the
                        # end of the method - confirm this is intended.
                    else:
                        # Make a new residue and add it to the already
                        # present DisorderedResidue
                        new_residue = Residue(res_id, resname, self.segid)
                        duplicate_residue.disordered_add(new_residue)
                        self.residue = duplicate_residue
                        return
                else:
                    if resname == duplicate_residue.resname:
                        warnings.warn(
                            "WARNING: Residue ('%s', %i, '%s','%s') already defined "
                            "with the same name at line %i."
                            % (field, resseq, icode, resname, self.line_counter),
                            PDBConstructionWarning,
                        )
                        self.residue = duplicate_residue
                        return
                    # Make a new DisorderedResidue object and put all
                    # the Residue objects with the id (field, resseq, icode) in it.
                    # These residues each should have non-blank altlocs for all their atoms.
                    # If not, the PDB file probably contains an error.
                    if not self._is_completely_disordered(duplicate_residue):
                        # if this exception is ignored, a residue will be missing
                        self.residue = None
                        raise PDBConstructionException(
                            "Blank altlocs in duplicate residue %s ('%s', %i, '%s')"
                            % (resname, field, resseq, icode)
                        )
                    # Replace the plain residue in the chain by a
                    # DisorderedResidue holding both the old and new one.
                    self.chain.detach_child(res_id)
                    new_residue = Residue(res_id, resname, self.segid)
                    disordered_residue = DisorderedResidue(res_id)
                    self.chain.add(disordered_residue)
                    disordered_residue.disordered_add(duplicate_residue)
                    disordered_residue.disordered_add(new_residue)
                    self.residue = disordered_residue
                    return
        self.residue = Residue(res_id, resname, self.segid)
        self.chain.add(self.residue)

    def init_atom(
        self,
        name,
        coord,
        b_factor,
        occupancy,
        altloc,
        fullname,
        serial_number=None,
        element=None,
        pqr_charge=None,
        radius=None,
        is_pqr=False,
    ):
        """Create a new Atom object.

        The atom becomes the current atom and is added to the current
        residue, wrapped in a DisorderedAtom when altloc is not blank.
        Nothing is done when the current residue is None.

        Arguments:
         - name - string, atom name, e.g. CA, spaces should be stripped
         - coord - Numeric array (Float0, size 3), atomic coordinates
         - b_factor - float, B factor
         - occupancy - float
         - altloc - string, alternative location specifier
         - fullname - string, atom name including spaces, e.g. " CA "
         - element - string, upper case, e.g. "HG" for mercury
         - pqr_charge - float, atom charge (PQR format)
         - radius - float, atom radius (PQR format)
         - is_pqr - boolean, flag to specify if a .pqr file is being parsed

        """
        residue = self.residue
        # if residue is None, an exception was generated during
        # the construction of the residue
        if residue is None:
            return
        # First check if this atom is already present in the residue.
        # If it is, it might be due to the fact that the two atoms have atom
        # names that differ only in spaces (e.g. "CA.." and ".CA.",
        # where the dots are spaces). If that is so, use all spaces
        # in the atom name of the current atom.
        if residue.has_id(name):
            duplicate_atom = residue[name]
            # atom name with spaces of duplicate atom
            duplicate_fullname = duplicate_atom.get_fullname()
            if duplicate_fullname != fullname:
                # name of current atom now includes spaces
                name = fullname
                warnings.warn(
                    "Atom names %r and %r differ only in spaces at line %i."
                    % (duplicate_fullname, fullname, self.line_counter),
                    PDBConstructionWarning,
                )
        if not is_pqr:
            self.atom = Atom(
                name,
                coord,
                b_factor,
                occupancy,
                altloc,
                fullname,
                serial_number,
                element,
            )
        elif is_pqr:
            # For PQR input, None is passed for B factor and occupancy and
            # the charge and radius are supplied.
            self.atom = Atom(
                name,
                coord,
                None,
                None,
                altloc,
                fullname,
                serial_number,
                element,
                pqr_charge,
                radius,
            )
        if altloc != " ":
            # The atom is disordered
            if residue.has_id(name):
                # Residue already contains this atom
                duplicate_atom = residue[name]
                if duplicate_atom.is_disordered() == 2:
                    duplicate_atom.disordered_add(self.atom)
                else:
                    # This is an error in the PDB file:
                    # a disordered atom is found with a blank altloc
                    # Detach the duplicate atom, and put it in a
                    # DisorderedAtom object together with the current
                    # atom.
                    residue.detach_child(name)
                    disordered_atom = DisorderedAtom(name)
                    residue.add(disordered_atom)
                    disordered_atom.disordered_add(self.atom)
                    disordered_atom.disordered_add(duplicate_atom)
                    residue.flag_disordered()
                    warnings.warn(
                        "WARNING: disordered atom found with blank altloc before "
                        "line %i.\n" % self.line_counter,
                        PDBConstructionWarning,
                    )
            else:
                # The residue does not contain this disordered atom
                # so we create a new one.
                disordered_atom = DisorderedAtom(name)
                residue.add(disordered_atom)
                # Add the real atom to the disordered atom, and the
                # disordered atom to the residue
                disordered_atom.disordered_add(self.atom)
                residue.flag_disordered()
        else:
            # The atom is not disordered
            residue.add(self.atom)

    def set_anisou(self, anisou_array):
        """Set anisotropic B factor of current Atom."""
        self.atom.set_anisou(anisou_array)

    def set_siguij(self, siguij_array):
        """Set standard deviation of anisotropic B factor of current Atom."""
        self.atom.set_siguij(siguij_array)

    def set_sigatm(self, sigatm_array):
        """Set standard deviation of atom position of current Atom."""
        self.atom.set_sigatm(sigatm_array)

    def get_structure(self):
        """Return the structure, with the header dict attached to it."""
        # first sort everything
        # self.structure.sort()
        # Add the header dict
        self.structure.header = self.header
        return self.structure

    def set_symmetry(self, spacegroup, cell):
        """Set symmetry.

        This implementation ignores both arguments and does nothing.
        """
        pass
class StructureBuilder(object):
    """
    Deals with constructing the Structure object. The StructureBuilder class is used
    by the PDBParser classes to translate a file to a Structure object.

    The builder keeps a "current" structure, model, chain, segid, residue
    and atom. Each init_* call creates (or re-selects) the entity at that
    level and attaches it to the current entity one level up.
    """
    def __init__(self):
        # Line of the input file being parsed; only used in messages.
        self.line_counter=0
        # Header dict attached to the structure by get_structure().
        self.header={}
        # Note: self.structure, self.model, self.chain, self.segid,
        # self.residue and self.atom are only created by the init_* methods.

    def _is_completely_disordered(self, residue):
        "Return 1 if all atoms in the residue have a non blank altloc."
        atom_list=residue.get_unpacked_list()
        for atom in atom_list:
            altloc=atom.get_altloc()
            if altloc==" ":
                return 0
        return 1

    # Public methods called by the Parser classes

    def set_header(self, header):
        "Store the header dict that get_structure attaches to the structure."
        self.header=header

    def set_line_counter(self, line_counter):
        """
        The line counter keeps track of the line in the PDB file that
        is being parsed.

        Arguments:
        o line_counter - int
        """
        self.line_counter=line_counter

    def init_structure(self, structure_id):
        """Initiate a new Structure object with given id.

        Arguments:
        o id - string
        """
        self.structure=Structure(structure_id)

    def init_model(self, model_id, serial_num = None):
        """Initiate a new Model object with given id.

        The model is added to the current structure and becomes the
        current model.

        Arguments:
        o id - int
        o serial_num - int
        """
        self.model=Model(model_id,serial_num)
        self.structure.add(self.model)

    def init_chain(self, chain_id):
        """Initiate a new Chain object with given id.

        If the current model already has a chain with this id, that chain
        is reused as the current chain and a PDBConstructionWarning about
        a discontinuous chain is issued.

        Arguments:
        o chain_id - string
        """
        if self.model.has_id(chain_id):
            self.chain=self.model[chain_id]
            warnings.warn("WARNING: Chain %s is discontinuous at line %i."
                          % (chain_id, self.line_counter),
                          PDBConstructionWarning)
        else:
            self.chain=Chain(chain_id)
            self.model.add(self.chain)

    def init_seg(self, segid):
        """Flag a change in segid.

        The stored segid is passed to every Residue created afterwards.

        Arguments:
        o segid - string
        """
        self.segid=segid

    def init_residue(self, resname, field, resseq, icode):
        """
        Initiate a new Residue object.

        For a blank hetero field, a residue id that already exists in the
        current chain is handled as a point mutation: the residues sharing
        the id are collected in a DisorderedResidue.

        Raises PDBConstructionException if the existing residue has atoms
        with blank altloc; self.residue is set to None in that case so
        that following init_atom calls are ignored.

        Arguments:
        o resname - string, e.g. "ASN"
        o field - hetero flag, "W" for waters, "H" for
            hetero residues, otherwise blank.
        o resseq - int, sequence identifier
        o icode - string, insertion code
        """
        if field!=" ":
            if field=="H":
                # The hetero field consists of H_ + the residue name (e.g. H_FUC)
                field="H_"+resname
        res_id=(field, resseq, icode)
        if field==" ":
            if self.chain.has_id(res_id):
                # There already is a residue with the id (field, resseq, icode).
                # This only makes sense in the case of a point mutation.
                warnings.warn("WARNING: Residue ('%s', %i, '%s') "
                              "redefined at line %i."
                              % (field, resseq, icode, self.line_counter),
                              PDBConstructionWarning)
                duplicate_residue=self.chain[res_id]
                if duplicate_residue.is_disordered()==2:
                    # The residue in the chain is a DisorderedResidue object.
                    # So just add the last Residue object.
                    if duplicate_residue.disordered_has_id(resname):
                        # The residue was already made
                        self.residue=duplicate_residue
                        duplicate_residue.disordered_select(resname)
                        # NOTE(review): there is no return on this path, so
                        # execution continues to the Residue creation at the
                        # end of the method - confirm this is intended.
                    else:
                        # Make a new residue and add it to the already
                        # present DisorderedResidue
                        new_residue=Residue(res_id, resname, self.segid)
                        duplicate_residue.disordered_add(new_residue)
                        self.residue=duplicate_residue
                        return
                else:
                    # Make a new DisorderedResidue object and put all
                    # the Residue objects with the id (field, resseq, icode) in it.
                    # These residues each should have non-blank altlocs for all their atoms.
                    # If not, the PDB file probably contains an error.
                    if not self._is_completely_disordered(duplicate_residue):
                        # if this exception is ignored, a residue will be missing
                        self.residue=None
                        raise PDBConstructionException(
                            "Blank altlocs in duplicate residue %s ('%s', %i, '%s')"
                            % (resname, field, resseq, icode))
                    # Replace the plain residue in the chain by a
                    # DisorderedResidue holding both the old and new one.
                    self.chain.detach_child(res_id)
                    new_residue=Residue(res_id, resname, self.segid)
                    disordered_residue=DisorderedResidue(res_id)
                    self.chain.add(disordered_residue)
                    disordered_residue.disordered_add(duplicate_residue)
                    disordered_residue.disordered_add(new_residue)
                    self.residue=disordered_residue
                    return
        residue=Residue(res_id, resname, self.segid)
        self.chain.add(residue)
        self.residue=residue

    def init_atom(self, name, coord, b_factor, occupancy, altloc, fullname,
                  serial_number=None, element=None):
        """
        Initiate a new Atom object.

        The atom becomes the current atom and is added to the current
        residue, wrapped in a DisorderedAtom when altloc is not blank.
        Nothing is done when the current residue is None.

        Arguments:
        o name - string, atom name, e.g. CA, spaces should be stripped
        o coord - Numeric array (Float0, size 3), atomic coordinates
        o b_factor - float, B factor
        o occupancy - float
        o altloc - string, alternative location specifier
        o fullname - string, atom name including spaces, e.g. " CA "
        o element - string, upper case, e.g. "HG" for mercury
        """
        residue=self.residue
        # if residue is None, an exception was generated during
        # the construction of the residue
        if residue is None:
            return
        # First check if this atom is already present in the residue.
        # If it is, it might be due to the fact that the two atoms have atom
        # names that differ only in spaces (e.g. "CA.." and ".CA.",
        # where the dots are spaces). If that is so, use all spaces
        # in the atom name of the current atom.
        if residue.has_id(name):
            duplicate_atom=residue[name]
            # atom name with spaces of duplicate atom
            duplicate_fullname=duplicate_atom.get_fullname()
            if duplicate_fullname!=fullname:
                # name of current atom now includes spaces
                name=fullname
                warnings.warn("Atom names %r and %r differ "
                              "only in spaces at line %i."
                              % (duplicate_fullname, fullname,
                                 self.line_counter),
                              PDBConstructionWarning)
        atom=self.atom=Atom(name, coord, b_factor, occupancy, altloc,
                            fullname, serial_number, element)
        if altloc!=" ":
            # The atom is disordered
            if residue.has_id(name):
                # Residue already contains this atom
                duplicate_atom=residue[name]
                if duplicate_atom.is_disordered()==2:
                    duplicate_atom.disordered_add(atom)
                else:
                    # This is an error in the PDB file:
                    # a disordered atom is found with a blank altloc
                    # Detach the duplicate atom, and put it in a
                    # DisorderedAtom object together with the current
                    # atom.
                    residue.detach_child(name)
                    disordered_atom=DisorderedAtom(name)
                    residue.add(disordered_atom)
                    disordered_atom.disordered_add(atom)
                    disordered_atom.disordered_add(duplicate_atom)
                    residue.flag_disordered()
                    warnings.warn("WARNING: disordered atom found "
                                  "with blank altloc before line %i.\n"
                                  % self.line_counter,
                                  PDBConstructionWarning)
            else:
                # The residue does not contain this disordered atom
                # so we create a new one.
                disordered_atom=DisorderedAtom(name)
                residue.add(disordered_atom)
                # Add the real atom to the disordered atom, and the
                # disordered atom to the residue
                disordered_atom.disordered_add(atom)
                residue.flag_disordered()
        else:
            # The atom is not disordered
            residue.add(atom)

    def set_anisou(self, anisou_array):
        "Set anisotropic B factor of current Atom."
        self.atom.set_anisou(anisou_array)

    def set_siguij(self, siguij_array):
        "Set standard deviation of anisotropic B factor of current Atom."
        self.atom.set_siguij(siguij_array)

    def set_sigatm(self, sigatm_array):
        "Set standard deviation of atom position of current Atom."
        self.atom.set_sigatm(sigatm_array)

    def get_structure(self):
        "Return the structure, with the header dict attached to it."
        # first sort everything
        # self.structure.sort()
        # Add the header dict
        self.structure.header=self.header
        return self.structure

    def set_symmetry(self, spacegroup, cell):
        "Accept symmetry information; this implementation does nothing with it."
        pass
# Build a one-chain structure with one ALA residue (a single CA atom) per
# point, then write it out as PDB.
bfactors = zeros(len(points))  # every atom gets a B factor of 0
model = Model(1)
chain = Chain("A")
structure = Structure("ref")
num_count = 0
for i in range(0,shape(points)[0]):
    # Residue numbers and atom serial numbers are 1-based.
    num_count = num_count +1
    res_id = (' ',num_count,' ')
    residue = Residue(res_id,'ALA',' ')
    cur_coord = tuple(points[i])
    bfactor = bfactors[i]
    atom = Atom('CA',cur_coord,bfactor,0,' ','CA',num_count,'C')
    residue.add(atom)
    chain.add(residue)
model.add(chain)
structure.add(model)
# --------------------------------------------------------------------
io=PDBIO()
io.set_structure(structure)
if ( args['dst'] is None):
    # No destination given: write to standard output.
    fn = sys.stdout
    io.save(fn)
    if ( args['link'] ):
        # Link consecutive atoms (serial i to i+1) with CONECT records.
        # These are written after everything io.save() has emitted.
        for i in range(1,shape(points)[0]):
            fn.write( "CONECT%5d%5d\n" % (i, i+1))
else:
    # NOTE(review): CONECT records are only written in the stdout branch;
    # args['link'] is not consulted here - confirm intended.
    fn = args['dst']
    io.save(fn)
def initialize_res(residue):
    '''Creates a new structure containing a single amino acid. The type and
    geometry of the amino acid are determined by the argument, which has to be
    either a geometry object or a single-letter amino acid code.
    The amino acid will be placed into chain A of model 0.

    The alpha carbon sits at the origin, C on the x axis and N in the xy
    plane. Residue names without a dedicated builder are built as glycine.'''
    geo = residue if isinstance(residue, Geo) else geometry(residue)

    seg_id = 1
    aa_code = geo.residue_name
    ca_n = geo.CA_N_length
    ca_c = geo.CA_C_length
    n_ca_c = geo.N_CA_C_angle

    # Backbone coordinates.
    origin = numpy.array([0., 0., 0.])
    c_xyz = numpy.array([ca_c, 0, 0])
    n_xyz = numpy.array([ca_n * math.cos(n_ca_c * (math.pi / 180.0)),
                         ca_n * math.sin(n_ca_c * (math.pi / 180.0)),
                         0])

    n_atom = Atom("N", n_xyz, 0.0, 1.0, " ", " N", 0, "N")
    ca_atom = Atom("CA", origin, 0.0, 1.0, " ", " CA", 0, "C")
    c_atom = Atom("C", c_xyz, 0.0, 1.0, " ", " C", 0, "C")

    # Create carbonyl atom (to be moved later).
    o_xyz = calculateCoordinates(n_atom, ca_atom, c_atom, geo.C_O_length,
                                 geo.CA_C_O_angle, geo.N_CA_C_O_diangle)
    o_atom = Atom("O", o_xyz, 0.0, 1.0, " ", " O", 0, "O")

    # One-letter code -> residue builder; glycine is the fallback.
    builders = {
        'G': makeGly, 'A': makeAla, 'S': makeSer, 'C': makeCys,
        'V': makeVal, 'I': makeIle, 'L': makeLeu, 'T': makeThr,
        'R': makeArg, 'K': makeLys, 'D': makeAsp, 'E': makeGlu,
        'N': makeAsn, 'Q': makeGln, 'M': makeMet, 'H': makeHis,
        'P': makePro, 'F': makePhe, 'Y': makeTyr, 'W': makeTrp,
    }
    build = builders.get(aa_code, makeGly)
    built_residue = build(seg_id, n_atom, ca_atom, c_atom, o_atom, geo)

    chain_a = Chain('A')
    chain_a.add(built_residue)

    model_0 = Model(0)
    model_0.add(chain_a)

    result = Structure('X')
    result.add(model_0)
    return result
# Reference structure: one ALA residue with a single CA atom for every
# selected point (indices in IndexList) read from the reference points file.
ref_ptsfilename = "K562.pts"
refid = "ref"
structure = Structure(refid)
model_ref = Model(1)
chain_ref = Chain("A")
points_ref = ReadXYZ(ref_ptsfilename,scale)
num_count = 0
for i in range(0,shape(points_ref[IndexList])[0]):
    # Residue numbers and atom serial numbers are 1-based.
    num_count = num_count +1
    res_id = (' ',num_count,' ')
    residue = Residue(res_id,'ALA',' ')
    cur_coord = tuple(points_ref[IndexList[i]])
    # NOTE(review): the 6th positional argument is the atom's fullname in
    # Bio.PDB's Atom signature; the integer num_count is passed here
    # rather than a name string - confirm intended.
    atom = Atom('CA',cur_coord,0,0,' ',num_count,num_count,'C')
    residue.add(atom)
    chain_ref.add(residue)
model_ref.add(chain_ref)
structure.add(model_ref)
#--------------------------------------------------------------------
# Alternative structure, built from a second points file.
altid = "alt"
# NOTE(review): the structure id passed here is refid ("ref"), although
# altid was just defined - confirm which id is intended.
structure_alt = Structure(refid)
model_alt = Model(2)
chain_alt = Chain("A")
points_alt = ReadXYZ(alt_ptsfilename,scale)
num_count = 0
for i in range(0,shape(points_alt[IndexList])[0]):
    num_count = num_count +1
    res_id = (' ',num_count,' ')
    residue = Residue(res_id,'ALA',' ')
class TestPdbalign(unittest.TestCase):
    """Unit tests for align_and_index, align_chains_msa,
    compute_distance_matrix and consensus."""

    # Gap opening is made cheap here; with a larger gap penalty the short
    # test alignments below do not come out as expected.
    aligner = Aligner(BLOSUM62.load(), do_codon=False,
                      open_insertion=-1, open_deletion=-1)

    def setUp(self):
        """Create chain "A" with residues Trp, His, Ser, Val, His (segids 0-4)."""
        self.chain = Chain("A")
        resnames = ("Trp", "His", "Ser", "Val", "His")
        for segid, resname in enumerate(resnames):
            self.chain.add(Residue(0, resname=resname, segid=segid))

    def _assert_all_true(self, left, right):
        """Assert that ``left == right`` holds for every element."""
        self.assertTrue(np.all(left == right))

    def test_align_and_index(self):
        """align_and_index returns the expected index list for each pair."""
        # (first sequence, second sequence, expected result)
        cases = (
            ("AHSVH", "AHVH", [0, 1, -1, 2, 3]),
            ("AHVH", "AHSVH", [0, 1, 3, 4]),
            ("AHSVH", "AHSVH", [0, 1, 2, 3, 4]),
            ("-HSVH", "AHSVH", [-1, 1, 2, 3, 4]),
            ("A-SVH", "AHSVH", [0, -1, 2, 3, 4]),
            ("AH-VH", "AHSVH", [0, 1, -1, 3, 4]),
            ("AHS-H", "AHSVH", [0, 1, 2, -1, 4]),
            ("AHSV-", "AHSVH", [0, 1, 2, 3, -1]),
            ("AHSVHCCCCCCFPVW", "AHSVHFPVW",
             [0, 1, 2, 3, 4, -1, -1, -1, -1, -1, -1, 5, 6, 7, 8]),
        )
        for first, second, expected in cases:
            actual = align_and_index(Seq(first), Seq(second), missing=-1,
                                     aligner=self.aligner)
            self.assertEqual(expected, actual)

    def test_align_chains_msa(self):
        """Every alignment column is assigned to a residue of the chain."""
        msa = [Seq(text) for text in ("AHSVH", "AH-VH", "A-SVH")]
        indices = align_chains_msa(msa, [self.chain], aligner=self.aligner)
        self._assert_all_true(indices, np.array([[0, 1, 2, 3, 4]]))

    def test_align_chains_msa_no_consensus(self):
        """The last column, where the sequences disagree, is reported as -1."""
        msa = [Seq(text) for text in ("AHSV", "AHSH")]
        indices = align_chains_msa(msa, [self.chain], aligner=self.aligner)
        self._assert_all_true(indices, np.array([[0, 1, 2, -1]]))

    def test_align_chains_msa_leading_gaps(self):
        """The two leading columns are reported as -1."""
        msa = [Seq(text) for text in ("FFWHSVH", "IIWH-VH", "WWW-SVH")]
        indices = align_chains_msa(msa, [self.chain], aligner=self.aligner)
        self._assert_all_true(indices, np.array([[-1, -1, 0, 1, 2, 3, 4]]))

    def test_compute_distance_matrix(self):
        """compute_distance_matrix matches a hand-computed matrix."""
        base = np.array([[0, 0], [np.nan, np.nan], [1, 1], [1, 0]])
        # Second coordinate set: the same points moved by (1.5, 1).
        shifted = base + np.array([1.5, 1])
        coords = np.hstack([shifted, base]).reshape((4, 2, 2))
        expected = np.array([[0, 5, 0.5, 1],
                             [5, 0, 5, np.inf],
                             [0.5, 5, 0, 1],
                             [1, np.inf, 1, 0]])
        result = compute_distance_matrix(coords, default_dist=5)
        self._assert_all_true(expected, result)

    def test_consensus(self):
        """consensus returns the expected value, or the flag, for each input."""
        flag = -1
        cases = (
            ((0, 0, 1, 1), flag),
            ((0, 0, 0, 1), 0),
            ((0, 0, 0, 0), 0),
            (iter([]), flag),
            ((), flag),
        )
        for values, expected in cases:
            self.assertEqual(expected, consensus(values, flag=flag))
def undo(cls, input, outfile=None, ligand_chain=None, residue_dict=None, write=True):
    """Split the single receptor chain of a PDB file back into several chains.

    The file ``input`` is parsed and the residues of its one non-ligand chain
    are moved into newly created chains as described by ``residue_dict``.

    :param input: path of the PDB file to read.
    :param outfile: path to write when ``write`` is true. When None, it is
        ``input`` with ``_split`` inserted before the extension. A relative
        path supplied by the caller is placed in the directory of ``input``.
    :param ligand_chain: optional ID of a chain to leave untouched; it must
        exist in the input.
    :param residue_dict: mapping from the first residue number of each
        segment to a dict whose ``'CHAIN'`` entry is the chain ID for that
        segment. When None, it is obtained from
        ``cls.extract_residue_dict(input)``.
    :param write: when true, save the result and return the output path;
        otherwise return the modified structure without writing.
    :returns: the output path (``write`` true) or the structure object.
    :raises CombineChainError: if no residue dict is available, the input has
        more than one model, chain IDs are not unique, ``ligand_chain`` is not
        present, or more than one chain remains besides the ligand chain.
    """
    if residue_dict is None:
        residue_dict = cls.extract_residue_dict(input)
    if not residue_dict:
        raise CombineChainError("No residue dict")

    parser = PDB.PDBParser(QUIET=True)
    structure = parser.get_structure(
        os.path.splitext(os.path.basename(input))[0], input)
    if len(structure.child_list) > 1:
        raise CombineChainError("Input has more than one model: %s"
                                % structure.child_list)
    model = structure.child_list[0]

    chain_id_list = [c.id for c in model.child_list]
    chain_id_set = set(chain_id_list)
    if len(chain_id_list) != len(chain_id_set):
        raise CombineChainError("Non-unique chain IDs")

    if ligand_chain is not None:
        # Test membership explicitly: set.remove() signals a missing element
        # with KeyError, which would otherwise escape as an unrelated error.
        if ligand_chain not in chain_id_set:
            raise CombineChainError("Ligand chain %s not found in %s"
                                    % (ligand_chain, input))
        chain_id_set.remove(ligand_chain)

    if len(chain_id_set) > 1:
        logging.debug(input)
        raise CombineChainError("Input has more than one chain: %s"
                                % model.child_list)
    receptor_chain = chain_id_set.pop()
    chain = model[receptor_chain]

    # Each key marks the first residue number of a segment; the segment runs
    # up to (not including) the next key, and the last one is open-ended.
    residue_list = sorted(residue_dict)
    for residue, next_change in zip(residue_list, residue_list[1:] + [None]):
        chainid = residue_dict[residue]['CHAIN']
        new_chain = Chain(chainid)
        model.add(new_chain)
        # get_list() is iterated instead of the chain itself because
        # residues are detached from the chain inside the loop.
        for res in chain.get_list():
            resseq = res.id[1]
            if (next_change is None or resseq < next_change) and resseq >= residue:
                chain.detach_child(res.id)
                new_chain.add(res)

    if not write:
        return structure

    if outfile is None:
        # Derived from ``input``, so it already carries input's directory
        # and must not be joined onto that directory a second time.
        fileparts = list(os.path.splitext(input))
        fileparts.insert(1, "_split")
        outfile = "".join(fileparts)
    elif not os.path.isabs(outfile):
        outfile = os.path.join(os.path.dirname(input), outfile)

    io = PDB.PDBIO()
    io.set_structure(structure)
    io.save(outfile)
    return outfile
}, { 'name': 'C5', 'coord': PDB.Atom.array([66.402, 44.364, 11.291], 'f'), 'bfactor': 44.20, 'occupancy': 1.0, 'altloc': ' ', 'fullname': 'C5', 'serial_number': 7 }, { 'name': 'C6', 'coord': PDB.Atom.array([65.095, 44.589, 11.192], 'f'), 'bfactor': 44.33, 'occupancy': 1.0, 'altloc': ' ', 'fullname': 'C6', 'serial_number': 8 }] my_structure.add(my_model) my_model.add(my_chain) my_chain.add(my_residue) for atom in atoms: my_atom = Atom(atom['name'], atom['coord'], atom['bfactor'], atom['occupancy'], atom['altloc'], atom['fullname'], atom['serial_number']) my_residue.add(my_atom) out = PDBIO() out.set_structure(my_structure) out.save('my_new_structure.pdb')