def build_structure(atoms):
    """Wrap loose atoms in a minimal structure, one dummy residue per atom.

    A structure with id 'pdb', model 0 and chain 'A' is created. For each
    atom, in iteration order, a residue named 'DUM' is opened with sequence
    numbers 1, 2, 3, ... and the atom is added to it.

    :param atoms: iterable of atom objects to place in the structure
    :returns: the structure object held by the builder
    """
    builder = StructureBuilder()
    builder.init_structure('pdb')
    builder.init_seg(' ')
    builder.init_model(0)
    builder.init_chain('A')
    for resseq, atom in enumerate(atoms, start=1):
        builder.init_residue('DUM', ' ', resseq, ' ')
        # The residue just opened is fetched by position: residue number
        # resseq is expected at index resseq - 1 of the chain's child list.
        builder.structure[0]['A'].child_list[resseq - 1].add(atom)
    return builder.structure
def set_structure(self, pdb_object):
    """Store a structure-level object built around the supplied entity.

    ``pdb_object.level`` decides how much of the Structure/Model/Chain/
    Residue hierarchy has to be created above the entity:

    * "S": the object is stored as given.
    * "M": a copy is added to a new structure.
    * "C": a copy is added to model 0 of a new structure.
    * "R": a copy is added to a new chain, which takes the id of the
      residue's parent when it has one and "A" otherwise.
    * anything else (an Atom): a copy is added to a dummy residue "DUM";
      the chain takes the id of the atom's grandparent when reachable.

    :param pdb_object: entity exposing ``level``, ``copy()`` and, for
        residues and atoms, ``parent``.
    """
    level = pdb_object.level

    # A complete Structure needs no scaffolding.
    if level == "S":
        self.structure = pdb_object
        return

    builder = StructureBuilder()
    builder.init_structure("pdb")
    builder.init_seg(" ")

    if level == "M":
        builder.structure.add(pdb_object.copy())
        self.structure = builder.structure
        return

    builder.init_model(0)
    model = builder.structure[0]

    if level == "C":
        model.add(pdb_object.copy())
        self.structure = builder.structure
        return

    # Residue or Atom: a chain is needed, "A" unless a better id is known.
    chain_id = "A"
    builder.init_chain(chain_id)

    if level == "R":
        # A residue taken out of a larger structure keeps its chain id.
        if pdb_object.parent is not None:
            original_chain_id = pdb_object.parent.id
            model[chain_id].id = original_chain_id
            chain_id = original_chain_id
        model[chain_id].add(pdb_object.copy())
    else:
        # Atom: park it inside a dummy residue first.
        builder.init_residue("DUM", " ", 1, " ")
        model[chain_id].child_list[0].add(pdb_object.copy())
        try:
            original_chain_id = pdb_object.parent.parent.id
        except AttributeError:
            # No parent (or no grandparent): the default chain id stays.
            pass
        else:
            model[chain_id].id = original_chain_id

    self.structure = builder.structure
def _pdb_writer_add_residues(sb, residues):
    """Copy residues and their atoms into the chain currently open on *sb*."""
    for res in residues:
        # Set the segid per residue so every residue is written with its
        # own segid, whichever chain it ends up in.
        sb.init_seg(res.get_segid())
        res_id = res.get_id()
        sb.init_residue(res.get_resname(), res_id[0], res_id[1], res_id[2])
        for atom in res:
            sb.init_atom(atom.get_name(), atom.get_coord(),
                         atom.get_bfactor(), atom.get_occupancy(),
                         atom.get_altloc(), atom.get_fullname())


def pdb_writer(graph, results, dir_name="templates"):
    """Write the best results as two-chain PDB files.

    For each of the first ``NUM_OF_RESULTS_TO_SAVE`` entries of *results*,
    a structure is built in which the residues of the nodes listed in
    ``key[0]`` form chain 'X' and those listed in ``key[1]`` form chain 'Y'.
    It is saved as ``<dir_name>/<protein_name>/interface<i>.pdb``, with
    ``i`` counting from 0.

    Unlike the previous implementation, the segid is initialised for the
    residues of both chains. Before, chain 'Y' residues reused whatever
    segid was set last for chain 'X' (or none at all when ``key[0]`` was
    empty).

    :param graph: object exposing ``protein_name`` and ``nodes``; each node
        referenced by a result must expose ``residues``.
    :param results: sliceable sequence of ``(key, value)`` pairs, where
        ``key[0]`` and ``key[1]`` are iterables of node ids.
    :param dir_name: directory under which the per-protein folder is made.
    :returns: ``1`` when results were processed, ``None`` (after printing
        'No results') when *results* is empty.
    """
    PDB_CODE = graph.protein_name
    path = "%s/%s/" % (dir_name, PDB_CODE)

    if len(results) == 0:
        # Parenthesised single argument: prints the same on Python 2 and 3.
        print('No results')
        return
    if not os.path.exists(path):
        os.makedirs(path)

    top = min(NUM_OF_RESULTS_TO_SAVE, len(results))
    for i, (key, _value) in enumerate(results[:top]):
        # TODO - save as different chains
        sb = StructureBuilder()
        sb.init_structure(PDB_CODE)
        sb.init_model(0)

        for chain_id, node_ids in (('X', key[0]), ('Y', key[1])):
            sb.init_chain(chain_id)
            for node_id in node_ids:
                _pdb_writer_add_residues(sb, graph.nodes[node_id].residues)

        filename = path + "interface%d.pdb" % (i)
        io = PDBIO()
        io.set_structure(sb.get_structure())
        io.save(filename)
    return 1
def write_helical_axes(self, filename):
    """Write one CA pseudo-atom per interior residue of each chain as PDB.

    Every entry of ``self.chains`` becomes a chain whose id is its index
    in that list, as a string. The first and last items of ``chain.res``
    are left out. Each remaining item yields an 'ALA' residue, numbered
    from 0 within its chain, holding a single 'CA' atom placed at the
    item's ``O._ar`` value (presumably the helical-axis point for that
    residue - confirm against the class that fills ``res``).

    :param filename: destination handed to ``PDBIO.save``
    """
    builder = StructureBuilder()
    builder.init_structure('')
    builder.init_model('')
    for chain_index, chain in enumerate(self.chains):
        builder.init_chain(str(chain_index))
        builder.init_seg('')
        interior = chain.res[1:-1]
        for resseq, item in enumerate(interior):
            builder.init_residue('ALA', ' ', resseq, ' ')
            builder.init_atom('CA', item.O._ar, 0, 0, " ", ' CA ', 1)

    writer = PDBIO()
    writer.set_structure(builder.structure)
    writer.save(filename)
def set_structure(self, pdb_object):
    """Check what object the user is providing and build a structure.

    ``self.structure`` always ends up holding a structure-level object.
    Depending on ``pdb_object.level`` the missing upper levels are created:
    "S" is stored as given, "M" is added to a new structure, "C" to model
    0, "R" to a new chain and anything else (an Atom) to a dummy residue
    'DUM' in a new chain. The new chain is named after the original chain
    when that can be reached through ``parent``; otherwise it is 'A'.

    The supplied object itself (not a copy) is attached to the new
    hierarchy.

    :param pdb_object: entity exposing ``level`` and, for residues and
        atoms, ``parent``.
    """
    # This is duplicated from the PDBIO class
    if pdb_object.level == "S":
        structure = pdb_object
    else:
        sb = StructureBuilder()
        sb.init_structure('pdb')
        sb.init_seg(' ')
        # Build parts as necessary
        if pdb_object.level == "M":
            sb.structure.add(pdb_object)
        else:
            sb.init_model(0)
            if pdb_object.level == "C":
                sb.structure[0].add(pdb_object)
            else:
                sb.init_chain('A')
                # Keep a direct reference to the chain: once its id has
                # been reassigned, looking it up under 'A' again is not
                # guaranteed to find it.
                chain = sb.structure[0]['A']
                if pdb_object.level == "R":
                    try:
                        chain.id = pdb_object.parent.id
                    except (AttributeError, ValueError):
                        # AttributeError: detached residue (parent is
                        # None). ValueError was already tolerated here.
                        pass
                    chain.add(pdb_object)
                else:
                    # Atom
                    sb.init_residue('DUM', ' ', 1, ' ')
                    try:
                        chain.id = pdb_object.parent.parent.id
                    except (AttributeError, ValueError):
                        # Atom without parent or grandparent: keep 'A'.
                        pass
                    chain.child_list[0].add(pdb_object)
        # Return structure
        structure = sb.structure
    self.structure = structure
def set_structure(self, pdb_object):
    """Check what the user is providing and build a structure.

    ``self.structure`` always ends up holding a structure-level object.
    "S" objects are stored as given. For every other level a copy of the
    object is placed in a new hierarchy: "M" in a new structure, "C" in
    model 0, "R" in a new chain and anything else (an Atom) in a dummy
    residue "DUM" in a new chain. The new chain is named after the
    original chain when that can be reached through ``parent``; otherwise
    it is "A".

    :param pdb_object: entity exposing ``level``, ``copy()`` and, for
        residues and atoms, ``parent``.
    """
    if pdb_object.level == "S":
        structure = pdb_object
    else:
        sb = StructureBuilder()
        sb.init_structure("pdb")
        sb.init_seg(" ")
        # Build parts as necessary
        if pdb_object.level == "M":
            sb.structure.add(pdb_object.copy())
        else:
            sb.init_model(0)
            if pdb_object.level == "C":
                sb.structure[0].add(pdb_object.copy())
            else:
                sb.init_chain("A")
                # Keep a direct reference to the chain: once its id has
                # been reassigned, looking it up under "A" again is not
                # guaranteed to find it.
                chain = sb.structure[0]["A"]
                if pdb_object.level == "R":
                    try:
                        chain.id = pdb_object.parent.id
                    except (AttributeError, ValueError):
                        # Detached residue (parent is None) or an id the
                        # chain cannot take: keep the default "A".
                        pass
                    chain.add(pdb_object.copy())
                else:
                    # Atom
                    sb.init_residue("DUM", " ", 1, " ")
                    try:
                        chain.id = pdb_object.parent.parent.id
                    except (AttributeError, ValueError):
                        # Atom without parent or grandparent: keep "A".
                        pass
                    chain.child_list[0].add(pdb_object.copy())
        # Return structure
        structure = sb.structure
    self.structure = structure
def set_structure(self, pdb_object):
    """Check what the user is providing and build a structure.

    ``self.structure`` always ends up holding a structure-level object.
    "S" objects are stored as given. For every other level a copy of the
    object is placed in a new hierarchy: "M" in a new structure, "C" in
    model 0, "R" in a new chain and anything else (an Atom) in a dummy
    residue 'DUM' in a new chain. The new chain is named after the
    original chain when that can be reached through ``parent``; otherwise
    it is 'A'.

    :param pdb_object: entity exposing ``level``, ``copy()`` and, for
        residues and atoms, ``parent``.
    """
    if pdb_object.level == "S":
        structure = pdb_object
    else:
        sb = StructureBuilder()
        sb.init_structure('pdb')
        sb.init_seg(' ')
        # Build parts as necessary
        if pdb_object.level == "M":
            sb.structure.add(pdb_object.copy())
        else:
            sb.init_model(0)
            if pdb_object.level == "C":
                sb.structure[0].add(pdb_object.copy())
            else:
                sb.init_chain('A')
                # Keep a direct reference to the chain: once its id has
                # been reassigned, looking it up under 'A' again is not
                # guaranteed to find it.
                chain = sb.structure[0]['A']
                if pdb_object.level == "R":
                    try:
                        chain.id = pdb_object.parent.id
                    except (AttributeError, ValueError):
                        # Detached residue (parent is None) or an id the
                        # chain cannot take: keep the default 'A'.
                        pass
                    chain.add(pdb_object.copy())
                else:
                    # Atom
                    sb.init_residue('DUM', ' ', 1, ' ')
                    try:
                        chain.id = pdb_object.parent.parent.id
                    except (AttributeError, ValueError):
                        # Atom without parent or grandparent: keep 'A'.
                        pass
                    chain.child_list[0].add(pdb_object.copy())
        # Return structure
        structure = sb.structure
    self.structure = structure
def set_structure(self, pdb_object):
    """Check what the user is providing and build a structure appropriately.

    ``self.structure`` always ends up holding a structure-level object.
    Depending on ``pdb_object.level`` the missing upper levels are created:
    "S" is stored as given, "M" is added to a new structure, "C" to model
    0, "R" to a new chain and anything else (an Atom) to a dummy residue
    'DUM' in a new chain. The new chain is named after the original chain
    when that can be reached through ``parent``; otherwise it is 'A'.

    The supplied object itself (not a copy) is attached to the new
    hierarchy.

    :param pdb_object: entity exposing ``level`` and, for residues and
        atoms, ``parent``.
    """
    if pdb_object.level == "S":
        structure = pdb_object
    else:
        sb = StructureBuilder()
        sb.init_structure('pdb')
        sb.init_seg(' ')
        # Build parts as necessary
        if pdb_object.level == "M":
            sb.structure.add(pdb_object)
        else:
            sb.init_model(0)
            if pdb_object.level == "C":
                sb.structure[0].add(pdb_object)
            else:
                sb.init_chain('A')
                # Keep a direct reference to the chain: once its id has
                # been reassigned, looking it up under 'A' again is not
                # guaranteed to find it.
                chain = sb.structure[0]['A']
                if pdb_object.level == "R":
                    try:
                        chain.id = pdb_object.parent.id
                    except (AttributeError, ValueError):
                        # Detached residue (parent is None) or an id the
                        # chain cannot take: keep the default 'A'.
                        pass
                    chain.add(pdb_object)
                else:
                    # Atom
                    sb.init_residue('DUM', ' ', 1, ' ')
                    try:
                        chain.id = pdb_object.parent.parent.id
                    except (AttributeError, ValueError):
                        # Atom without parent or grandparent: keep 'A'.
                        pass
                    chain.child_list[0].add(pdb_object)
        # Return structure
        structure = sb.structure
    self.structure = structure
def set_structure(self, pdb_object):
    """Check what the user is providing and build a structure appropriately.

    ``self.structure`` always ends up holding a structure-level object.
    Depending on ``pdb_object.level`` the missing upper levels are created:
    "S" is stored as given, "M" is added to a new structure, "C" to model
    0, "R" to a new chain and anything else (an Atom) to a dummy residue
    "DUM" in a new chain. The new chain is named after the original chain
    when that can be reached through ``parent``; otherwise it is "A".

    The supplied object itself (not a copy) is attached to the new
    hierarchy.

    :param pdb_object: entity exposing ``level`` and, for residues and
        atoms, ``parent``.
    """
    if pdb_object.level == "S":
        structure = pdb_object
    else:
        sb = StructureBuilder()
        sb.init_structure("pdb")
        sb.init_seg(" ")
        # Build parts as necessary
        if pdb_object.level == "M":
            sb.structure.add(pdb_object)
        else:
            sb.init_model(0)
            if pdb_object.level == "C":
                sb.structure[0].add(pdb_object)
            else:
                sb.init_chain("A")
                # Keep a direct reference to the chain: once its id has
                # been reassigned, looking it up under "A" again is not
                # guaranteed to find it.
                chain = sb.structure[0]["A"]
                if pdb_object.level == "R":
                    try:
                        chain.id = pdb_object.parent.id
                    except (AttributeError, ValueError):
                        # Detached residue (parent is None) or an id the
                        # chain cannot take: keep the default "A".
                        pass
                    chain.add(pdb_object)
                else:
                    # Atom
                    sb.init_residue("DUM", " ", 1, " ")
                    try:
                        chain.id = pdb_object.parent.parent.id
                    except (AttributeError, ValueError):
                        # Atom without parent or grandparent: keep "A".
                        pass
                    chain.child_list[0].add(pdb_object)
        # Return structure
        structure = sb.structure
    self.structure = structure
class StructureDecoder(object):
    """Class to pass the data from mmtf-python into a Biopython data structure."""

    def __init__(self):
        """Initialize the class."""
        # Value passed as the second argument of init_residue; updated by
        # set_chain_info from the entity type of the current chain.
        self.this_type = ""

    def init_structure(self, total_num_bonds, total_num_atoms,
                       total_num_groups, total_num_chains, total_num_models,
                       structure_id):
        """Initialize the structure object.

        Only ``structure_id`` is used; the counts are accepted and ignored.

        :param total_num_bonds: the number of bonds in the structure
        :param total_num_atoms: the number of atoms in the structure
        :param total_num_groups: the number of groups in the structure
        :param total_num_chains: the number of chains in the structure
        :param total_num_models: the number of models in the structure
        :param structure_id: the id of the structure (e.g. PDB id)
        """
        # The attribute name (with its historical spelling) is public and
        # therefore left unchanged.
        self.structure_bulder = StructureBuilder()
        self.structure_bulder.init_structure(structure_id=structure_id)
        self.chain_index_to_type_map = {}
        self.chain_index_to_seq_map = {}
        self.chain_index_to_description_map = {}
        self.chain_counter = 0

    def set_atom_info(self, atom_name, serial_number, alternative_location_id,
                      x, y, z, occupancy, temperature_factor, element, charge):
        """Create an atom object and set the information.

        The coordinates are handed to the builder as a single-precision
        numpy array rather than a plain list, so the resulting atom
        coordinates support array arithmetic.

        :param atom_name: the atom name, e.g. CA for this atom
        :param serial_number: the serial id of the atom (e.g. 1)
        :param alternative_location_id: the alternative location id for the
            atom, if present
        :param x: the x coordinate of the atom
        :param y: the y coordinate of the atom
        :param z: the z coordinate of the atom
        :param occupancy: the occupancy of the atom
        :param temperature_factor: the temperature factor of the atom
        :param element: the element of the atom, e.g. C for carbon; it is
            upper-cased before being passed on
        :param charge: the formal atomic charge of the atom (not used)
        """
        # Imported here so the method does not depend on a module-level
        # numpy import.
        import numpy

        # MMTF uses "\x00" (the NUL character) to indicate no altloc, so
        # convert that to the space required by StructureBuilder
        if alternative_location_id == "\x00":
            alternative_location_id = " "
        # Atom_name is in twice - the full_name is with spaces
        self.structure_bulder.init_atom(str(atom_name),
                                        numpy.array((x, y, z), "f"),
                                        temperature_factor, occupancy,
                                        alternative_location_id,
                                        str(atom_name),
                                        serial_number=serial_number,
                                        element=str(element).upper())

    def set_chain_info(self, chain_id, chain_name, num_groups):
        """Set the chain information.

        Opens a new chain and derives ``this_type`` from the entity type
        recorded for the current chain index: "polymer" gives " ",
        "non-polymer" gives "H", "water" gives "W". Any other type leaves
        ``this_type`` unchanged.

        :param chain_id: the asym chain id from mmCIF
        :param chain_name: the auth chain id from mmCIF
        :param num_groups: the number of groups this chain has
        :raises KeyError: if no entity type was recorded for the current
            chain index
        """
        # A Bradley - chose to use chain_name (auth_id) as it complies
        # with current Biopython. Chain_id might be better.
        self.structure_bulder.init_chain(chain_id=chain_name)
        entity_type = self.chain_index_to_type_map[self.chain_counter]
        if entity_type == "polymer":
            self.this_type = " "
        elif entity_type == "non-polymer":
            self.this_type = "H"
        elif entity_type == "water":
            self.this_type = "W"
        self.chain_counter += 1

    def set_entity_info(self, chain_indices, sequence, description, entity_type):
        """Set the entity level information for the structure.

        :param chain_indices: the indices of the chains for this entity
        :param sequence: the one letter code sequence for this entity
        :param description: the description for this entity
        :param entity_type: the entity type (polymer,non-polymer,water)
        """
        for chain_ind in chain_indices:
            self.chain_index_to_type_map[chain_ind] = entity_type
            self.chain_index_to_seq_map[chain_ind] = sequence
            self.chain_index_to_description_map[chain_ind] = description

    def set_group_info(self, group_name, group_number, insertion_code,
                       group_type, atom_count, bond_count, single_letter_code,
                       sequence_index, secondary_structure_type):
        """Set the information for a group.

        Only the name, number and insertion code are used, together with
        the current ``this_type``.

        :param group_name: the name of this group, e.g. LYS
        :param group_number: the residue number of this group
        :param insertion_code: the insertion code for this group
        :param group_type: a string indicating the type of group (as found
            in the chemcomp dictionary). Empty string if none available.
        :param atom_count: the number of atoms in the group
        :param bond_count: the number of unique bonds in the group
        :param single_letter_code: the single letter code of the group
        :param sequence_index: the index of this group in the sequence
            defined by the entity
        :param secondary_structure_type: the type of secondary structure
            used (types are according to DSSP and number to type mappings
            are defined in the specification)
        """
        # MMTF uses a NUL character to indicate a blank insertion code, but
        # StructureBuilder expects a space instead.
        if insertion_code == "\x00":
            insertion_code = " "

        self.structure_bulder.init_seg(' ')
        self.structure_bulder.init_residue(group_name, self.this_type,
                                           group_number, insertion_code)

    def set_model_info(self, model_id, chain_count):
        """Set the information for a model.

        :param model_id: the index for the model
        :param chain_count: the number of chains in the model
        """
        self.structure_bulder.init_model(model_id)

    def set_xtal_info(self, space_group, unit_cell):
        """Set the crystallographic information for the structure.

        :param space_group: the space group name, e.g. "P 21 21 21"
        :param unit_cell: an array of length 6 with the unit cell parameters
            in order: a, b, c, alpha, beta, gamma
        """
        self.structure_bulder.set_symmetry(space_group, unit_cell)

    def set_header_info(self, r_free, r_work, resolution, title,
                        deposition_date, release_date, experimnetal_methods):
        """Set the header information (currently ignored).

        :param r_free: the measured R-Free for the structure
        :param r_work: the measure R-Work for the structure
        :param resolution: the resolution of the structure
        :param title: the title of the structure
        :param deposition_date: the deposition date of the structure
        :param release_date: the release date of the structure
        :param experimnetal_methods: the list of experimental methods in
            the structure
        """
        pass

    def set_bio_assembly_trans(self, bio_assembly_index, input_chain_indices,
                               input_transform):
        """Set the Bioassembly transformation information (currently ignored).

        A single bioassembly can have multiple transforms.

        :param bio_assembly_index: the integer index of the bioassembly
        :param input_chain_indices: the list of integer indices for the
            chains of this bioassembly
        :param input_transform: the list of doubles for the transform of
            this bioassembly transform.
        """
        pass

    def finalize_structure(self):
        """Any functions needed to cleanup the structure."""
        pass

    def set_group_bond(self, atom_index_one, atom_index_two, bond_order):
        """Add bonds within a group (currently ignored).

        :param atom_index_one: the integer atom index (in the group) of the
            first partner in the bond
        :param atom_index_two: the integer atom index (in the group) of the
            second partner in the bond
        :param bond_order: the integer bond order
        """
        pass

    def set_inter_group_bond(self, atom_index_one, atom_index_two, bond_order):
        """Add bonds between groups (currently ignored).

        :param atom_index_one: the integer atom index (in the structure) of
            the first partner in the bond
        :param atom_index_two: the integer atom index (in the structure) of
            the second partner in the bond
        :param bond_order: the bond order
        """
        pass
class StructureDecoder(object):
    """Receive mmtf-python decoder callbacks and build a Biopython structure."""

    def __init__(self):
        """Start with an empty residue-type flag."""
        self.this_type = ""

    def init_structure(self, total_num_bonds, total_num_atoms,
                       total_num_groups, total_num_chains, total_num_models,
                       structure_id):
        """Create the builder and reset the per-chain bookkeeping.

        The totals are part of the callback signature but are not used.

        :param total_num_bonds: the number of bonds in the structure
        :param total_num_atoms: the number of atoms in the structure
        :param total_num_groups: the number of groups in the structure
        :param total_num_chains: the number of chains in the structure
        :param total_num_models: the number of models in the structure
        :param structure_id: the id of the structure (e.g. PDB id)
        """
        builder = StructureBuilder()
        self.structure_bulder = builder
        builder.init_structure(structure_id=structure_id)
        self.chain_index_to_type_map = {}
        self.chain_index_to_seq_map = {}
        self.chain_index_to_description_map = {}
        self.chain_counter = 0

    def set_atom_info(self, atom_name, serial_number, alternative_location_id,
                      x, y, z, occupancy, temperature_factor, element, charge):
        """Add one atom to the residue currently open on the builder.

        :param atom_name: the atom name, e.g. CA for this atom
        :param serial_number: the serial id of the atom (e.g. 1)
        :param alternative_location_id: the alternative location id for the
            atom, if present
        :param x: the x coordinate of the atom
        :param y: the y coordinate of the atom
        :param z: the z coordinate of the atom
        :param occupancy: the occupancy of the atom
        :param temperature_factor: the temperature factor of the atom
        :param element: the element of the atom, e.g. C for carbon; passed
            on upper-cased
        :param charge: the formal atomic charge of the atom (not used)
        """
        # A NUL altloc from MMTF means "none"; the builder wants a space.
        altloc = " " if alternative_location_id == "\x00" else alternative_location_id
        name = str(atom_name)
        coord = numpy.array((x, y, z), "f")
        # The same string serves as both the name and the full name.
        self.structure_bulder.init_atom(name, coord, temperature_factor,
                                        occupancy, altloc, name,
                                        serial_number=serial_number,
                                        element=str(element).upper())

    def set_chain_info(self, chain_id, chain_name, num_groups):
        """Open a new chain and pick the residue-type flag for it.

        The chain is created under ``chain_name`` (the auth id), which is
        what current Biopython expects; ``chain_id`` might be the better
        choice. The flag becomes " " for "polymer", "H" for "non-polymer"
        and "W" for "water"; other entity types leave it as it was.

        :param chain_id: the asym chain id from mmCIF
        :param chain_name: the auth chain id from mmCIF
        :param num_groups: the number of groups this chain has
        """
        self.structure_bulder.init_chain(chain_id=chain_name)
        entity_kind = self.chain_index_to_type_map[self.chain_counter]
        for label, flag in (("polymer", " "), ("non-polymer", "H"), ("water", "W")):
            if entity_kind == label:
                self.this_type = flag
                break
        self.chain_counter += 1

    def set_entity_info(self, chain_indices, sequence, description, entity_type):
        """Record type, sequence and description for each chain of an entity.

        :param chain_indices: the indices of the chains for this entity
        :param sequence: the one letter code sequence for this entity
        :param description: the description for this entity
        :param entity_type: the entity type (polymer,non-polymer,water)
        """
        type_map = self.chain_index_to_type_map
        seq_map = self.chain_index_to_seq_map
        description_map = self.chain_index_to_description_map
        for index in chain_indices:
            type_map[index] = entity_type
            seq_map[index] = sequence
            description_map[index] = description

    def set_group_info(self, group_name, group_number, insertion_code,
                       group_type, atom_count, bond_count, single_letter_code,
                       sequence_index, secondary_structure_type):
        """Open a new residue for the given group.

        Only the name, number and insertion code are used, together with
        the current residue-type flag.

        :param group_name: the name of this group, e.g. LYS
        :param group_number: the residue number of this group
        :param insertion_code: the insertion code for this group
        :param group_type: a string indicating the type of group (as found
            in the chemcomp dictionary). Empty string if none available.
        :param atom_count: the number of atoms in the group
        :param bond_count: the number of unique bonds in the group
        :param single_letter_code: the single letter code of the group
        :param sequence_index: the index of this group in the sequence
            defined by the entity
        :param secondary_structure_type: the type of secondary structure
            used (types are according to DSSP and number to type mappings
            are defined in the specification)
        """
        # A NUL insertion code from MMTF means blank; the builder wants a
        # space.
        icode = " " if insertion_code == "\x00" else insertion_code
        builder = self.structure_bulder
        builder.init_seg(' ')
        builder.init_residue(group_name, self.this_type, group_number, icode)

    def set_model_info(self, model_id, chain_count):
        """Open a new model.

        :param model_id: the index for the model
        :param chain_count: the number of chains in the model
        """
        self.structure_bulder.init_model(model_id)

    def set_xtal_info(self, space_group, unit_cell):
        """Pass the crystallographic information to the builder.

        :param space_group: the space group name, e.g. "P 21 21 21"
        :param unit_cell: an array of length 6 with the unit cell parameters
            in order: a, b, c, alpha, beta, gamma
        """
        self.structure_bulder.set_symmetry(space_group, unit_cell)

    def set_header_info(self, r_free, r_work, resolution, title,
                        deposition_date, release_date, experimnetal_methods):
        """Accept the header information; nothing is stored.

        :param r_free: the measured R-Free for the structure
        :param r_work: the measure R-Work for the structure
        :param resolution: the resolution of the structure
        :param title: the title of the structure
        :param deposition_date: the deposition date of the structure
        :param release_date: the release date of the structure
        :param experimnetal_methods: the list of experimental methods in
            the structure
        """
        return None

    def set_bio_assembly_trans(self, bio_assembly_index, input_chain_indices,
                               input_transform):
        """Accept a bioassembly transformation; nothing is stored.

        A single bioassembly can have multiple transforms.

        :param bio_assembly_index: the integer index of the bioassembly
        :param input_chain_indices: the list of integer indices for the
            chains of this bioassembly
        :param input_transform: the list of doubles for the transform of
            this bioassembly transform.
        """
        return None

    def finalize_structure(self):
        """Hook for cleaning up the structure; nothing to do."""
        return None

    def set_group_bond(self, atom_index_one, atom_index_two, bond_order):
        """Accept a bond within a group; nothing is stored.

        :param atom_index_one: the integer atom index (in the group) of the
            first partner in the bond
        :param atom_index_two: the integer atom index (in the group) of the
            second partner in the bond
        :param bond_order: the integer bond order
        """
        return None

    def set_inter_group_bond(self, atom_index_one, atom_index_two, bond_order):
        """Accept a bond between groups; nothing is stored.

        :param atom_index_one: the integer atom index (in the structure) of
            the first partner in the bond
        :param atom_index_two: the integer atom index (in the structure) of
            the second partner in the bond
        :param bond_order: the bond order
        """
        return None
def read_PIC(file: TextIO, verbose: bool = False) -> Structure:
    """Load Protein Internal Coordinate (.pic) data from file.

    PIC file format:
        - comment lines start with #
        - (optional) PDB HEADER record
            - idcode and deposition date recommended but optional
            - deposition date in PDB format or as changed by Biopython
        - (optional) PDB TITLE record
        - repeat:
            - Biopython Residue Full ID - sets residue IDs of returned
              structure
            - (optional) PDB N, CA, C ATOM records for chain start
            - (optional) PIC Hedra records for residue
            - (optional) PIC Dihedra records for residue
            - (optional) BFAC records listing AtomKeys and b-factors

    An improvement would define relative positions for HOH (water) entries.

    N.B. dihedron (i-1)C-N-CA-CB is ignored in assembly if O exists.

    C-beta is by default placed using O-C-CA-CB, but O is missing
    in some PDB file residues, which means the sidechain cannot be
    placed. The alternate CB path (i-1)C-N-CA-CB is provided to
    circumvent this, but if this is needed then it must be adjusted in
    conjunction with PHI ((i-1)C-N-CA-C) as they overlap.
    (i-1)C-N-CA-CB is included by default in .pic files for
    consistency and informational (e.g. statistics gathering) purposes,
    as otherwise the dihedron would only appear in the few cases it is
    needed for.

    Lines are read lazily from the handle, one at a time.

    :param Bio.File file: file name or handle
    :param bool verbose: complain when lines not as expected
    :returns: Biopython Structure object, Residues with .internal_coord
        attributes but no coordinates except for chain start N, CA, C atoms
        if supplied, **OR** None on parse fail. Parse failures are silent
        unless verbose=True, with one exception: (di/h)edron data found
        before any residue ID line is always reported.
    """
    pdb_hdr_re = re.compile(
        r"^HEADER\s{4}(?P<cf>.{1,40})"
        r"(?:\s+(?P<dd>\d\d\d\d-\d\d-\d\d|\d\d-\w\w\w-\d\d))?"
        r"(?:\s+(?P<id>[0-9A-Z]{4}))?\s*$")
    pdb_ttl_re = re.compile(r"^TITLE\s{5}(?P<ttl>.+)\s*$")
    biop_id_re = re.compile(r"^\('(?P<pid>[^\s]*)',\s(?P<mdl>\d+),\s"
                            r"'(?P<chn>\s|\w)',\s\('(?P<het>\s|[\w\s-]+)"
                            r"',\s(?P<pos>-?\d+),\s'(?P<icode>\s|\w)'\)\)"
                            r"\s+(?P<res>[\w]{1,3})"
                            r"(\s\[(?P<segid>[a-zA-z\s]+)\])?"
                            r"\s*$")
    pdb_atm_re = re.compile(r"^ATOM\s\s(?:\s*(?P<ser>\d+))\s(?P<atm>[\w\s]{4})"
                            r"(?P<alc>\w|\s)(?P<res>[\w]{3})\s(?P<chn>.)"
                            r"(?P<pos>[\s\-\d]{4})(?P<icode>[A-Za-z\s])\s\s\s"
                            r"(?P<x>[\s\-\d\.]{8})(?P<y>[\s\-\d\.]{8})"
                            r"(?P<z>[\s\-\d\.]{8})(?P<occ>[\s\d\.]{6})"
                            r"(?P<tfac>[\s\d\.]{6})\s{6}"
                            r"(?P<segid>[a-zA-z\s]{4})(?P<elm>.{2})"
                            r"(?P<chg>.{2})?\s*$")
    # One mandatory and up to four optional "<key> <b-factor>" pairs.
    bfac_re = re.compile(r"^BFAC:\s([^\s]+\s+[\-\d\.]+)"
                         r"\s*([^\s]+\s+[\-\d\.]+)?"
                         r"\s*([^\s]+\s+[\-\d\.]+)?"
                         r"\s*([^\s]+\s+[\-\d\.]+)?"
                         r"\s*([^\s]+\s+[\-\d\.]+)?")
    bfac2_re = re.compile(r"([^\s]+)\s+([\-\d\.]+)")

    struct_builder = StructureBuilder()

    # init empty header dict
    # - could use to parse HEADER and TITLE lines except
    #   deposition_date format changed from original PDB header
    header_dict = _parse_pdb_header_list([])

    curr_SMCS = [None, None, None, None]  # struct model chain seg
    # Builder initialisers, index-aligned with curr_SMCS.
    SMCS_init = [
        struct_builder.init_structure,
        struct_builder.init_model,
        struct_builder.init_chain,
        struct_builder.init_seg,
    ]

    sb_res = None  # residue that subsequent records are attached to

    with as_handle(file, mode="r") as handle:
        for aline in handle:
            if aline.startswith("#"):
                pass  # skip comment lines
            elif aline.startswith("HEADER "):
                m = pdb_hdr_re.match(aline)
                if m:
                    header_dict["head"] = m.group("cf")  # classification
                    header_dict["idcode"] = m.group("id")
                    header_dict["deposition_date"] = m.group("dd")
                elif verbose:
                    print("Reading pic file", file, "HEADER parse fail: ", aline)
            elif aline.startswith("TITLE "):
                m = pdb_ttl_re.match(aline)
                if m:
                    header_dict["name"] = m.group("ttl").strip()
                elif verbose:
                    print("Reading pic file", file, "TITLE parse fail:, ", aline)
            elif aline.startswith("("):  # Biopython ID line for Residue
                m = biop_id_re.match(aline)
                if m:
                    # check SMCS = Structure, Model, Chain, SegID
                    segid = m.group("segid")
                    if segid is None:
                        segid = " "
                    this_SMCS = [
                        m.group("pid"), int(m.group("mdl")), m.group("chn"), segid
                    ]
                    if curr_SMCS != this_SMCS:
                        # init new SMCS level as needed
                        for i in range(4):
                            if curr_SMCS[i] != this_SMCS[i]:
                                SMCS_init[i](this_SMCS[i])
                                curr_SMCS[i] = this_SMCS[i]
                                if 0 == i:
                                    # 0 = init structure so add header
                                    struct_builder.set_header(header_dict)
                                elif 1 == i:
                                    # new model means new chain and new segid
                                    curr_SMCS[2] = curr_SMCS[3] = None

                    struct_builder.init_residue(
                        m.group("res"),
                        m.group("het"),
                        int(m.group("pos")),
                        m.group("icode"),
                    )

                    sb_res = struct_builder.residue
                    if 2 == sb_res.is_disordered():
                        # Pick the first child residue that has no
                        # internal_coord yet.
                        for r in sb_res.child_dict.values():
                            if not r.internal_coord:
                                sb_res = r
                                break
                    sb_res.internal_coord = IC_Residue(sb_res)
                else:
                    if verbose:
                        print("Reading pic file", file, "residue ID parse fail: ", aline)
                    return None
            elif aline.startswith("ATOM "):
                m = pdb_atm_re.match(aline)
                # An ATOM line that does not match the pattern is skipped
                # without any message.
                if m:
                    if sb_res is None:
                        # ATOM without res spec already loaded, not a pic file
                        if verbose:
                            print(
                                "Reading pic file",
                                file,
                                "ATOM without residue configured:, ",
                                aline,
                            )
                        return None
                    if sb_res.resname != m.group("res") or sb_res.id[1] != int(
                            m.group("pos")):
                        if verbose:
                            print(
                                "Reading pic file",
                                file,
                                "ATOM not in configured residue (",
                                sb_res.resname,
                                str(sb_res.id),
                                "):",
                                aline,
                            )
                        return None
                    coord = numpy.array(
                        (float(m.group("x")), float(m.group("y")), float(m.group("z"))),
                        "f",
                    )
                    struct_builder.init_atom(
                        m.group("atm").strip(),
                        coord,
                        float(m.group("tfac")),
                        float(m.group("occ")),
                        m.group("alc"),
                        m.group("atm"),
                        int(m.group("ser")),
                        m.group("elm").strip(),
                    )
            elif aline.startswith("BFAC: "):
                m = bfac_re.match(aline)
                # A BFAC line that does not match is skipped without any
                # message.
                if m:
                    for bfac_pair in m.groups():
                        if bfac_pair is not None:
                            m2 = bfac2_re.match(bfac_pair)
                            if m2 and sb_res is not None and sb_res.internal_coord:
                                rp = sb_res.internal_coord
                                rp.bfactors[m2.group(1)] = float(m2.group(2))
            else:
                m = Edron.edron_re.match(aline)
                if m and sb_res is not None:
                    sb_res.internal_coord.load_PIC(m.groupdict())
                elif m:
                    # Reported even when verbose is False.
                    print(
                        "PIC file: ",
                        file,
                        " error: no residue info before reading (di/h)edron data: ",
                        aline,
                    )
                    return None
                elif aline.strip():
                    if verbose:
                        print("Reading PIC file", file, "parse fail on: .", aline, ".")
                    return None

    struct = struct_builder.get_structure()
    for chn in struct.get_chains():
        chnp = chn.internal_coord = IC_Chain(chn)
        # done in IC_Chain init : chnp.set_residues()
        chnp.link_residues()
        chnp.init_edra()

    return struct
def read_PIC(file):
    """Load Protein Internal Coordinate (PIC) data from file.

    PIC file format:
        # comment lines start with #
        (optional) PDB HEADER record
            - idcode and deposition date recommended but optional
            - deposition date in PDB format or as changed by Biopython
        (optional) PDB TITLE record
        repeat:
            Biopython Residue Full ID - sets ID of returned structure
            (optional) PDB ATOM records for chain start N, CA, C
            PIC Hedra records for residue
            PIC Dihedra records for residue

    HEADER and TITLE lines that do not parse are reported and skipped.
    Every other parse failure (bad residue ID line, ATOM record that does
    not belong to the current residue, (di/h)edron data before any residue,
    unrecognised non-blank line) is reported with print() and aborts the
    read with a None result.

    :param Bio.File file: file name or handle
    :returns: Biopython Structure object, Residues with .internal_coord
        attributes but no coordinates except for chain start N, CA, C atoms
        if supplied, or None on parse fail (no exception raised for
        malformed input)
    """
    pdb_hdr_re = re.compile(
        r'^HEADER\s{4}(?P<cf>.{1,40})'
        r'(?:\s+(?P<dd>\d\d\d\d-\d\d-\d\d|\d\d-\w\w\w-\d\d))?'
        r'(?:\s+(?P<id>[0-9A-Z]{4}))?\s*$',
    )
    pdb_ttl_re = re.compile(r'^TITLE\s{5}(?P<ttl>.+)\s*$')
    # NOTE(review): the "A-z" ranges below also admit [ \ ] ^ _ ` ; this
    # looks like a typo for "A-Z" but is left untouched to keep accepting
    # exactly the same files - confirm before tightening.
    biop_id_re = re.compile(r"^\('(?P<pid>\w*)',\s(?P<mdl>\d+),\s"
                            r"'(?P<chn>\s|\w)',\s\('(?P<het>\s|[\w\s-]+)"
                            r"',\s(?P<pos>-?\d+),\s'(?P<icode>\s|\w)'\)\)"
                            r'\s+(?P<res>[\w]{1,3})'
                            r'(\s\[(?P<segid>[a-zA-z\s]+)\])?'
                            r'\s*$')
    pdb_atm_re = re.compile(r'^ATOM\s\s(?:\s*(?P<ser>\d+))\s(?P<atm>[\w\s]{4})'
                            r'(?P<alc>\w|\s)(?P<res>[\w]{3})\s(?P<chn>.)'
                            r'(?P<pos>[\s\-\d]{4})(?P<icode>[A-Za-z\s])\s\s\s'
                            r'(?P<x>[\s\-\d\.]{8})(?P<y>[\s\-\d\.]{8})'
                            r'(?P<z>[\s\-\d\.]{8})(?P<occ>[\s\d\.]{6})'
                            r'(?P<tfac>[\s\d\.]{6})\s{6}'
                            r'(?P<segid>[a-zA-z\s]{4})(?P<elm>.{2})'
                            r'(?P<chg>.{2})?\s*$')
    bfac_re = re.compile(r'^BFAC:\s([^\s]+\s+[\-\d\.]+)'
                         r'\s*([^\s]+\s+[\-\d\.]+)?'
                         r'\s*([^\s]+\s+[\-\d\.]+)?'
                         r'\s*([^\s]+\s+[\-\d\.]+)?'
                         r'\s*([^\s]+\s+[\-\d\.]+)?')
    bfac2_re = re.compile(r'([^\s]+)\s+([\-\d\.]+)')

    struct_builder = StructureBuilder()

    # init empty header dict
    # - could use to parse HEADER and TITLE lines except
    #   deposition_date format changed from original PDB header
    header_dict = _parse_pdb_header_list([])

    # Structure / Model / Chain / SegID currently open in the builder, and
    # the builder call that opens each of those levels (same order).
    curr_SMCS = [None, None, None, None]
    SMCS_init = [
        struct_builder.init_structure,
        struct_builder.init_model,
        struct_builder.init_chain,
        struct_builder.init_seg
    ]

    sb_res = None  # residue that ATOM / BFAC / (di/h)edron lines apply to

    with as_handle(file, mode='r') as handle:
        # iterate the handle lazily instead of materialising readlines()
        for aline in handle:
            if aline.startswith('#'):
                continue  # skip comment lines

            if aline.startswith('HEADER '):
                m = pdb_hdr_re.match(aline)
                if m:
                    header_dict['head'] = m.group('cf')  # classification
                    header_dict['idcode'] = m.group('id')
                    header_dict['deposition_date'] = m.group('dd')
                else:
                    print('Reading pic file', file, 'HEADER fail: ', aline)

            elif aline.startswith('TITLE '):
                m = pdb_ttl_re.match(aline)
                if m:
                    header_dict['name'] = m.group('ttl').strip()
                else:
                    print('Reading pic file', file, 'TITLE fail:, ', aline)

            elif aline.startswith('('):  # Biopython ID line for Residue
                m = biop_id_re.match(aline)
                if not m:
                    # Carrying on would attach the following records to the
                    # previously configured residue, so treat as parse fail.
                    print('Reading pic file', file, 'residue fail: ', aline)
                    return None

                # check SMCS = Structure, Model, Chain, SegID
                segid = m.group(9)
                if segid is None:
                    segid = ' '
                this_SMCS = [
                    m.group(1),
                    int(m.group(2)),
                    m.group(3),
                    segid
                ]
                if curr_SMCS != this_SMCS:
                    # init new SMCS level as needed
                    for i in range(4):
                        if curr_SMCS[i] != this_SMCS[i]:
                            SMCS_init[i](this_SMCS[i])
                            curr_SMCS[i] = this_SMCS[i]
                            if 0 == i:
                                # 0 = init structure so add header
                                struct_builder.set_header(header_dict)
                            elif 1 == i:
                                # new model means new chain and new segid
                                curr_SMCS[2] = curr_SMCS[3] = None

                struct_builder.init_residue(m.group('res'), m.group('het'),
                                            int(m.group('pos')),
                                            m.group('icode'))
                sb_res = struct_builder.residue
                if 2 == sb_res.is_disordered():
                    # pick the first child residue that has no
                    # internal_coord attribute yet
                    for r in sb_res.child_dict.values():
                        if not hasattr(r, 'internal_coord'):
                            sb_res = r
                            break
                sb_res.internal_coord = IC_Residue(sb_res)

            elif aline.startswith('ATOM '):
                m = pdb_atm_re.match(aline)
                if m:
                    if sb_res is None:
                        # ATOM without res spec already loaded, not a pic file
                        print('no sb_res - not pic file', aline)
                        return None
                    if (sb_res.resname != m.group('res')
                            or sb_res.id[1] != int(m.group('pos'))):
                        # Documented contract is "None on parse fail", so
                        # report and bail out rather than raise.
                        print('pic ATOM read confusion: %s %s %s' %
                              (sb_res.resname, str(sb_res.id), aline))
                        return None
                    coord = numpy.array(
                        (float(m.group('x')), float(
                            m.group('y')), float(m.group('z'))), "f")
                    struct_builder.init_atom(
                        m.group('atm').strip(), coord,
                        float(m.group('tfac')), float(m.group('occ')),
                        m.group('alc'), m.group('atm'),
                        int(m.group('ser')),
                        m.group('elm').strip())
                else:
                    print('Reading pic file', file, 'ATOM fail: ', aline)

            elif aline.startswith('BFAC: '):
                m = bfac_re.match(aline)
                if m:
                    # up to five "<atom key> <b-factor>" pairs per line
                    for bfac_pair in m.groups():
                        if bfac_pair is not None:
                            m2 = bfac2_re.match(bfac_pair)
                            if (m2 and sb_res is not None
                                    and hasattr(sb_res, 'internal_coord')):
                                rp = sb_res.internal_coord
                                rp.bfactors[m2.group(1)] = float(m2.group(2))

            else:
                m = Edron.edron_re.match(aline)
                if m and sb_res is not None:
                    sb_res.internal_coord.load_PIC(m.groupdict())
                elif m:
                    # (di/h)edron data with no residue to attach it to;
                    # previously this dereferenced None.
                    print('PIC file: ', file,
                          ' error: no residue info before reading'
                          ' (di/h)edron data: ', aline)
                    return None
                elif aline.strip():
                    print('Reading PIC file', file,
                          'parse fail on: .', aline, '.')
                    return None

    struct = struct_builder.get_structure()
    for chn in struct.get_chains():
        chnp = chn.internal_coord = IC_Chain(chn)
        # done in IC_Chain init : chnp.set_residues()
        chnp.link_residues()
        chnp.render_dihedra()

    return struct
protein_letters_3to1[residue.resname] for residue in chain if residue.id[0] == ' ' ]) tbl_cg_to_aa = [] for residue, sstype in zip(chain.child_list, martini_types): if residue.id[0] != ' ': # filter HETATMS continue # Convert SS to bfactor code sscode = ss_to_code[sstype] # Coarse grain residue residue_restraints, beads = MARTINI(residue) structure_builder.init_residue(residue.resname, residue.id[0], residue.id[1], residue.id[2]) # Populate residue for name, coord in beads: structure_builder.init_atom(name, coord, sscode, 1.00, " ", name, nbeads, "C") nbeads += 1 # Save restraints tbl_cg_to_aa.extend(residue_restraints) cg_model = structure_builder.get_structure() # output sequence and ss information print("%s:\t" % chain.id, aa_seq) print("%s:\t" % chain.id, martini_types) print("%s:\t" % chain.id, ''.join(map(lambda x: str(ss_to_code[x]), martini_types)))
def pdb_extract(structure, **kwargs):
    """Copy selected models, chains and residues into a new structure.

    Walks ``structure`` model by model and chain by chain and re-creates the
    selected residues and atoms through a fresh ``StructureBuilder`` whose
    structure id is ``'pdb_extract'``.

    Recognised keyword arguments (all optional):

    :param model: when not None, only the model whose ``get_id()`` equals
        this value is copied.
    :param new_model: when >= 0, copied models are numbered consecutively
        starting from this value; the default (-1) keeps the original ids.
    :param chain: when truthy, only chains with this id are copied.
    :param first_res: forwarded to ``select_residues_from_chain``.
    :param last_res: forwarded to ``select_residues_from_chain``.
    :param gap_count: forwarded to ``select_residues_from_chain`` when
        selecting the residues before/after the hit range (default 0).
    :param new_first_res: residue number given to the first residue of the
        hit range when renumbering (1 when not given or falsy).
    :param water: forwarded to ``chain_water_id`` to select water residues.
    :returns: the structure assembled by the builder.

    Residues are renumbered (ids become ``(' ', n, ' ')``) as soon as any of
    ``first_res``, ``last_res``, ``new_first_res`` or ``gap_count`` is
    truthy; otherwise original residue ids are kept.  Residues selected
    before the hit range keep only their C and O atoms, residues after it
    keep only N and HN; hit and water residues keep all atoms.
    """
    extract_model = kwargs.get('model')
    new_model_id = kwargs.get('new_model', -1)
    extract_chain = kwargs.get('chain')
    first_res = kwargs.get('first_res')
    last_res = kwargs.get('last_res')
    new_first_res = kwargs.get('new_first_res')
    gap_count = kwargs.get('gap_count', 0)
    water_id = kwargs.get('water')

    model_renumber_flag = bool((extract_model is not None)
                               or new_model_id >= 0)
    res_renumber_flag = bool(first_res or last_res or new_first_res
                             or gap_count)

    structure_builder = StructureBuilder()
    structure_builder.init_structure('pdb_extract')
    structure_builder.set_line_counter(0)
    line_counter = 0
    start_resseq_by_default = new_first_res if new_first_res else 1

    for model in structure:
        if (model_renumber_flag and extract_model is not None
                and model.get_id() != extract_model):
            continue

        if model_renumber_flag and new_model_id >= 0:
            this_model_id = new_model_id
            new_model_id += 1
        else:
            this_model_id = model.get_id()
        structure_builder.init_model(this_model_id, this_model_id)

        for chain in model:
            if extract_chain and chain.get_id() != extract_chain:
                continue
            structure_builder.init_seg(' ')
            structure_builder.init_chain(chain.get_id())

            # Residue groups, emitted in the order before/hit/after/water.
            resdict = {}
            if res_renumber_flag:
                resdict['before'] = select_residues_from_chain(
                    chain, first_res=first_res, gap_count=gap_count)
                resdict['hit'] = select_residues_from_chain(
                    chain, first_res=first_res, last_res=last_res)
                resdict['after'] = select_residues_from_chain(
                    chain, last_res=last_res, gap_count=gap_count)
            else:
                resdict['before'] = []
                resdict['hit'] = chain.get_list()
                resdict['after'] = []

            # Count back from the requested start so that the first 'hit'
            # residue lands exactly on start_resseq_by_default.
            new_resseq = start_resseq_by_default - len(resdict['before'])
            resdict['water'] = chain_water_id(chain, water_id)

            for key in ['before', 'hit', 'after', 'water']:
                for residue in resdict[key]:
                    if res_renumber_flag:
                        new_resid = ' ', new_resseq, ' '
                    else:
                        new_resid = residue.get_id()
                    structure_builder.init_residue(residue.get_resname(),
                                                   *new_resid)

                    residue_atoms = None
                    if key == 'before':
                        residue_atoms = [atom for atom in residue
                                         if atom.get_name() in ('C', 'O')]
                    elif key == 'hit' or key == 'water':
                        residue_atoms = residue.get_list()
                    elif key == 'after':
                        residue_atoms = [atom for atom in residue
                                         if atom.get_name() in ('N', 'HN')]

                    for atom in residue_atoms:
                        structure_builder.init_atom(atom.get_name(),
                                                    atom.get_coord(),
                                                    atom.get_bfactor(),
                                                    atom.get_occupancy(),
                                                    atom.get_altloc(),
                                                    atom.get_fullname())
                        structure_builder.set_line_counter(line_counter)
                        line_counter += 1
                    new_resseq += 1

                # NOTE(review): 'water' is the last group handled and
                # new_resseq is recomputed for the next chain, so this
                # padding appears to have no visible effect; possibly
                # key == 'after' was intended - confirm before changing.
                if key == 'water' and gap_count and len(
                        resdict['after']) != gap_count:
                    new_resseq += gap_count - len(resdict['after'])

    out_structure = structure_builder.get_structure()
    return out_structure
def read_PIC(
    file: TextIO,
    verbose: bool = False,
    quick: bool = False,
    defaults: bool = False,
) -> Structure:
    """Load Protein Internal Coordinate (.pic) data from file.

    PIC file format:
        - comment lines start with #
        - (optional) PDB HEADER record
           - idcode and deposition date recommended but optional
           - deposition date in PDB format or as changed by Biopython
        - (optional) PDB TITLE record
        - repeat:
           - Biopython Residue Full ID - sets residue IDs of returned structure
           - (optional) PDB N, CA, C ATOM records for chain start
           - (optional) PIC Hedra records for residue
           - (optional) PIC Dihedra records for residue
           - (optional) BFAC records listing AtomKeys and b-factors

    An improvement would define relative positions for HOH (water) entries.

    Defaults will be supplied for any value if defaults=True.  Default values
    are supplied in ic_data.py, but structures degrade quickly with any
    deviation from true coordinates.  Experiment with
    :data:`Bio.PDB.internal_coords.IC_Residue.pic_flags` options to
    :func:`write_PIC` to verify this.

    N.B. dihedron (i-1)C-N-CA-CB is ignored in assembly if O exists.

    C-beta is by default placed using O-C-CA-CB, but O is missing
    in some PDB file residues, which means the sidechain cannot be
    placed.  The alternate CB path (i-1)C-N-CA-CB is provided to
    circumvent this, but if this is needed then it must be adjusted in
    conjunction with PHI ((i-1)C-N-CA-C) as they overlap (see
    :meth:`.bond_set` and :meth:`.bond_rotate` to handle this automatically).

    Processing outline: residue ID lines open structure/model/chain/segid
    levels in a StructureBuilder as they change; hedra and dihedra values
    are collected per chain in local dictionaries and handed to the chain's
    internal_coord object when the chain changes and at end of input.

    :param Bio.File file: :func:`.as_handle` file name or handle
    :param bool verbose: complain when lines not as expected
    :param bool quick: don't check residues for all dihedra (no default values)
    :param bool defaults: create di/hedra as needed from reference database.
        Amide proton created if 'H' is in IC_Residue.accept_atoms
    :returns: Biopython Structure object, Residues with .internal_coord
        attributes but no coordinates except for chain start N, CA, C atoms if
        supplied, **OR** None on parse fail (silent unless verbose=True)
    """
    # whether amide protons are among the atoms to check for, see
    # dihedra_check()
    proton = "H" in IC_Residue.accept_atoms

    pdb_hdr_re = re.compile(
        r"^HEADER\s{4}(?P<cf>.{1,40})"
        r"(?:\s+(?P<dd>\d\d\d\d-\d\d-\d\d|\d\d-\w\w\w-\d\d))?"
        r"(?:\s+(?P<id>[0-9A-Z]{4}))?\s*$")
    pdb_ttl_re = re.compile(r"^TITLE\s{5}(?P<ttl>.+)\s*$")
    # NOTE(review): "A-z" in the segid classes below also admits the
    # characters [ \ ] ^ _ ` ; possibly "A-Z" was meant - confirm.
    biop_id_re = re.compile(r"^\('(?P<pid>[^\s]*)',\s(?P<mdl>\d+),\s"
                            r"'(?P<chn>\s|\w)',\s\('(?P<het>\s|[\w\s-]+)"
                            r"',\s(?P<pos>-?\d+),\s'(?P<icode>\s|\w)'\)\)"
                            r"\s+(?P<res>[\w]{1,3})"
                            r"(\s\[(?P<segid>[a-zA-z\s]+)\])?"
                            r"\s*$")
    pdb_atm_re = re.compile(r"^ATOM\s\s(?:\s*(?P<ser>\d+))\s(?P<atm>[\w\s]{4})"
                            r"(?P<alc>\w|\s)(?P<res>[\w]{3})\s(?P<chn>.)"
                            r"(?P<pos>[\s\-\d]{4})(?P<icode>[A-Za-z\s])\s\s\s"
                            r"(?P<x>[\s\-\d\.]{8})(?P<y>[\s\-\d\.]{8})"
                            r"(?P<z>[\s\-\d\.]{8})(?P<occ>[\s\d\.]{6})"
                            r"(?P<tfac>[\s\d\.]{6})\s{6}"
                            r"(?P<segid>[a-zA-z\s]{4})(?P<elm>.{2})"
                            r"(?P<chg>.{2})?\s*$")
    # a BFAC line carries one to five "<atom key> <b-factor>" pairs
    bfac_re = re.compile(r"^BFAC:\s([^\s]+\s+[\-\d\.]+)"
                         r"\s*([^\s]+\s+[\-\d\.]+)?"
                         r"\s*([^\s]+\s+[\-\d\.]+)?"
                         r"\s*([^\s]+\s+[\-\d\.]+)?"
                         r"\s*([^\s]+\s+[\-\d\.]+)?")
    bfac2_re = re.compile(r"([^\s]+)\s+([\-\d\.]+)")

    struct_builder = StructureBuilder()

    # init empty header dict
    # - could use to parse HEADER and TITLE lines except
    #   deposition_date format changed from original PDB header
    header_dict = _parse_pdb_header_list([])

    curr_SMCS = [None, None, None, None]  # struct model chain seg
    # builder call that opens each SMCS level, same order as curr_SMCS
    SMCS_init = [
        struct_builder.init_structure,
        struct_builder.init_model,
        struct_builder.init_chain,
        struct_builder.init_seg,
    ]

    # State shared with the nested helpers below through closure; the main
    # loop rebinds several of these names when a chain or residue changes.
    sb_res = None  # residue currently being filled
    rkl = None  # (position, insertion code or None, lc) of current residue
    sb_chain = None  # chain currently open in the builder
    sbcic = None  # internal_coord (IC_Chain) of sb_chain
    sbric = None  # internal_coord (IC_Residue) of sb_res

    akc = {}  # atomkey cache, used by akcache()
    hl12 = {}  # hedra key -> len12
    ha = {}  # hedra key -> hedra angle
    hl23 = {}  # hedra key -> len23
    da = {}  # dihedra key -> angle value
    bfacs = {}  # atomkey string -> b-factor

    # AtomKeys seen in di/hedra that do not belong to the residue being
    # read; matched up when a residue with the same key fields appears
    orphan_aks = set()  # []

    tr = []  # this residue
    pr = []  # previous residue

    def akcache(akstr: str) -> AtomKey:
        """Maintain dictionary of AtomKeys seen while reading this PIC file.

        Returns the cached AtomKey for akstr, creating and caching it on
        first use.
        """
        # akstr: full AtomKey string read from .pic file, includes residue info
        try:
            return akc[akstr]
        except (KeyError):
            ak = akc[akstr] = AtomKey(akstr)
            return ak

    def link_residues(ppr: List[Residue], pr: List[Residue]) -> None:
        """Set next and prev links between i-1 and i-2 residues.

        Every residue in pr is linked with every residue in ppr through the
        rnext / rprev lists of their internal_coord objects; a link is only
        added when the residue on the far end is not a hetero residue.
        """
        for p_r in pr:
            pric = p_r.internal_coord
            for p_p_r in ppr:
                ppric = p_p_r.internal_coord
                if p_r.id[0] == " ":  # not heteroatoms
                    if pric not in ppric.rnext:
                        ppric.rnext.append(pric)
                if p_p_r.id[0] == " ":
                    if ppric not in pric.rprev:
                        pric.rprev.append(ppric)

    def process_hedron(
        a1: str,
        a2: str,
        a3: str,
        l12: str,
        ang: str,
        l23: str,
        ric: IC_Residue,
    ) -> Tuple:
        """Create Hedron on current (sbcic) Chain.internal_coord.

        Records the two lengths and the angle in hl12 / ha / hl23 and
        registers a new Hedron with both the chain and the residue.

        :returns: the 3-tuple of AtomKeys used as hedron key, or None when
            any atom is not in IC_Residue.accept_atoms
        """
        ek = (akcache(a1), akcache(a2), akcache(a3))
        atmNdx = AtomKey.fields.atm
        accpt = IC_Residue.accept_atoms
        if not all(ek[i].akl[atmNdx] in accpt for i in range(3)):
            return
        hl12[ek] = float(l12)
        ha[ek] = float(ang)
        hl23[ek] = float(l23)
        sbcic.hedra[ek] = ric.hedra[ek] = h = Hedron(ek)
        h.cic = sbcic
        ak_add(ek, ric)
        return ek

    def default_hedron(ek: Tuple, ric: IC_Residue) -> None:
        """Create Hedron based on same rdh_class hedra in ref database.

        Adds Hedron to current Chain.internal_coord, see ic_data for default
        values and reference database source.
        """
        aks = []
        hkey = None

        atmNdx = AtomKey.fields.atm
        resNdx = AtomKey.fields.resname
        resPos = AtomKey.fields.respos
        aks = [ek[i].akl for i in range(3)]

        atpl = tuple([aks[i][atmNdx] for i in range(3)])
        res = aks[0][resNdx]
        if (aks[0][resPos] != aks[2][resPos]  # hedra crosses amide bond so not reversed
                or atpl == ("N", "CA", "C")  # or chain start tau
                or atpl in ic_data_backbone  # or found forward hedron in ic_data
                or (res not in ["A", "G"] and atpl in ic_data_sidechains[res])):
            hkey = ek
            # lookup key: residue name + atom name for each of the 3 atoms
            rhcl = [aks[i][resNdx] + aks[i][atmNdx] for i in range(3)]
            try:
                dflts = hedra_defaults["".join(rhcl)][0]
            except KeyError:
                # retry with "X" in place of the residue name of the atom
                # at the end that may belong to a different residue
                if aks[0][resPos] == aks[1][resPos]:
                    rhcl = [aks[i][resNdx] + aks[i][atmNdx] for i in range(2)]
                    rhc = "".join(rhcl) + "X" + aks[2][atmNdx]
                else:
                    rhcl = [
                        aks[i][resNdx] + aks[i][atmNdx] for i in range(1, 3)
                    ]
                    rhc = "X" + aks[0][atmNdx] + "".join(rhcl)
                dflts = hedra_defaults[rhc][0]
        else:
            # must be reversed or fail
            hkey = ek[::-1]
            rhcl = [aks[i][resNdx] + aks[i][atmNdx] for i in range(2, -1, -1)]
            dflts = hedra_defaults["".join(rhcl)][0]

        process_hedron(
            str(hkey[0]),
            str(hkey[1]),
            str(hkey[2]),
            dflts[0],
            dflts[1],
            dflts[2],
            ric,
        )

        if verbose:
            print(f" default for {ek}")

    def hedra_check(dk: str, ric: IC_Residue) -> None:
        """Confirm both hedra present for dihedron key, use default if set.

        Each of the two hedra (atoms 0-2 and atoms 1-3 of dk) is accepted in
        forward or reversed order.
        """
        # NOTE(review): the "missing" messages below print even when
        # verbose is False - confirm that is intended.
        if dk[0:3] not in sbcic.hedra and dk[2::-1] not in sbcic.hedra:
            if defaults:
                default_hedron(dk[0:3], ric)
            else:
                print(f"{dk} missing h1")
        if dk[1:4] not in sbcic.hedra and dk[3:0:-1] not in sbcic.hedra:
            if defaults:
                default_hedron(dk[1:4], ric)
            else:
                print(f"{dk} missing h2")

    def process_dihedron(a1: str, a2: str, a3: str, a4: str, dangle: str,
                         ric: IC_Residue) -> Set:
        """Create Dihedron on current Chain.internal_coord.

        Records the angle in da and registers a new Dihedron with both the
        chain and the residue; unless quick is set, also checks that both
        component hedra exist.

        :returns: the 4-tuple of AtomKeys used as dihedron key, or None when
            any atom is not in IC_Residue.accept_atoms
        """
        ek = (
            akcache(a1),
            akcache(a2),
            akcache(a3),
            akcache(a4),
        )
        atmNdx = AtomKey.fields.atm
        accpt = IC_Residue.accept_atoms
        if not all(ek[i].akl[atmNdx] in accpt for i in range(4)):
            return
        da[ek] = float(dangle)
        sbcic.dihedra[ek] = ric.dihedra[ek] = d = Dihedron(ek)
        d.cic = sbcic
        if not quick:
            hedra_check(ek, ric)
        ak_add(ek, ric)
        return ek

    def default_dihedron(ek: List, ric: IC_Residue) -> None:
        """Create Dihedron based on same residue class dihedra in ref database.

        Adds Dihedron to current Chain.internal_coord, see ic_data for default
        values and reference database source.

        Primary defaults are used directly; secondary (and OXT) defaults are
        an offset added to the already-known angle of a primary dihedron.
        """
        atmNdx = AtomKey.fields.atm
        resNdx = AtomKey.fields.resname
        resPos = AtomKey.fields.respos

        # dclass: concatenated atom names; rdclass: residue name + atom name
        # for each atom, used as key into the default tables
        rdclass = ""
        dclass = ""
        for ak in ek:
            dclass += ak.akl[atmNdx]
            rdclass += ak.akl[resNdx] + ak.akl[atmNdx]
        # backbone dihedra: "X" replaces the residue name for some atoms
        if dclass == "NCACN":
            rdclass = rdclass[0:7] + "XN"
        elif dclass == "CACNCA":
            rdclass = "XCAXC" + rdclass[5:]
        elif dclass == "CNCAC":
            rdclass = "XC" + rdclass[2:]

        if rdclass in dihedra_primary_defaults:
            process_dihedron(
                str(ek[0]),
                str(ek[1]),
                str(ek[2]),
                str(ek[3]),
                dihedra_primary_defaults[rdclass][0],
                ric,
            )

            if verbose:
                print(f" default for {ek}")

        elif rdclass in dihedra_secondary_defaults:
            primAngle, offset = dihedra_secondary_defaults[rdclass]
            rname = ek[2].akl[resNdx]
            rnum = int(ek[2].akl[resPos])
            # paKey: key of the primary dihedron the offset is relative to
            paKey = None
            if primAngle == ("N", "CA", "C", "N") and ek[0].ric.rnext != []:
                paKey = [
                    AtomKey((rnum, None, rname, primAngle[x], None, None))
                    for x in range(3)
                ]
                rnext = ek[0].ric.rnext
                paKey.append(
                    AtomKey((
                        rnext[0].rbase[0],
                        None,
                        rnext[0].rbase[2],
                        "N",
                        None,
                        None,
                    )))
                paKey = tuple(paKey)
            elif primAngle == ("CA", "C", "N", "CA"):
                # NOTE(review): pr is the enclosing list of previous
                # residues, so pr.akl looks like it would raise
                # AttributeError; likewise paKey is a list and lists have
                # no add(), and AtomKey here gets six positional arguments
                # instead of one tuple as elsewhere - confirm whether this
                # branch is reachable.
                prname = pr.akl[0][resNdx]
                prnum = pr.akl[0][resPos]
                paKey = [
                    AtomKey(prnum, None, prname, primAngle[x], None, None)
                    for x in range(0, 2)
                ]
                paKey.add([
                    AtomKey((rnum, None, rname, primAngle[x], None, None))
                    for x in range(2, 4)
                ])
                paKey = tuple(paKey)
            else:
                paKey = tuple(
                    AtomKey((rnum, None, rname, atm, None, None))
                    for atm in primAngle)

            if paKey in da:
                process_dihedron(
                    str(ek[0]),
                    str(ek[1]),
                    str(ek[2]),
                    str(ek[3]),
                    da[paKey] + dihedra_secondary_defaults[rdclass][1],
                    ric,
                )

                if verbose:
                    print(f" secondary default for {ek}")

            elif rdclass in dihedra_secondary_xoxt_defaults:
                if primAngle == ("C", "N", "CA", "C"):  # primary for alt cb
                    # no way to trigger alt cb with default=True
                    # because will generate default N-CA-C-O
                    # NOTE(review): same pr.akl / list.add() concerns as in
                    # the ("CA", "C", "N", "CA") branch above.
                    prname = pr.akl[0][resNdx]
                    prnum = pr.akl[0][resPos]
                    paKey = [
                        AtomKey(prnum, None, prname, primAngle[0], None, None)
                    ]
                    paKey.add([
                        AtomKey((rnum, None, rname, primAngle[x], None, None))
                        for x in range(1, 4)
                    ])
                    paKey = tuple(paKey)
                else:
                    primAngle, offset = dihedra_secondary_xoxt_defaults[
                        rdclass]
                    rname = ek[2].akl[resNdx]
                    rnum = int(ek[2].akl[resPos])
                    paKey = tuple(
                        AtomKey((rnum, None, rname, atm, None, None))
                        for atm in primAngle)

                if paKey in da:
                    process_dihedron(
                        str(ek[0]),
                        str(ek[1]),
                        str(ek[2]),
                        str(ek[3]),
                        da[paKey] + offset,
                        ric,
                    )

                    if verbose:
                        print(f" oxt default for {ek}")

                else:
                    print(f"missing primary angle {paKey} {primAngle} to "
                          f"generate {rnum}{rname} {rdclass}")
        else:
            print(
                f"missing {ek} -> {rdclass} ({dclass}) not found in primary or"
                " secondary defaults")

    def dihedra_check(ric: IC_Residue) -> None:
        """Look for required dihedra in residue, generate defaults if set.

        Builds the list of dihedra the residue is expected to have
        (backbone, O, CB, optional amide proton, sidechain), expands it over
        altloc variants, and for each one missing from ric.dihedra either
        creates a default (defaults=True) or reports it (verbose=True).
        """
        # rnext should be set

        def ake_recurse(akList: List) -> List:
            """Build combinatorics of AtomKey lists.

            akList is a list of lists of alternatives; the result is a list
            with one list per combination taking one item from each.
            """
            car = akList[0]
            if len(akList) > 1:
                retList = []
                for ak in car:
                    cdr = akList[1:]
                    rslt = ake_recurse(cdr)
                    for r in rslt:
                        r.insert(0, ak)
                        retList.append(r)
                return retList
            else:
                if len(car) == 1:
                    return [list(car)]
                else:
                    retList = [[ak] for ak in car]
                    return retList

        def ak_expand(eLst: List) -> List:
            """Expand AtomKey list with altlocs, all combinatorics."""
            retList = []
            for edron in eLst:
                newList = []
                for ak in edron:
                    rslt = ak.ric.split_akl([ak])
                    rlst = [r[0] for r in rslt]
                    if rlst != []:
                        newList.append(rlst)
                    else:
                        # nothing returned for this key, keep it as is
                        newList.append([ak])
                rslt = ake_recurse(newList)
                for r in rslt:
                    retList.append(r)
            return retList

        # dihedra_check processing starts here
        # generate the list of dihedra this residue should have
        chkLst = []
        sN, sCA, sC = AtomKey(ric, "N"), AtomKey(ric, "CA"), AtomKey(ric, "C")
        sO, sCB, sH = AtomKey(ric, "O"), AtomKey(ric, "CB"), AtomKey(ric, "H")
        if ric.rnext != []:
            for rn in ric.rnext:
                nN, nCA, nC = (
                    AtomKey(rn, "N"),
                    AtomKey(rn, "CA"),
                    AtomKey(rn, "C"),
                )
                # intermediate residue, need psi, phi, omg
                chkLst.append((sN, sCA, sC, nN))  # psi
                chkLst.append((sCA, sC, nN, nCA))  # omg i+1
                chkLst.append((sC, nN, nCA, nC))  # phi i+1
        else:
            chkLst.append((sN, sCA, sC, AtomKey(ric, "OXT")))  # psi
            rn = "(no rnext)"  # only used in the "missing" message below
        chkLst.append((sN, sCA, sC, sO))  # locate backbone O
        if ric.lc != "G":
            chkLst.append((sO, sC, sCA, sCB))  # locate CB
        if ric.rprev != [] and ric.lc != "P" and proton:
            chkLst.append((sC, sCA, sN, sH))  # amide proton

        try:
            for edron in ic_data_sidechains[ric.lc]:
                if len(edron) > 3:  # dihedra only
                    # skip entries involving atoms whose name starts with H
                    if all(not atm[0] == "H" for atm in edron):
                        akl = [AtomKey(ric, atm) for atm in edron[0:4]]
                        chkLst.append(akl)
        except KeyError:
            # no sidechain table entry for this residue code
            pass

        # now compare generated list to ric.dihedra, get defaults if set.
        chkLst = ak_expand(chkLst)
        altloc_ndx = AtomKey.fields.altloc

        for dk in chkLst:
            if tuple(dk) in ric.dihedra:
                pass
            elif sH in dk:
                pass  # ignore missing hydrogens
            elif all(atm.akl[altloc_ndx] is None for atm in dk):
                if defaults:
                    default_dihedron(dk, ric)
                else:
                    if verbose:
                        print(f"{ric}-{rn} missing {dk}")
            else:
                # print(f"skip {ek}")
                pass  # ignore missing combinatoric of altloc atoms
                # need more here?

    def ak_add(ek: set, ric: IC_Residue) -> None:
        """Allocate edron key AtomKeys to current residue as appropriate.

        A hedron or dihedron may span a backbone amide bond, this routine
        allocates atoms in the (h/di)edron to the ric residue or saves them
        for a residue yet to be processed.

        :param set ek: AtomKeys in edron
        :param IC_Residue ric: current residue to assign AtomKeys to
        """
        res = ric.residue
        # same fields as the first three entries of AtomKey.akl
        reskl = (
            str(res.id[1]),
            (None if res.id[2] == " " else res.id[2]),
            ric.lc,
        )
        for ak in ek:
            if ak.ric is None:
                sbcic.akset.add(ak)
                if ak.akl[0:3] == reskl:
                    ak.ric = ric
                    ric.ak_set.add(ak)
                else:
                    orphan_aks.add(ak)

    def finish_chain() -> None:
        """Do last rnext, rprev links and process chain edra data."""
        link_residues(pr, tr)
        # check/confirm completeness
        if not quick:
            for r in pr:
                dihedra_check(r.internal_coord)
            for r in tr:
                dihedra_check(r.internal_coord)

        if ha != {}:
            # hand hedra values to the chain in sorted key order
            sha = {k: ha[k] for k in sorted(ha)}
            shl12 = {k: hl12[k] for k in sorted(hl12)}
            shl23 = {k: hl23[k] for k in sorted(hl23)}
            sbcic._hedraDict2chain(shl12, sha, shl23, da, bfacs)

    # read_PIC processing starts here:
    with as_handle(file, mode="r") as handle:
        for line in handle.readlines():
            if line.startswith("#"):
                pass  # skip comment lines
            elif line.startswith("HEADER "):
                m = pdb_hdr_re.match(line)
                if m:
                    header_dict["head"] = m.group("cf")  # classification
                    header_dict["idcode"] = m.group("id")
                    header_dict["deposition_date"] = m.group("dd")
                elif verbose:
                    print("Reading pic file", file, "HEADER parse fail: ", line)
            elif line.startswith("TITLE "):
                m = pdb_ttl_re.match(line)
                if m:
                    header_dict["name"] = m.group("ttl").strip()
                    # print('TTL: ', m.group('ttl').strip())
                elif verbose:
                    print("Reading pic file", file, "TITLE parse fail:, ", line)
            elif line.startswith("("):  # Biopython ID line for Residue
                m = biop_id_re.match(line)
                if m:
                    # check SMCS = Structure, Model, Chain, SegID
                    segid = m.group(9)
                    if segid is None:
                        segid = " "
                    this_SMCS = [
                        m.group(1),
                        int(m.group(2)),
                        m.group(3),
                        segid,
                    ]
                    if curr_SMCS != this_SMCS:
                        if curr_SMCS[:3] != this_SMCS[:3] and ha != {}:
                            # chain change so process current chain data
                            finish_chain()

                            # fresh per-chain accumulators; the nested
                            # helpers see the rebinding through closure
                            akc = {}  # atomkey cache, used by akcache()
                            hl12 = {}  # hedra key -> len12
                            ha = {}  # -> hedra angle
                            hl23 = {}  # -> len23
                            da = {}  # dihedra key -> angle value
                            bfacs = {}  # atomkey string -> b-factor
                        # init new Biopython SMCS level as needed
                        for i in range(4):
                            if curr_SMCS[i] != this_SMCS[i]:
                                SMCS_init[i](this_SMCS[i])
                                curr_SMCS[i] = this_SMCS[i]
                                if i == 0:
                                    # 0 = init structure so add header
                                    struct_builder.set_header(header_dict)
                                elif i == 1:
                                    # new model means new chain and new segid
                                    curr_SMCS[2] = curr_SMCS[3] = None
                                elif i == 2:
                                    # new chain so init internal_coord
                                    sb_chain = struct_builder.chain
                                    sbcic = sb_chain.internal_coord = IC_Chain(
                                        sb_chain)

                    struct_builder.init_residue(
                        m.group("res"),
                        m.group("het"),
                        int(m.group("pos")),
                        m.group("icode"),
                    )

                    sb_res = struct_builder.residue
                    if sb_res.id[0] != " ":  # skip hetatm
                        continue
                    if 2 == sb_res.is_disordered():
                        # pick the first child residue without an
                        # internal_coord yet
                        for r in sb_res.child_dict.values():
                            if not r.internal_coord:
                                sb_res = r
                                break
                        # added to disordered res
                        tr.append(sb_res)
                    else:
                        # new res so fix up previous residue as feasible
                        link_residues(pr, tr)

                        if not quick:
                            for r in pr:
                                # create di/hedra if default for residue i-1
                                # just linked
                                dihedra_check(r.internal_coord)

                        pr = tr
                        tr = [sb_res]

                    sbric = sb_res.internal_coord = IC_Residue(
                        sb_res)  # no atoms so no rak
                    sbric.cic = sbcic
                    rkl = (
                        str(sb_res.id[1]),
                        (None if sb_res.id[2] == " " else sb_res.id[2]),
                        sbric.lc,
                    )
                    sbcic.ordered_aa_ic_list.append(sbric)

                    # update AtomKeys w/o IC_Residue references, in case
                    # chain ends before di/hedra sees them (2XHE test case)
                    for ak in orphan_aks:
                        if ak.akl[0:3] == rkl:
                            ak.ric = sbric
                            sbric.ak_set.add(ak)
                            # may need altoc support here
                    # keep only the keys that are still unassigned
                    orphan_aks = set(
                        filter(lambda ak: ak.ric is None, orphan_aks))

                else:
                    if verbose:
                        print(
                            "Reading pic file",
                            file,
                            "residue ID parse fail: ",
                            line,
                        )
                    return None
            elif line.startswith("ATOM "):
                m = pdb_atm_re.match(line)
                if m:
                    if sb_res is None:
                        # ATOM without res spec already loaded, not a pic file
                        if verbose:
                            print(
                                "Reading pic file",
                                file,
                                "ATOM without residue configured:, ",
                                line,
                            )
                        return None
                    if sb_res.resname != m.group("res") or sb_res.id[1] != int(
                            m.group("pos")):
                        if verbose:
                            print(
                                "Reading pic file",
                                file,
                                "ATOM not in configured residue (",
                                sb_res.resname,
                                str(sb_res.id),
                                "):",
                                line,
                            )
                        return None
                    coord = numpy.array(
                        (
                            float(m.group("x")),
                            float(m.group("y")),
                            float(m.group("z")),
                        ),
                        "f",
                    )
                    struct_builder.init_atom(
                        m.group("atm").strip(),
                        coord,
                        float(m.group("tfac")),
                        float(m.group("occ")),
                        m.group("alc"),
                        m.group("atm"),
                        int(m.group("ser")),
                        m.group("elm").strip(),
                    )

                    # reset because prev does not link to this residue
                    # (chainBreak)
                    pr = []

            elif line.startswith("BFAC: "):
                m = bfac_re.match(line)
                if m:
                    for bfac_pair in m.groups():
                        if bfac_pair is not None:
                            m2 = bfac2_re.match(bfac_pair)
                            bfacs[m2.group(1)] = float(m2.group(2))
                # else:
                #    print f"Reading pic file {file} B-factor fail: {line}"
            else:
                m = Edron.edron_re.match(line)
                if m and sb_res is not None:
                    # no fourth atom means the record is a hedron
                    if m["a4"] is None:
                        process_hedron(
                            m["a1"],
                            m["a2"],
                            m["a3"],
                            m["len12"],
                            m["angle"],
                            m["len23"],
                            sb_res.internal_coord,
                        )
                    else:
                        process_dihedron(
                            m["a1"],
                            m["a2"],
                            m["a3"],
                            m["a4"],
                            float(m["dihedral"]),
                            sb_res.internal_coord,
                        )

                elif m:
                    # NOTE(review): printed regardless of verbose, unlike
                    # the other failure paths - confirm that is intended.
                    print(
                        "PIC file: ",
                        file,
                        " error: no residue info before reading (di/h)edron: ",
                        line,
                    )
                    return None
                elif line.strip():
                    if verbose:
                        print(
                            "Reading PIC file",
                            file,
                            "parse fail on: .",
                            line,
                            ".",
                        )
                    return None

    # reached end of input
    finish_chain()

    # print(report_PIC(struct_builder.get_structure()))
    return struct_builder.get_structure()