예제 #1
0
def build_structure(atoms):
    sb = StructureBuilder()
    sb.init_structure('pdb')
    sb.init_seg(' ')
    sb.init_model(0)
    sb.init_chain('A')
    i = 1
    for atom in atoms:
        sb.init_residue('DUM', ' ', i, ' ')
        sb.structure[0]['A'].child_list[i - 1].add(atom)
        i += 1
    return sb.structure
예제 #2
0
    def set_structure(self, pdb_object):
        """Check what the user is providing and build a structure."""
        # The idea here is to build missing upstream components of
        # the SMCRA object representation. E.g., if the user provides
        # a Residue, build Structure/Model/Chain.

        if pdb_object.level == "S":
            structure = pdb_object
        else:  # Not a Structure
            sb = StructureBuilder()
            sb.init_structure("pdb")
            sb.init_seg(" ")

            if pdb_object.level == "M":
                sb.structure.add(pdb_object.copy())
                self.structure = sb.structure
            else:  # Not a Model
                sb.init_model(0)

                if pdb_object.level == "C":
                    sb.structure[0].add(pdb_object.copy())
                else:  # Not a Chain
                    chain_id = "A"  # default
                    sb.init_chain(chain_id)

                    if pdb_object.level == "R":  # Residue
                        # Residue extracted from a larger structure?
                        if pdb_object.parent is not None:
                            og_chain_id = pdb_object.parent.id
                            sb.structure[0][chain_id].id = og_chain_id
                            chain_id = og_chain_id

                        sb.structure[0][chain_id].add(pdb_object.copy())

                    else:  # Atom
                        sb.init_residue("DUM", " ", 1, " ")  # Dummy residue
                        sb.structure[0][chain_id].child_list[0].add(
                            pdb_object.copy())

                        # Fix chain identifier if Atom has grandparents.
                        try:
                            og_chain_id = pdb_object.parent.parent.id
                        except AttributeError:  # pdb_object.parent == None
                            pass
                        else:
                            sb.structure[0][chain_id].id = og_chain_id

            # Return structure
            structure = sb.structure
        self.structure = structure
예제 #3
0
def pdb_writer(graph, results, dir_name="templates"):
    PDB_CODE = graph.protein_name
    structure = graph.structure

    path = "%s/%s/" % (dir_name, PDB_CODE)
    if len(results) > 0:
        if not os.path.exists(path):
            os.makedirs(path)
    else:
        print 'No results'
        return

    top = min(NUM_OF_RESULTS_TO_SAVE, len(results))
    i = 0
    for key, value in results[:top]:
        # TODO - save as different chains
        sb = StructureBuilder()
        sb.init_structure(PDB_CODE)
        sb.init_model(0)
        sb.init_chain('X')
        for node_id in key[0]:
            for res in graph.nodes[node_id].residues:
                sb.init_seg(res.get_segid())
                sb.init_residue(res.get_resname(),
                                res.get_id()[0],
                                res.get_id()[1],
                                res.get_id()[2])
                for atom in res:
                    sb.init_atom(atom.get_name(), atom.get_coord(),
                                 atom.get_bfactor(), atom.get_occupancy(),
                                 atom.get_altloc(), atom.get_fullname())

        sb.init_chain('Y')
        for node_id in key[1]:
            for res in graph.nodes[node_id].residues:
                sb.init_residue(res.get_resname(),
                                res.get_id()[0],
                                res.get_id()[1],
                                res.get_id()[2])
                for atom in res:
                    sb.init_atom(atom.get_name(), atom.get_coord(),
                                 atom.get_bfactor(), atom.get_occupancy(),
                                 atom.get_altloc(), atom.get_fullname())
        filename = path + "interface%d.pdb" % (i)
        io = PDBIO()
        io.set_structure(sb.get_structure())
        io.save(filename)
        i += 1

    return 1
예제 #4
0
    def write_helical_axes(self, filename):
        """Writes helical axes in PDB format."""

        sb = StructureBuilder()
        sb.init_structure('')
        sb.init_model('')

        for cpos, chain in enumerate(self.chains):
            sb.init_chain(str(cpos))
            sb.init_seg('')

            for pos, i in enumerate(chain.res[1:-1]):
                sb.init_residue('ALA', ' ', pos, ' ')
                sb.init_atom('CA', i.O._ar, 0, 0, " ", ' CA ', 1)

        io = PDBIO()
        io.set_structure(sb.structure)
        io.save(filename)
예제 #5
0
    def set_structure(self, pdb_object):
        """Check what object the user is providing and build a structure."""
        # This is duplicated from the PDBIO class
        if pdb_object.level == "S":
            structure = pdb_object
        else:
            sb = StructureBuilder()
            sb.init_structure('pdb')
            sb.init_seg(' ')
            # Build parts as necessary
            if pdb_object.level == "M":
                sb.structure.add(pdb_object)
                self.structure = sb.structure
            else:
                sb.init_model(0)
                if pdb_object.level == "C":
                    sb.structure[0].add(pdb_object)
                else:
                    sb.init_chain('A')
                    if pdb_object.level == "R":
                        try:
                            parent_id = pdb_object.parent.id
                            sb.structure[0]['A'].id = parent_id
                        except ValueError:
                            pass
                        sb.structure[0]['A'].add(pdb_object)
                    else:
                        # Atom
                        sb.init_residue('DUM', ' ', 1, ' ')
                        try:
                            parent_id = pdb_object.parent.parent.id
                            sb.structure[0]['A'].id = parent_id
                        except ValueError:
                            pass
                        sb.structure[0]['A'].child_list[0].add(pdb_object)

            # Return structure
            structure = sb.structure
        self.structure = structure
예제 #6
0
    def set_structure(self, pdb_object):
        """Check what object the user is providing and build a structure."""
        # This is duplicated from the PDBIO class
        if pdb_object.level == "S":
            structure = pdb_object
        else:
            sb = StructureBuilder()
            sb.init_structure('pdb')
            sb.init_seg(' ')
            # Build parts as necessary
            if pdb_object.level == "M":
                sb.structure.add(pdb_object)
                self.structure = sb.structure
            else:
                sb.init_model(0)
                if pdb_object.level == "C":
                    sb.structure[0].add(pdb_object)
                else:
                    sb.init_chain('A')
                    if pdb_object.level == "R":
                        try:
                            parent_id = pdb_object.parent.id
                            sb.structure[0]['A'].id = parent_id
                        except ValueError:
                            pass
                        sb.structure[0]['A'].add(pdb_object)
                    else:
                        # Atom
                        sb.init_residue('DUM', ' ', 1, ' ')
                        try:
                            parent_id = pdb_object.parent.parent.id
                            sb.structure[0]['A'].id = parent_id
                        except ValueError:
                            pass
                        sb.structure[0]['A'].child_list[0].add(pdb_object)

            # Return structure
            structure = sb.structure
        self.structure = structure
예제 #7
0
파일: PDBIO.py 프로젝트: tulw4r/biopython
    def set_structure(self, pdb_object):
        """Check what the user is providing and build a structure."""
        if pdb_object.level == "S":
            structure = pdb_object
        else:
            sb = StructureBuilder()
            sb.init_structure("pdb")
            sb.init_seg(" ")
            # Build parts as necessary
            if pdb_object.level == "M":
                sb.structure.add(pdb_object.copy())
                self.structure = sb.structure
            else:
                sb.init_model(0)
                if pdb_object.level == "C":
                    sb.structure[0].add(pdb_object.copy())
                else:
                    sb.init_chain("A")
                    if pdb_object.level == "R":
                        try:
                            parent_id = pdb_object.parent.id
                            sb.structure[0]["A"].id = parent_id
                        except Exception:
                            pass
                        sb.structure[0]["A"].add(pdb_object.copy())
                    else:
                        # Atom
                        sb.init_residue("DUM", " ", 1, " ")
                        try:
                            parent_id = pdb_object.parent.parent.id
                            sb.structure[0]["A"].id = parent_id
                        except Exception:
                            pass
                        sb.structure[0]["A"].child_list[0].add(
                            pdb_object.copy())

            # Return structure
            structure = sb.structure
        self.structure = structure
예제 #8
0
    def set_structure(self, pdb_object):
        """Check what the user is providing and build a structure."""
        if pdb_object.level == "S":
            structure = pdb_object
        else:
            sb = StructureBuilder()
            sb.init_structure('pdb')
            sb.init_seg(' ')
            # Build parts as necessary
            if pdb_object.level == "M":
                sb.structure.add(pdb_object.copy())
                self.structure = sb.structure
            else:
                sb.init_model(0)
                if pdb_object.level == "C":
                    sb.structure[0].add(pdb_object.copy())
                else:
                    sb.init_chain('A')
                    if pdb_object.level == "R":
                        try:
                            parent_id = pdb_object.parent.id
                            sb.structure[0]['A'].id = parent_id
                        except Exception:
                            pass
                        sb.structure[0]['A'].add(pdb_object.copy())
                    else:
                        # Atom
                        sb.init_residue('DUM', ' ', 1, ' ')
                        try:
                            parent_id = pdb_object.parent.parent.id
                            sb.structure[0]['A'].id = parent_id
                        except Exception:
                            pass
                        sb.structure[0]['A'].child_list[0].add(pdb_object.copy())

            # Return structure
            structure = sb.structure
        self.structure = structure
예제 #9
0
    def set_structure(self, pdb_object):
        # Check what the user is providing and build a structure appropriately
        if pdb_object.level == "S":
            structure = pdb_object
        else:
            sb = StructureBuilder()
            sb.init_structure('pdb')
            sb.init_seg(' ')
            # Build parts as necessary
            if pdb_object.level == "M":
                sb.structure.add(pdb_object)
                self.structure = sb.structure
            else:
                sb.init_model(0)
                if pdb_object.level == "C":
                    sb.structure[0].add(pdb_object)
                else:
                    sb.init_chain('A')
                    if pdb_object.level == "R":
                        try:
                            parent_id = pdb_object.parent.id
                            sb.structure[0]['A'].id = parent_id
                        except Exception:
                            pass
                        sb.structure[0]['A'].add(pdb_object)
                    else:
                        # Atom
                        sb.init_residue('DUM', ' ', 1, ' ')
                        try:
                            parent_id = pdb_object.parent.parent.id
                            sb.structure[0]['A'].id = parent_id
                        except Exception:
                            pass
                        sb.structure[0]['A'].child_list[0].add(pdb_object)

            # Return structure
            structure = sb.structure
        self.structure = structure
예제 #10
0
파일: PDBIO.py 프로젝트: kehey/biopython
    def set_structure(self, pdb_object):
        # Check what the user is providing and build a structure appropriately
        if pdb_object.level == "S":
            structure = pdb_object
        else:
            sb = StructureBuilder()
            sb.init_structure("pdb")
            sb.init_seg(" ")
            # Build parts as necessary
            if pdb_object.level == "M":
                sb.structure.add(pdb_object)
                self.structure = sb.structure
            else:
                sb.init_model(0)
                if pdb_object.level == "C":
                    sb.structure[0].add(pdb_object)
                else:
                    sb.init_chain("A")
                    if pdb_object.level == "R":
                        try:
                            parent_id = pdb_object.parent.id
                            sb.structure[0]["A"].id = parent_id
                        except Exception:
                            pass
                        sb.structure[0]["A"].add(pdb_object)
                    else:
                        # Atom
                        sb.init_residue("DUM", " ", 1, " ")
                        try:
                            parent_id = pdb_object.parent.parent.id
                            sb.structure[0]["A"].id = parent_id
                        except Exception:
                            pass
                        sb.structure[0]["A"].child_list[0].add(pdb_object)

            # Return structure
            structure = sb.structure
        self.structure = structure
예제 #11
0
class StructureDecoder(object):
    """Class to pass the data from mmtf-python into a Biopython data structure."""

    def __init__(self):
        """Initialize the class."""
        self.this_type = ""

    def init_structure(self, total_num_bonds, total_num_atoms,
                       total_num_groups, total_num_chains, total_num_models,
                       structure_id):
        """Initialize the structure object.

        :param total_num_bonds: the number of bonds in the structure
        :param total_num_atoms: the number of atoms in the structure
        :param total_num_groups: the number of groups in the structure
        :param total_num_chains: the number of chains in the structure
        :param total_num_models: the number of models in the structure
        :param structure_id: the id of the structure (e.g. PDB id)

        """
        self.structure_bulder = StructureBuilder()
        self.structure_bulder.init_structure(structure_id=structure_id)
        self.chain_index_to_type_map = {}
        self.chain_index_to_seq_map = {}
        self.chain_index_to_description_map = {}
        self.chain_counter = 0

    def set_atom_info(self, atom_name, serial_number, alternative_location_id,
                      x, y, z, occupancy, temperature_factor, element, charge):
        """Create an atom object an set the information.

        :param atom_name: the atom name, e.g. CA for this atom
        :param serial_number: the serial id of the atom (e.g. 1)
        :param alternative_location_id: the alternative location id for the atom, if present
        :param x: the x coordiante of the atom
        :param y: the y coordinate of the atom
        :param z: the z coordinate of the atom
        :param occupancy: the occupancy of the atom
        :param temperature_factor: the temperature factor of the atom
        :param element: the element of the atom, e.g. C for carbon. According to IUPAC. Calcium  is Ca
        :param charge: the formal atomic charge of the atom

        """
        # MMTF uses "\x00" (the NUL character) to indicate to altloc, so convert
        # that to the space required by StructureBuilder
        if alternative_location_id == "\x00":
            alternative_location_id = " "

        # Atom_name is in twice - the full_name is with spaces
        self.structure_bulder.init_atom(str(atom_name), [x, y, z],
                                        temperature_factor, occupancy,
                                        alternative_location_id, str(atom_name),
                                        serial_number=serial_number,
                                        element=str(element).upper())

    def set_chain_info(self, chain_id, chain_name, num_groups):
        """Set the chain information.

        :param chain_id: the asym chain id from mmCIF
        :param chain_name: the auth chain id from mmCIF
        :param num_groups: the number of groups this chain has

        """
        # A Bradley - chose to use chain_name (auth_id) as it complies
        # with current Biopython. Chain_id might be better.
        self.structure_bulder.init_chain(chain_id=chain_name)
        if self.chain_index_to_type_map[self.chain_counter] == "polymer":
            self.this_type = " "
        elif self.chain_index_to_type_map[self.chain_counter] == "non-polymer":
            self.this_type = "H"
        elif self.chain_index_to_type_map[self.chain_counter] == "water":
            self.this_type = "W"
        self.chain_counter += 1

    def set_entity_info(self, chain_indices, sequence, description, entity_type):
        """Set the entity level information for the structure.

        :param chain_indices: the indices of the chains for this entity
        :param sequence: the one letter code sequence for this entity
        :param description: the description for this entity
        :param entity_type: the entity type (polymer,non-polymer,water)

        """
        for chain_ind in chain_indices:
            self.chain_index_to_type_map[chain_ind] = entity_type
            self.chain_index_to_seq_map[chain_ind] = sequence
            self.chain_index_to_description_map[chain_ind] = description

    def set_group_info(self, group_name, group_number, insertion_code,
                       group_type, atom_count, bond_count, single_letter_code,
                       sequence_index, secondary_structure_type):
        """Set the information for a group

        :param group_name: the name of this group, e.g. LYS
        :param group_number: the residue number of this group
        :param insertion_code: the insertion code for this group
        :param group_type: a string indicating the type of group (as found in the chemcomp dictionary.
            Empty string if none available.
        :param atom_count: the number of atoms in the group
        :param bond_count: the number of unique bonds in the group
        :param single_letter_code: the single letter code of the group
        :param sequence_index: the index of this group in the sequence defined by the entity
        :param secondary_structure_type: the type of secondary structure used
            (types are according to DSSP and number to type mappings are defined in the specification)

        """
        # MMTF uses a NUL character to indicate a blank insertion code, but
        # StructureBuilder expects a space instead.
        if insertion_code == "\x00":
            insertion_code = " "

        self.structure_bulder.init_seg(' ')
        self.structure_bulder.init_residue(group_name, self.this_type,
                                           group_number, insertion_code)

    def set_model_info(self, model_id, chain_count):
        """Set the information for a model.

        :param model_id: the index for the model
        :param chain_count: the number of chains in the model

        """
        self.structure_bulder.init_model(model_id)

    def set_xtal_info(self, space_group, unit_cell):
        """Set the crystallographic information for the structure.

        :param space_group: the space group name, e.g. "P 21 21 21"
        :param unit_cell: an array of length 6 with the unit cell parameters in order: a, b, c, alpha, beta, gamma

        """
        self.structure_bulder.set_symmetry(space_group, unit_cell)

    def set_header_info(self, r_free, r_work, resolution, title,
                        deposition_date, release_date, experimnetal_methods):
        """Sets the header information.

        :param r_free: the measured R-Free for the structure
        :param r_work: the measure R-Work for the structure
        :param resolution: the resolution of the structure
        :param title: the title of the structure
        :param deposition_date: the deposition date of the structure
        :param release_date: the release date of the structure
        :param experimnetal_methods: the list of experimental methods in the structure

        """
        pass

    def set_bio_assembly_trans(self, bio_assembly_index, input_chain_indices,
                               input_transform):
        """Set the Bioassembly transformation information. A single bioassembly can have multiple transforms.

        :param bio_assembly_index: the integer index of the bioassembly
        :param input_chain_indices: the list of integer indices for the chains of this bioassembly
        :param input_transform: the list of doubles for  the transform of this bioassmbly transform.

        """
        pass

    def finalize_structure(self):
        """Any functions needed to cleanup the structure."""
        pass

    def set_group_bond(self, atom_index_one, atom_index_two, bond_order):
        """Add bonds within a group.

        :param atom_index_one: the integer atom index (in the group) of the first partner in the bond
        :param atom_index_two: the integer atom index (in the group) of the second partner in the bond
        :param bond_order: the integer bond order

        """
        pass

    def set_inter_group_bond(self, atom_index_one, atom_index_two, bond_order):
        """Add bonds between groups.

        :param atom_index_one: the integer atom index (in the structure) of the first partner in the bond
        :param atom_index_two: the integer atom index (in the structure) of the second partner in the bond
        :param bond_order: the bond order

        """
        pass
예제 #12
0
class StructureDecoder(object):
    """Class to pass the data from mmtf-python into a Biopython data structure."""
    def __init__(self):
        """Initialize the class."""
        self.this_type = ""

    def init_structure(self, total_num_bonds, total_num_atoms,
                       total_num_groups, total_num_chains, total_num_models,
                       structure_id):
        """Initialize the structure object.

        :param total_num_bonds: the number of bonds in the structure
        :param total_num_atoms: the number of atoms in the structure
        :param total_num_groups: the number of groups in the structure
        :param total_num_chains: the number of chains in the structure
        :param total_num_models: the number of models in the structure
        :param structure_id: the id of the structure (e.g. PDB id)

        """
        self.structure_bulder = StructureBuilder()
        self.structure_bulder.init_structure(structure_id=structure_id)
        self.chain_index_to_type_map = {}
        self.chain_index_to_seq_map = {}
        self.chain_index_to_description_map = {}
        self.chain_counter = 0

    def set_atom_info(self, atom_name, serial_number, alternative_location_id,
                      x, y, z, occupancy, temperature_factor, element, charge):
        """Create an atom object an set the information.

        :param atom_name: the atom name, e.g. CA for this atom
        :param serial_number: the serial id of the atom (e.g. 1)
        :param alternative_location_id: the alternative location id for the atom, if present
        :param x: the x coordiante of the atom
        :param y: the y coordinate of the atom
        :param z: the z coordinate of the atom
        :param occupancy: the occupancy of the atom
        :param temperature_factor: the temperature factor of the atom
        :param element: the element of the atom, e.g. C for carbon. According to IUPAC. Calcium  is Ca
        :param charge: the formal atomic charge of the atom

        """
        # MMTF uses "\x00" (the NUL character) to indicate to altloc, so convert
        # that to the space required by StructureBuilder
        if alternative_location_id == "\x00":
            alternative_location_id = " "

        # Atom_name is in twice - the full_name is with spaces
        self.structure_bulder.init_atom(str(atom_name),
                                        numpy.array((x, y, z), "f"),
                                        temperature_factor,
                                        occupancy,
                                        alternative_location_id,
                                        str(atom_name),
                                        serial_number=serial_number,
                                        element=str(element).upper())

    def set_chain_info(self, chain_id, chain_name, num_groups):
        """Set the chain information.

        :param chain_id: the asym chain id from mmCIF
        :param chain_name: the auth chain id from mmCIF
        :param num_groups: the number of groups this chain has

        """
        # A Bradley - chose to use chain_name (auth_id) as it complies
        # with current Biopython. Chain_id might be better.
        self.structure_bulder.init_chain(chain_id=chain_name)
        if self.chain_index_to_type_map[self.chain_counter] == "polymer":
            self.this_type = " "
        elif self.chain_index_to_type_map[self.chain_counter] == "non-polymer":
            self.this_type = "H"
        elif self.chain_index_to_type_map[self.chain_counter] == "water":
            self.this_type = "W"
        self.chain_counter += 1

    def set_entity_info(self, chain_indices, sequence, description,
                        entity_type):
        """Set the entity level information for the structure.

        :param chain_indices: the indices of the chains for this entity
        :param sequence: the one letter code sequence for this entity
        :param description: the description for this entity
        :param entity_type: the entity type (polymer,non-polymer,water)

        """
        for chain_ind in chain_indices:
            self.chain_index_to_type_map[chain_ind] = entity_type
            self.chain_index_to_seq_map[chain_ind] = sequence
            self.chain_index_to_description_map[chain_ind] = description

    def set_group_info(self, group_name, group_number, insertion_code,
                       group_type, atom_count, bond_count, single_letter_code,
                       sequence_index, secondary_structure_type):
        """Set the information for a group.

        :param group_name: the name of this group, e.g. LYS
        :param group_number: the residue number of this group
        :param insertion_code: the insertion code for this group
        :param group_type: a string indicating the type of group (as found in the chemcomp dictionary.
            Empty string if none available.
        :param atom_count: the number of atoms in the group
        :param bond_count: the number of unique bonds in the group
        :param single_letter_code: the single letter code of the group
        :param sequence_index: the index of this group in the sequence defined by the entity
        :param secondary_structure_type: the type of secondary structure used
            (types are according to DSSP and number to type mappings are defined in the specification)

        """
        # MMTF uses a NUL character to indicate a blank insertion code, but
        # StructureBuilder expects a space instead.
        if insertion_code == "\x00":
            insertion_code = " "

        self.structure_bulder.init_seg(' ')
        self.structure_bulder.init_residue(group_name, self.this_type,
                                           group_number, insertion_code)

    def set_model_info(self, model_id, chain_count):
        """Set the information for a model.

        :param model_id: the index for the model
        :param chain_count: the number of chains in the model

        """
        self.structure_bulder.init_model(model_id)

    def set_xtal_info(self, space_group, unit_cell):
        """Set the crystallographic information for the structure.

        :param space_group: the space group name, e.g. "P 21 21 21"
        :param unit_cell: an array of length 6 with the unit cell parameters in order: a, b, c, alpha, beta, gamma

        """
        self.structure_bulder.set_symmetry(space_group, unit_cell)

    def set_header_info(self, r_free, r_work, resolution, title,
                        deposition_date, release_date, experimnetal_methods):
        """Set the header information.

        :param r_free: the measured R-Free for the structure
        :param r_work: the measure R-Work for the structure
        :param resolution: the resolution of the structure
        :param title: the title of the structure
        :param deposition_date: the deposition date of the structure
        :param release_date: the release date of the structure
        :param experimnetal_methods: the list of experimental methods in the structure

        """
        pass

    def set_bio_assembly_trans(self, bio_assembly_index, input_chain_indices,
                               input_transform):
        """Set the Bioassembly transformation information. A single bioassembly can have multiple transforms.

        :param bio_assembly_index: the integer index of the bioassembly
        :param input_chain_indices: the list of integer indices for the chains of this bioassembly
        :param input_transform: the list of doubles for  the transform of this bioassmbly transform.

        """
        pass

    def finalize_structure(self):
        """Any functions needed to cleanup the structure."""
        pass

    def set_group_bond(self, atom_index_one, atom_index_two, bond_order):
        """Add bonds within a group.

        :param atom_index_one: the integer atom index (in the group) of the first partner in the bond
        :param atom_index_two: the integer atom index (in the group) of the second partner in the bond
        :param bond_order: the integer bond order

        """
        pass

    def set_inter_group_bond(self, atom_index_one, atom_index_two, bond_order):
        """Add bonds between groups.

        :param atom_index_one: the integer atom index (in the structure) of the first partner in the bond
        :param atom_index_two: the integer atom index (in the structure) of the second partner in the bond
        :param bond_order: the bond order

        """
        pass
예제 #13
0
def read_PIC(file: TextIO, verbose: bool = False) -> Structure:
    """Load Protein Internal Coordinate (.pic) data from file.

    PIC file format:
        - comment lines start with #
        - (optional) PDB HEADER record
           - idcode and deposition date recommended but optional
           - deposition date in PDB format or as changed by Biopython
        - (optional) PDB TITLE record
        - repeat:
           - Biopython Residue Full ID - sets residue IDs of returned structure
           - (optional) PDB N, CA, C ATOM records for chain start
           - (optional) PIC Hedra records for residue
           - (optional) PIC Dihedra records for residue
           - (optional) BFAC records listing AtomKeys and b-factors

    An improvement would define relative positions for HOH (water) entries.

    N.B. dihedron (i-1)C-N-CA-CB is ignored in assembly if O exists.

    C-beta is by default placed using O-C-CA-CB, but O is missing
    in some PDB file residues, which means the sidechain cannot be
    placed.  The alternate CB path (i-1)C-N-CA-CB is provided to
    circumvent this, but if this is needed then it must be adjusted in
    conjunction with PHI ((i-1)C-N-CA-C) as they overlap.  (i-1)C-N-CA-CB is
    included by default in .pic files for consistency and informational
    (e.g. statistics gathering) purposes, as otherwise the dihedron would only
    appear in the few cases it is needed for.

    :param Bio.File file: file name or handle
    :param bool verbose: complain when lines not as expected
    :returns: Biopython Structure object, Residues with .internal_coord attributes
        but no coordinates except for chain start N, CA, C atoms if supplied,
        **OR** None on parse fail (silent unless verbose=True)

    """
    pdb_hdr_re = re.compile(
        r"^HEADER\s{4}(?P<cf>.{1,40})"
        r"(?:\s+(?P<dd>\d\d\d\d-\d\d-\d\d|\d\d-\w\w\w-\d\d))?"
        r"(?:\s+(?P<id>[0-9A-Z]{4}))?\s*$")
    # ^\('(?P<pid>\w*)',\s(?P<mdl>\d+),\s'(?P<chn>\w)',\s\('(?P<het>\s|[\w-]+)',\s(?P<pos>\d+),\s'(?P<icode>\s|\w)'\)\)\s(?P<res>[A-Z]{3})\s(\[(?P<segid>[a-zA-z\s]{4})\])?\s*$
    pdb_ttl_re = re.compile(r"^TITLE\s{5}(?P<ttl>.+)\s*$")
    biop_id_re = re.compile(r"^\('(?P<pid>[^\s]*)',\s(?P<mdl>\d+),\s"
                            r"'(?P<chn>\s|\w)',\s\('(?P<het>\s|[\w\s-]+)"
                            r"',\s(?P<pos>-?\d+),\s'(?P<icode>\s|\w)'\)\)"
                            r"\s+(?P<res>[\w]{1,3})"
                            r"(\s\[(?P<segid>[a-zA-z\s]+)\])?"
                            r"\s*$")
    pdb_atm_re = re.compile(r"^ATOM\s\s(?:\s*(?P<ser>\d+))\s(?P<atm>[\w\s]{4})"
                            r"(?P<alc>\w|\s)(?P<res>[\w]{3})\s(?P<chn>.)"
                            r"(?P<pos>[\s\-\d]{4})(?P<icode>[A-Za-z\s])\s\s\s"
                            r"(?P<x>[\s\-\d\.]{8})(?P<y>[\s\-\d\.]{8})"
                            r"(?P<z>[\s\-\d\.]{8})(?P<occ>[\s\d\.]{6})"
                            r"(?P<tfac>[\s\d\.]{6})\s{6}"
                            r"(?P<segid>[a-zA-z\s]{4})(?P<elm>.{2})"
                            r"(?P<chg>.{2})?\s*$")
    bfac_re = re.compile(r"^BFAC:\s([^\s]+\s+[\-\d\.]+)"
                         r"\s*([^\s]+\s+[\-\d\.]+)?"
                         r"\s*([^\s]+\s+[\-\d\.]+)?"
                         r"\s*([^\s]+\s+[\-\d\.]+)?"
                         r"\s*([^\s]+\s+[\-\d\.]+)?")
    bfac2_re = re.compile(r"([^\s]+)\s+([\-\d\.]+)")
    struct_builder = StructureBuilder()

    # init empty header dict
    # - could use to parse HEADER and TITLE lines except
    #   deposition_date format changed from original PDB header
    header_dict = _parse_pdb_header_list([])

    curr_SMCS = [None, None, None, None]  # struct model chain seg
    SMCS_init = [
        struct_builder.init_structure,
        struct_builder.init_model,
        struct_builder.init_chain,
        struct_builder.init_seg,
    ]

    sb_res = None

    with as_handle(file, mode="r") as handle:
        for aline in handle.readlines():
            if aline.startswith("#"):
                pass  # skip comment lines
            elif aline.startswith("HEADER "):
                m = pdb_hdr_re.match(aline)
                if m:
                    header_dict["head"] = m.group("cf")  # classification
                    header_dict["idcode"] = m.group("id")
                    header_dict["deposition_date"] = m.group("dd")
                elif verbose:
                    print("Reading pic file", file, "HEADER parse fail: ",
                          aline)
            elif aline.startswith("TITLE "):
                m = pdb_ttl_re.match(aline)
                if m:
                    header_dict["name"] = m.group("ttl").strip()
                    # print('TTL: ', m.group('ttl').strip())
                elif verbose:
                    print("Reading pic file", file, "TITLE parse fail:, ",
                          aline)
            elif aline.startswith("("):  # Biopython ID line for Residue
                m = biop_id_re.match(aline)
                if m:
                    # check SMCS = Structure, Model, Chain, SegID
                    segid = m.group(9)
                    if segid is None:
                        segid = "    "
                    this_SMCS = [
                        m.group(1),
                        int(m.group(2)),
                        m.group(3), segid
                    ]
                    if curr_SMCS != this_SMCS:
                        # init new SMCS level as needed
                        for i in range(4):
                            if curr_SMCS[i] != this_SMCS[i]:
                                SMCS_init[i](this_SMCS[i])
                                curr_SMCS[i] = this_SMCS[i]
                                if 0 == i:
                                    # 0 = init structure so add header
                                    struct_builder.set_header(header_dict)
                                elif 1 == i:
                                    # new model means new chain and new segid
                                    curr_SMCS[2] = curr_SMCS[3] = None

                    struct_builder.init_residue(
                        m.group("res"),
                        m.group("het"),
                        int(m.group("pos")),
                        m.group("icode"),
                    )

                    sb_res = struct_builder.residue
                    if 2 == sb_res.is_disordered():
                        for r in sb_res.child_dict.values():
                            if not r.internal_coord:
                                sb_res = r
                                break
                    sb_res.internal_coord = IC_Residue(sb_res)
                    # print('res id:', m.groupdict())
                    # print(report_IC(struct_builder.get_structure()))
                else:
                    if verbose:
                        print("Reading pic file", file,
                              "residue ID parse fail: ", aline)
                    return None
            elif aline.startswith("ATOM "):
                m = pdb_atm_re.match(aline)
                if m:
                    if sb_res is None:
                        # ATOM without res spec already loaded, not a pic file
                        if verbose:
                            print(
                                "Reading pic file",
                                file,
                                "ATOM without residue configured:, ",
                                aline,
                            )
                        return None
                    if sb_res.resname != m.group("res") or sb_res.id[1] != int(
                            m.group("pos")):
                        if verbose:
                            print(
                                "Reading pic file",
                                file,
                                "ATOM not in configured residue (",
                                sb_res.resname,
                                str(sb_res.id),
                                "):",
                                aline,
                            )
                        return None
                    coord = numpy.array(
                        (float(m.group("x")), float(
                            m.group("y")), float(m.group("z"))),
                        "f",
                    )
                    struct_builder.init_atom(
                        m.group("atm").strip(),
                        coord,
                        float(m.group("tfac")),
                        float(m.group("occ")),
                        m.group("alc"),
                        m.group("atm"),
                        int(m.group("ser")),
                        m.group("elm").strip(),
                    )

                    # print('atom: ', m.groupdict())
                # elif verbose:
                #     print("Reading pic file", file, "ATOM parse fail:", aline)
            elif aline.startswith("BFAC: "):
                m = bfac_re.match(aline)
                if m:
                    for bfac_pair in m.groups():
                        if bfac_pair is not None:
                            m2 = bfac2_re.match(bfac_pair)
                            if m2 and sb_res is not None and sb_res.internal_coord:
                                rp = sb_res.internal_coord
                                rp.bfactors[m2.group(1)] = float(m2.group(2))
                # else:
                #    print('Reading pic file', file, 'B-factor line fail: ', aline)
            else:
                m = Edron.edron_re.match(aline)
                if m and sb_res is not None:
                    sb_res.internal_coord.load_PIC(m.groupdict())
                elif m:
                    print(
                        "PIC file: ",
                        file,
                        " error: no residue info before reading (di/h)edron data: ",
                        aline,
                    )
                    return None
                elif aline.strip():
                    if verbose:
                        print("Reading PIC file", file, "parse fail on: .",
                              aline, ".")
                    return None

    struct = struct_builder.get_structure()
    for chn in struct.get_chains():
        chnp = chn.internal_coord = IC_Chain(chn)
        # done in IC_Chain init : chnp.set_residues()
        chnp.link_residues()
        chnp.init_edra()

    # print(report_PIC(struct_builder.get_structure()))
    return struct
예제 #14
0
파일: PICIO.py 프로젝트: rob-miller/rFold
def read_PIC(file):
    """Load Protein Internal Coordinate (PIC) data from file.

    PIC file format:
        # comment lines start with #
        (optional) PDB HEADER record
            - idcode and deposition date recommended but optional
            - deposition date in PDB format or as changed by Biopython
        (optional) PDB TITLE record
        repeat:
            Biopython Residue Full ID - sets ID of returned structure
            (optional) PDB ATOM records for chain start N, CA, C
            PIC Hedra records for residue
            PIC Dihedra records for residue

    :param Bio.File file: file name or handle
    :returns: Biopython Structure object, Residues with .pic attributes
        but no coordinates except for chain start N, CA, C atoms if supplied,
        or None on parse fail (silent, no exception rasied)
    """
    pdb_hdr_re = re.compile(
        r'^HEADER\s{4}(?P<cf>.{1,40})'
        r'(?:\s+(?P<dd>\d\d\d\d-\d\d-\d\d|\d\d-\w\w\w-\d\d))?'
        r'(?:\s+(?P<id>[0-9A-Z]{4}))?\s*$', )
    # ^\('(?P<pid>\w*)',\s(?P<mdl>\d+),\s'(?P<chn>\w)',\s\('(?P<het>\s|[\w-]+)',\s(?P<pos>\d+),\s'(?P<icode>\s|\w)'\)\)\s(?P<res>[A-Z]{3})\s(\[(?P<segid>[a-zA-z\s]{4})\])?\s*$
    pdb_ttl_re = re.compile(r'^TITLE\s{5}(?P<ttl>.+)\s*$')
    biop_id_re = re.compile(r"^\('(?P<pid>\w*)',\s(?P<mdl>\d+),\s"
                            r"'(?P<chn>\s|\w)',\s\('(?P<het>\s|[\w\s-]+)"
                            r"',\s(?P<pos>-?\d+),\s'(?P<icode>\s|\w)'\)\)"
                            r'\s+(?P<res>[\w]{1,3})'
                            r'(\s\[(?P<segid>[a-zA-z\s]+)\])?'
                            r'\s*$')
    pdb_atm_re = re.compile(r'^ATOM\s\s(?:\s*(?P<ser>\d+))\s(?P<atm>[\w\s]{4})'
                            r'(?P<alc>\w|\s)(?P<res>[\w]{3})\s(?P<chn>.)'
                            r'(?P<pos>[\s\-\d]{4})(?P<icode>[A-Za-z\s])\s\s\s'
                            r'(?P<x>[\s\-\d\.]{8})(?P<y>[\s\-\d\.]{8})'
                            r'(?P<z>[\s\-\d\.]{8})(?P<occ>[\s\d\.]{6})'
                            r'(?P<tfac>[\s\d\.]{6})\s{6}'
                            r'(?P<segid>[a-zA-z\s]{4})(?P<elm>.{2})'
                            r'(?P<chg>.{2})?\s*$')
    bfac_re = re.compile(r'^BFAC:\s([^\s]+\s+[\-\d\.]+)'
                         r'\s*([^\s]+\s+[\-\d\.]+)?'
                         r'\s*([^\s]+\s+[\-\d\.]+)?'
                         r'\s*([^\s]+\s+[\-\d\.]+)?'
                         r'\s*([^\s]+\s+[\-\d\.]+)?')
    bfac2_re = re.compile(r'([^\s]+)\s+([\-\d\.]+)')
    struct_builder = StructureBuilder()

    # init empty header dict
    # - could use to parse HEADER and TITLE lines except
    #   deposition_date format changed from original PDB header
    header_dict = _parse_pdb_header_list([])

    curr_SMCS = [None, None, None, None]  # struct model chain seg
    SMCS_init = [
        struct_builder.init_structure, struct_builder.init_model,
        struct_builder.init_chain, struct_builder.init_seg
    ]

    sb_res = None

    with as_handle(file, mode='r') as handle:
        for aline in handle.readlines():
            if aline.startswith('#'):
                pass  # skip comment lines
            elif aline.startswith('HEADER '):
                m = pdb_hdr_re.match(aline)
                if m:
                    header_dict['head'] = m.group('cf')  # classification
                    header_dict['idcode'] = m.group('id')
                    header_dict['deposition_date'] = m.group('dd')
                else:
                    print('Reading pic file', file, 'HEADER fail: ', aline)
                pass
            elif aline.startswith('TITLE '):
                m = pdb_ttl_re.match(aline)
                if m:
                    header_dict['name'] = m.group('ttl').strip()
                    # print('TTL: ', m.group('ttl').strip())
                else:
                    print('Reading pic file', file, 'TITLE fail:, ', aline)
            elif aline.startswith('('):  # Biopython ID line for Residue
                m = biop_id_re.match(aline)
                if m:
                    # check SMCS = Structure, Model, Chain, SegID
                    segid = m.group(9)
                    if segid is None:
                        segid = '    '
                    this_SMCS = [
                        m.group(1),
                        int(m.group(2)),
                        m.group(3), segid
                    ]
                    if curr_SMCS != this_SMCS:
                        # init new SMCS level as needed
                        for i in range(4):
                            if curr_SMCS[i] != this_SMCS[i]:
                                SMCS_init[i](this_SMCS[i])
                                curr_SMCS[i] = this_SMCS[i]
                                if 0 == i:
                                    # 0 = init structure so add header
                                    struct_builder.set_header(header_dict)
                                elif 1 == i:
                                    # new model means new chain and new segid
                                    curr_SMCS[2] = curr_SMCS[3] = None

                    struct_builder.init_residue(m.group('res'), m.group('het'),
                                                int(m.group('pos')),
                                                m.group('icode'))

                    sb_res = struct_builder.residue
                    if 2 == sb_res.is_disordered():
                        for r in sb_res.child_dict.values():
                            if not hasattr(r, 'internal_coord'):
                                sb_res = r
                                break
                    sb_res.internal_coord = IC_Residue(sb_res)
                    # print('res id:', m.groupdict())
                    # print(report_PIC(struct_builder.get_structure()))
                else:
                    print('Reading pic file', file, 'residue fail: ', aline)
            elif aline.startswith('ATOM '):
                m = pdb_atm_re.match(aline)
                if m:
                    if sb_res is None:
                        # ATOM without res spec already loaded, not a pic file
                        print('no sb_res - not pic file', aline)
                        return None
                    if (sb_res.resname != m.group('res')
                            or sb_res.id[1] != int(m.group('pos'))):
                        # TODO: better exception here?
                        raise Exception(
                            'pic ATOM read confusion: %s %s %s' %
                            (sb_res.resname, str(sb_res.id), aline))

                    coord = numpy.array(
                        (float(m.group('x')), float(
                            m.group('y')), float(m.group('z'))), "f")
                    struct_builder.init_atom(
                        m.group('atm').strip(), coord, float(m.group('tfac')),
                        float(m.group('occ')), m.group('alc'), m.group('atm'),
                        int(m.group('ser')),
                        m.group('elm').strip())

                    # print('atom: ', m.groupdict())
                else:
                    print('Reading pic file', file, 'ATOM fail: ', aline)
            elif aline.startswith('BFAC: '):
                m = bfac_re.match(aline)
                if m:
                    for bfac_pair in m.groups():
                        if bfac_pair is not None:
                            m2 = bfac2_re.match(bfac_pair)
                            if (m2 and sb_res is not None
                                    and hasattr(sb_res, 'internal_coord')):
                                rp = sb_res.internal_coord
                                rp.bfactors[m2.group(1)] = float(m2.group(2))
            else:
                m = Edron.edron_re.match(aline)
                if m:
                    sb_res.internal_coord.load_PIC(m.groupdict())
                elif aline.strip():
                    print('Reading PIC file', file, 'parse fail on: .', aline,
                          '.')
                    return None

    struct = struct_builder.get_structure()
    for chn in struct.get_chains():
        chnp = chn.internal_coord = IC_Chain(chn)
        # done in IC_Chain init : chnp.set_residues()
        chnp.link_residues()
        chnp.render_dihedra()

    # print(report_PIC(struct_builder.get_structure()))
    return struct
예제 #15
0
            protein_letters_3to1[residue.resname] for residue in chain
            if residue.id[0] == ' '
        ])

        tbl_cg_to_aa = []

        for residue, sstype in zip(chain.child_list, martini_types):

            if residue.id[0] != ' ':  # filter HETATMS
                continue

            # Convert SS to bfactor code
            sscode = ss_to_code[sstype]
            # Coarse grain residue
            residue_restraints, beads = MARTINI(residue)
            structure_builder.init_residue(residue.resname, residue.id[0],
                                           residue.id[1], residue.id[2])
            # Populate residue
            for name, coord in beads:
                structure_builder.init_atom(name, coord, sscode, 1.00, " ",
                                            name, nbeads, "C")
                nbeads += 1
            # Save restraints
            tbl_cg_to_aa.extend(residue_restraints)

cg_model = structure_builder.get_structure()

# output sequence and ss information
print("%s:\t" % chain.id, aa_seq)
print("%s:\t" % chain.id, martini_types)
print("%s:\t" % chain.id,
      ''.join(map(lambda x: str(ss_to_code[x]), martini_types)))
예제 #16
0
def pdb_extract(structure, **kwargs):

    # model to extract from pdb
    extract_model = None if not 'model' in kwargs else kwargs['model']
    new_model_id = -1 if not 'new_model' in kwargs else kwargs['new_model']
    extract_chain = None if not 'chain' in kwargs else kwargs['chain']
    first_res = None if not 'first_res' in kwargs else kwargs['first_res']
    last_res = None if not 'last_res' in kwargs else kwargs['last_res']
    new_first_res = None if not 'new_first_res' in kwargs else kwargs[
        'new_first_res']
    gap_count = 0 if not 'gap_count' in kwargs else kwargs['gap_count']
    water_id = None if not 'water' in kwargs else kwargs['water']

    model_rebumber_flag = bool((extract_model is not None)
                               or new_model_id >= 0)
    res_renumber_flag = bool(first_res or last_res or new_first_res
                             or gap_count)

    structure_builder = StructureBuilder()
    structure_builder.init_structure('pdb_extract')
    structure_builder.set_line_counter(0)
    line_counter = 0
    start_resseq_by_default = 1 if not new_first_res else new_first_res

    for model in structure:
        if model_rebumber_flag and \
                ( extract_model is not None ) and model.get_id() != extract_model:
            continue

        if model_rebumber_flag and new_model_id >= 0:
            this_model_id = new_model_id
            new_model_id += 1
        else:
            this_model_id = model.get_id()
        structure_builder.init_model(this_model_id, this_model_id)

        for chain in model:
            if extract_chain and chain.get_id() != extract_chain:
                continue
            structure_builder.init_seg(' ')
            structure_builder.init_chain(chain.get_id())
            resdict = {}
            if res_renumber_flag:
                # first_res = res_range_tuple[0]
                # last_res = res_range_tuple[1]

                resdict['before'] = select_residues_from_chain(
                    chain, first_res=first_res, gap_count=gap_count)
                resdict['hit'] = select_residues_from_chain(
                    chain, first_res=first_res, last_res=last_res)
                resdict['after'] = select_residues_from_chain(
                    chain, last_res=last_res, gap_count=gap_count)
            else:
                resdict['before'] = []
                resdict['hit'] = chain.get_list()
                resdict['after'] = []
            new_resseq = start_resseq_by_default - len(resdict['before'])
            resdict['water'] = chain_water_id(chain, water_id)
            for key in ['before', 'hit', 'after', 'water']:
                for residue in resdict[key]:
                    if res_renumber_flag:
                        new_resid = ' ', new_resseq, ' '
                    else:
                        new_resid = residue.get_id()
                    structure_builder.init_residue(residue.get_resname(),
                                                   *new_resid)
                    residue_atoms = None
                    if key == 'before':
                        residue_atoms = [atom for atom in residue if \
                                         (atom.get_name() == 'C' or atom.get_name() == 'O')]
                    elif key == 'hit' or key == 'water':
                        residue_atoms = residue.get_list()
                    elif key == 'after':
                        residue_atoms = [atom for atom in residue if \
                                         (atom.get_name() == 'N' or atom.get_name() == 'HN')]
                    for atom in residue_atoms:
                        structure_builder.init_atom(atom.get_name(),
                                                    atom.get_coord(),
                                                    atom.get_bfactor(),
                                                    atom.get_occupancy(),
                                                    atom.get_altloc(),
                                                    atom.get_fullname())
                        structure_builder.set_line_counter(line_counter)
                        line_counter += 1
                    new_resseq += 1
                    if key == 'water' and gap_count and len(
                            resdict['after']) != gap_count:
                        new_resseq += gap_count - len(resdict['after'])

    out_structure = structure_builder.get_structure()
    return out_structure
예제 #17
0
def read_PIC(
    file: TextIO,
    verbose: bool = False,
    quick: bool = False,
    defaults: bool = False,
) -> Structure:
    """Load Protein Internal Coordinate (.pic) data from file.

    PIC file format:
        - comment lines start with #
        - (optional) PDB HEADER record
           - idcode and deposition date recommended but optional
           - deposition date in PDB format or as changed by Biopython
        - (optional) PDB TITLE record
        - repeat:
           - Biopython Residue Full ID - sets residue IDs of returned structure
           - (optional) PDB N, CA, C ATOM records for chain start
           - (optional) PIC Hedra records for residue
           - (optional) PIC Dihedra records for residue
           - (optional) BFAC records listing AtomKeys and b-factors

    An improvement would define relative positions for HOH (water) entries.

    Defaults will be supplied for any value if defaults=True.  Default values
    are supplied in ic_data.py, but structures degrade quickly with any
    deviation from true coordinates.  Experiment with
    :data:`Bio.PDB.internal_coords.IC_Residue.pic_flags` options to
    :func:`write_PIC` to verify this.

    N.B. dihedron (i-1)C-N-CA-CB is ignored in assembly if O exists.

    C-beta is by default placed using O-C-CA-CB, but O is missing
    in some PDB file residues, which means the sidechain cannot be
    placed.  The alternate CB path (i-1)C-N-CA-CB is provided to
    circumvent this, but if this is needed then it must be adjusted in
    conjunction with PHI ((i-1)C-N-CA-C) as they overlap (see :meth:`.bond_set`
    and :meth:`.bond_rotate` to handle this automatically).

    :param Bio.File file: :func:`.as_handle` file name or handle
    :param bool verbose: complain when lines not as expected
    :param bool quick: don't check residues for all dihedra (no default values)
    :param bool defaults: create di/hedra as needed from reference database.
        Amide proton created if 'H' is in IC_Residue.accept_atoms
    :returns: Biopython Structure object, Residues with .internal_coord
        attributes but no coordinates except for chain start N, CA, C atoms if
        supplied, **OR** None on parse fail (silent unless verbose=True)

    """
    proton = "H" in IC_Residue.accept_atoms

    pdb_hdr_re = re.compile(
        r"^HEADER\s{4}(?P<cf>.{1,40})"
        r"(?:\s+(?P<dd>\d\d\d\d-\d\d-\d\d|\d\d-\w\w\w-\d\d))?"
        r"(?:\s+(?P<id>[0-9A-Z]{4}))?\s*$")
    pdb_ttl_re = re.compile(r"^TITLE\s{5}(?P<ttl>.+)\s*$")
    biop_id_re = re.compile(r"^\('(?P<pid>[^\s]*)',\s(?P<mdl>\d+),\s"
                            r"'(?P<chn>\s|\w)',\s\('(?P<het>\s|[\w\s-]+)"
                            r"',\s(?P<pos>-?\d+),\s'(?P<icode>\s|\w)'\)\)"
                            r"\s+(?P<res>[\w]{1,3})"
                            r"(\s\[(?P<segid>[a-zA-z\s]+)\])?"
                            r"\s*$")
    pdb_atm_re = re.compile(r"^ATOM\s\s(?:\s*(?P<ser>\d+))\s(?P<atm>[\w\s]{4})"
                            r"(?P<alc>\w|\s)(?P<res>[\w]{3})\s(?P<chn>.)"
                            r"(?P<pos>[\s\-\d]{4})(?P<icode>[A-Za-z\s])\s\s\s"
                            r"(?P<x>[\s\-\d\.]{8})(?P<y>[\s\-\d\.]{8})"
                            r"(?P<z>[\s\-\d\.]{8})(?P<occ>[\s\d\.]{6})"
                            r"(?P<tfac>[\s\d\.]{6})\s{6}"
                            r"(?P<segid>[a-zA-z\s]{4})(?P<elm>.{2})"
                            r"(?P<chg>.{2})?\s*$")
    bfac_re = re.compile(r"^BFAC:\s([^\s]+\s+[\-\d\.]+)"
                         r"\s*([^\s]+\s+[\-\d\.]+)?"
                         r"\s*([^\s]+\s+[\-\d\.]+)?"
                         r"\s*([^\s]+\s+[\-\d\.]+)?"
                         r"\s*([^\s]+\s+[\-\d\.]+)?")
    bfac2_re = re.compile(r"([^\s]+)\s+([\-\d\.]+)")
    struct_builder = StructureBuilder()

    # init empty header dict
    # - could use to parse HEADER and TITLE lines except
    #   deposition_date format changed from original PDB header
    header_dict = _parse_pdb_header_list([])

    curr_SMCS = [None, None, None, None]  # struct model chain seg
    SMCS_init = [
        struct_builder.init_structure,
        struct_builder.init_model,
        struct_builder.init_chain,
        struct_builder.init_seg,
    ]

    sb_res = None
    rkl = None
    sb_chain = None
    sbcic = None
    sbric = None

    akc = {}
    hl12 = {}
    ha = {}
    hl23 = {}
    da = {}
    bfacs = {}

    orphan_aks = set()  # []

    tr = []  # this residue
    pr = []  # previous residue

    def akcache(akstr: str) -> AtomKey:
        """Maintain dictionary of AtomKeys seen while reading this PIC file."""
        # akstr: full AtomKey string read from .pic file, includes residue info
        try:
            return akc[akstr]
        except (KeyError):
            ak = akc[akstr] = AtomKey(akstr)
            return ak

    def link_residues(ppr: List[Residue], pr: List[Residue]) -> None:
        """Set next and prev links between i-1 and i-2 residues."""
        for p_r in pr:
            pric = p_r.internal_coord
            for p_p_r in ppr:
                ppric = p_p_r.internal_coord
                if p_r.id[0] == " ":  # not heteroatoms
                    if pric not in ppric.rnext:
                        ppric.rnext.append(pric)
                if p_p_r.id[0] == " ":
                    if ppric not in pric.rprev:
                        pric.rprev.append(ppric)

    def process_hedron(
        a1: str,
        a2: str,
        a3: str,
        l12: str,
        ang: str,
        l23: str,
        ric: IC_Residue,
    ) -> Tuple:
        """Create Hedron on current (sbcic) Chain.internal_coord."""
        ek = (akcache(a1), akcache(a2), akcache(a3))
        atmNdx = AtomKey.fields.atm
        accpt = IC_Residue.accept_atoms
        if not all(ek[i].akl[atmNdx] in accpt for i in range(3)):
            return
        hl12[ek] = float(l12)
        ha[ek] = float(ang)
        hl23[ek] = float(l23)
        sbcic.hedra[ek] = ric.hedra[ek] = h = Hedron(ek)
        h.cic = sbcic
        ak_add(ek, ric)
        return ek

    def default_hedron(ek: Tuple, ric: IC_Residue) -> None:
        """Create Hedron based on same rdh_class hedra in ref database.

        Adds Hedron to current Chain.internal_coord, see ic_data for default
        values and reference database source.
        """
        aks = []
        hkey = None

        atmNdx = AtomKey.fields.atm
        resNdx = AtomKey.fields.resname
        resPos = AtomKey.fields.respos
        aks = [ek[i].akl for i in range(3)]

        atpl = tuple([aks[i][atmNdx] for i in range(3)])
        res = aks[0][resNdx]
        if (aks[0][resPos] !=
                aks[2][resPos]  # hedra crosses amide bond so not reversed
                or atpl == ("N", "CA", "C")  # or chain start tau
                or
                atpl in ic_data_backbone  # or found forward hedron in ic_data
                or
            (res not in ["A", "G"] and atpl in ic_data_sidechains[res])):
            hkey = ek
            rhcl = [aks[i][resNdx] + aks[i][atmNdx] for i in range(3)]
            try:
                dflts = hedra_defaults["".join(rhcl)][0]
            except KeyError:
                if aks[0][resPos] == aks[1][resPos]:
                    rhcl = [aks[i][resNdx] + aks[i][atmNdx] for i in range(2)]
                    rhc = "".join(rhcl) + "X" + aks[2][atmNdx]
                else:
                    rhcl = [
                        aks[i][resNdx] + aks[i][atmNdx] for i in range(1, 3)
                    ]
                    rhc = "X" + aks[0][atmNdx] + "".join(rhcl)
                dflts = hedra_defaults[rhc][0]
        else:
            # must be reversed or fail
            hkey = ek[::-1]
            rhcl = [aks[i][resNdx] + aks[i][atmNdx] for i in range(2, -1, -1)]
            dflts = hedra_defaults["".join(rhcl)][0]

        process_hedron(
            str(hkey[0]),
            str(hkey[1]),
            str(hkey[2]),
            dflts[0],
            dflts[1],
            dflts[2],
            ric,
        )

        if verbose:
            print(f" default for {ek}")

    def hedra_check(dk: str, ric: IC_Residue) -> None:
        """Confirm both hedra present for dihedron key, use default if set."""
        if dk[0:3] not in sbcic.hedra and dk[2::-1] not in sbcic.hedra:
            if defaults:
                default_hedron(dk[0:3], ric)
            else:
                print(f"{dk} missing h1")
        if dk[1:4] not in sbcic.hedra and dk[3:0:-1] not in sbcic.hedra:
            if defaults:
                default_hedron(dk[1:4], ric)
            else:
                print(f"{dk} missing h2")

    def process_dihedron(a1: str, a2: str, a3: str, a4: str, dangle: str,
                         ric: IC_Residue) -> Set:
        """Create Dihedron on current Chain.internal_coord."""
        ek = (
            akcache(a1),
            akcache(a2),
            akcache(a3),
            akcache(a4),
        )
        atmNdx = AtomKey.fields.atm
        accpt = IC_Residue.accept_atoms
        if not all(ek[i].akl[atmNdx] in accpt for i in range(4)):
            return
        da[ek] = float(dangle)
        sbcic.dihedra[ek] = ric.dihedra[ek] = d = Dihedron(ek)
        d.cic = sbcic
        if not quick:
            hedra_check(ek, ric)
        ak_add(ek, ric)
        return ek

    def default_dihedron(ek: List, ric: IC_Residue) -> None:
        """Create Dihedron based on same residue class dihedra in ref database.

        Adds Dihedron to current Chain.internal_coord, see ic_data for default
        values and reference database source.
        """
        atmNdx = AtomKey.fields.atm
        resNdx = AtomKey.fields.resname
        resPos = AtomKey.fields.respos

        rdclass = ""
        dclass = ""
        for ak in ek:
            dclass += ak.akl[atmNdx]
            rdclass += ak.akl[resNdx] + ak.akl[atmNdx]
        if dclass == "NCACN":
            rdclass = rdclass[0:7] + "XN"
        elif dclass == "CACNCA":
            rdclass = "XCAXC" + rdclass[5:]
        elif dclass == "CNCAC":
            rdclass = "XC" + rdclass[2:]
        if rdclass in dihedra_primary_defaults:
            process_dihedron(
                str(ek[0]),
                str(ek[1]),
                str(ek[2]),
                str(ek[3]),
                dihedra_primary_defaults[rdclass][0],
                ric,
            )

            if verbose:
                print(f" default for {ek}")

        elif rdclass in dihedra_secondary_defaults:
            primAngle, offset = dihedra_secondary_defaults[rdclass]
            rname = ek[2].akl[resNdx]
            rnum = int(ek[2].akl[resPos])
            paKey = None
            if primAngle == ("N", "CA", "C", "N") and ek[0].ric.rnext != []:
                paKey = [
                    AtomKey((rnum, None, rname, primAngle[x], None, None))
                    for x in range(3)
                ]
                rnext = ek[0].ric.rnext
                paKey.append(
                    AtomKey((
                        rnext[0].rbase[0],
                        None,
                        rnext[0].rbase[2],
                        "N",
                        None,
                        None,
                    )))
                paKey = tuple(paKey)
            elif primAngle == ("CA", "C", "N", "CA"):
                prname = pr.akl[0][resNdx]
                prnum = pr.akl[0][resPos]
                paKey = [
                    AtomKey(prnum, None, prname, primAngle[x], None, None)
                    for x in range(0, 2)
                ]
                paKey.add([
                    AtomKey((rnum, None, rname, primAngle[x], None, None))
                    for x in range(2, 4)
                ])
                paKey = tuple(paKey)
            else:
                paKey = tuple(
                    AtomKey((rnum, None, rname, atm, None, None))
                    for atm in primAngle)

            if paKey in da:
                process_dihedron(
                    str(ek[0]),
                    str(ek[1]),
                    str(ek[2]),
                    str(ek[3]),
                    da[paKey] + dihedra_secondary_defaults[rdclass][1],
                    ric,
                )

                if verbose:
                    print(f" secondary default for {ek}")

            elif rdclass in dihedra_secondary_xoxt_defaults:
                if primAngle == ("C", "N", "CA", "C"):  # primary for alt cb
                    # no way to trigger alt cb with default=True
                    # because will generate default N-CA-C-O
                    prname = pr.akl[0][resNdx]
                    prnum = pr.akl[0][resPos]
                    paKey = [
                        AtomKey(prnum, None, prname, primAngle[0], None, None)
                    ]
                    paKey.add([
                        AtomKey((rnum, None, rname, primAngle[x], None, None))
                        for x in range(1, 4)
                    ])
                    paKey = tuple(paKey)
                else:
                    primAngle, offset = dihedra_secondary_xoxt_defaults[
                        rdclass]
                    rname = ek[2].akl[resNdx]
                    rnum = int(ek[2].akl[resPos])
                    paKey = tuple(
                        AtomKey((rnum, None, rname, atm, None, None))
                        for atm in primAngle)

                if paKey in da:
                    process_dihedron(
                        str(ek[0]),
                        str(ek[1]),
                        str(ek[2]),
                        str(ek[3]),
                        da[paKey] + offset,
                        ric,
                    )

                    if verbose:
                        print(f" oxt default for {ek}")

                else:
                    print(f"missing primary angle {paKey} {primAngle} to "
                          f"generate {rnum}{rname} {rdclass}")
        else:
            print(
                f"missing {ek} -> {rdclass} ({dclass}) not found in primary or"
                " secondary defaults")

    def dihedra_check(ric: IC_Residue) -> None:
        """Look for required dihedra in residue, generate defaults if set."""

        # rnext should be set
        def ake_recurse(akList: List) -> List:
            """Bulid combinatorics of AtomKey lists."""
            car = akList[0]
            if len(akList) > 1:
                retList = []
                for ak in car:
                    cdr = akList[1:]
                    rslt = ake_recurse(cdr)
                    for r in rslt:
                        r.insert(0, ak)
                        retList.append(r)
                return retList
            else:
                if len(car) == 1:
                    return [list(car)]
                else:
                    retList = [[ak] for ak in car]
                    return retList

        def ak_expand(eLst: List) -> List:
            """Expand AtomKey list with altlocs, all combinatorics."""
            retList = []
            for edron in eLst:
                newList = []
                for ak in edron:
                    rslt = ak.ric.split_akl([ak])
                    rlst = [r[0] for r in rslt]
                    if rlst != []:
                        newList.append(rlst)
                    else:
                        newList.append([ak])
                rslt = ake_recurse(newList)
                for r in rslt:
                    retList.append(r)
            return retList

        # dihedra_check processing starts here
        # generate the list of dihedra this residue should have
        chkLst = []
        sN, sCA, sC = AtomKey(ric, "N"), AtomKey(ric, "CA"), AtomKey(ric, "C")
        sO, sCB, sH = AtomKey(ric, "O"), AtomKey(ric, "CB"), AtomKey(ric, "H")
        if ric.rnext != []:
            for rn in ric.rnext:
                nN, nCA, nC = (
                    AtomKey(rn, "N"),
                    AtomKey(rn, "CA"),
                    AtomKey(rn, "C"),
                )
                # intermediate residue, need psi, phi, omg
                chkLst.append((sN, sCA, sC, nN))  # psi
                chkLst.append((sCA, sC, nN, nCA))  # omg i+1
                chkLst.append((sC, nN, nCA, nC))  # phi i+1
        else:
            chkLst.append((sN, sCA, sC, AtomKey(ric, "OXT")))  # psi
            rn = "(no rnext)"

        chkLst.append((sN, sCA, sC, sO))  # locate backbone O
        if ric.lc != "G":
            chkLst.append((sO, sC, sCA, sCB))  # locate CB
        if ric.rprev != [] and ric.lc != "P" and proton:
            chkLst.append((sC, sCA, sN, sH))  # amide proton

        try:
            for edron in ic_data_sidechains[ric.lc]:
                if len(edron) > 3:  # dihedra only
                    if all(not atm[0] == "H" for atm in edron):
                        akl = [AtomKey(ric, atm) for atm in edron[0:4]]
                        chkLst.append(akl)
        except KeyError:
            pass

        # now compare generated list to ric.dihedra, get defaults if set.
        chkLst = ak_expand(chkLst)
        altloc_ndx = AtomKey.fields.altloc

        for dk in chkLst:
            if tuple(dk) in ric.dihedra:
                pass
            elif sH in dk:
                pass  # ignore missing hydrogens
            elif all(atm.akl[altloc_ndx] is None for atm in dk):
                if defaults:
                    default_dihedron(dk, ric)
                else:
                    if verbose:
                        print(f"{ric}-{rn} missing {dk}")
            else:
                # print(f"skip {ek}")
                pass  # ignore missing combinatoric of altloc atoms
                # need more here?

    def ak_add(ek: set, ric: IC_Residue) -> None:
        """Allocate edron key AtomKeys to current residue as appropriate.

        A hedron or dihedron may span a backbone amide bond, this routine
        allocates atoms in the (h/di)edron to the ric residue or saves them
        for a residue yet to be processed.

        :param set ek: AtomKeys in edron
        :param IC_Residue ric: current residue to assign AtomKeys to
        """
        res = ric.residue
        reskl = (
            str(res.id[1]),
            (None if res.id[2] == " " else res.id[2]),
            ric.lc,
        )
        for ak in ek:
            if ak.ric is None:
                sbcic.akset.add(ak)
                if ak.akl[0:3] == reskl:
                    ak.ric = ric
                    ric.ak_set.add(ak)
                else:
                    orphan_aks.add(ak)

    def finish_chain() -> None:
        """Do last rnext, rprev links and process chain edra data."""
        link_residues(pr, tr)
        # check/confirm completeness
        if not quick:
            for r in pr:
                dihedra_check(r.internal_coord)
            for r in tr:
                dihedra_check(r.internal_coord)

        if ha != {}:
            sha = {k: ha[k] for k in sorted(ha)}
            shl12 = {k: hl12[k] for k in sorted(hl12)}
            shl23 = {k: hl23[k] for k in sorted(hl23)}
            sbcic._hedraDict2chain(shl12, sha, shl23, da, bfacs)

    # read_PIC processing starts here:
    with as_handle(file, mode="r") as handle:
        for line in handle.readlines():
            if line.startswith("#"):
                pass  # skip comment lines
            elif line.startswith("HEADER "):
                m = pdb_hdr_re.match(line)
                if m:
                    header_dict["head"] = m.group("cf")  # classification
                    header_dict["idcode"] = m.group("id")
                    header_dict["deposition_date"] = m.group("dd")
                elif verbose:
                    print("Reading pic file", file, "HEADER parse fail: ",
                          line)
            elif line.startswith("TITLE "):
                m = pdb_ttl_re.match(line)
                if m:
                    header_dict["name"] = m.group("ttl").strip()
                    # print('TTL: ', m.group('ttl').strip())
                elif verbose:
                    print("Reading pic file", file, "TITLE parse fail:, ",
                          line)
            elif line.startswith("("):  # Biopython ID line for Residue
                m = biop_id_re.match(line)
                if m:
                    # check SMCS = Structure, Model, Chain, SegID
                    segid = m.group(9)
                    if segid is None:
                        segid = "    "
                    this_SMCS = [
                        m.group(1),
                        int(m.group(2)),
                        m.group(3),
                        segid,
                    ]
                    if curr_SMCS != this_SMCS:
                        if curr_SMCS[:3] != this_SMCS[:3] and ha != {}:
                            # chain change so process current chain data
                            finish_chain()

                            akc = {}  # atomkey cache, used by akcache()
                            hl12 = {}  # hedra key -> len12
                            ha = {}  # -> hedra angle
                            hl23 = {}  # -> len23
                            da = {}  # dihedra key -> angle value
                            bfacs = {}  # atomkey string -> b-factor
                        # init new Biopython SMCS level as needed
                        for i in range(4):
                            if curr_SMCS[i] != this_SMCS[i]:
                                SMCS_init[i](this_SMCS[i])
                                curr_SMCS[i] = this_SMCS[i]
                                if i == 0:
                                    # 0 = init structure so add header
                                    struct_builder.set_header(header_dict)
                                elif i == 1:
                                    # new model means new chain and new segid
                                    curr_SMCS[2] = curr_SMCS[3] = None
                                elif i == 2:
                                    # new chain so init internal_coord
                                    sb_chain = struct_builder.chain
                                    sbcic = sb_chain.internal_coord = IC_Chain(
                                        sb_chain)

                    struct_builder.init_residue(
                        m.group("res"),
                        m.group("het"),
                        int(m.group("pos")),
                        m.group("icode"),
                    )

                    sb_res = struct_builder.residue
                    if sb_res.id[0] != " ":  # skip hetatm
                        continue
                    if 2 == sb_res.is_disordered():
                        for r in sb_res.child_dict.values():
                            if not r.internal_coord:
                                sb_res = r
                                break
                        # added to disordered res
                        tr.append(sb_res)
                    else:
                        # new res so fix up previous residue as feasible
                        link_residues(pr, tr)

                        if not quick:
                            for r in pr:
                                # create di/hedra if default for residue i-1
                                # just linked
                                dihedra_check(r.internal_coord)

                        pr = tr
                        tr = [sb_res]

                    sbric = sb_res.internal_coord = IC_Residue(
                        sb_res)  # no atoms so no rak
                    sbric.cic = sbcic
                    rkl = (
                        str(sb_res.id[1]),
                        (None if sb_res.id[2] == " " else sb_res.id[2]),
                        sbric.lc,
                    )
                    sbcic.ordered_aa_ic_list.append(sbric)

                    # update AtomKeys w/o IC_Residue references, in case
                    # chain ends before di/hedra sees them (2XHE test case)
                    for ak in orphan_aks:
                        if ak.akl[0:3] == rkl:
                            ak.ric = sbric
                            sbric.ak_set.add(ak)
                            # may need altoc support here
                    orphan_aks = set(
                        filter(lambda ak: ak.ric is None, orphan_aks))

                else:
                    if verbose:
                        print(
                            "Reading pic file",
                            file,
                            "residue ID parse fail: ",
                            line,
                        )
                    return None
            elif line.startswith("ATOM "):
                m = pdb_atm_re.match(line)
                if m:
                    if sb_res is None:
                        # ATOM without res spec already loaded, not a pic file
                        if verbose:
                            print(
                                "Reading pic file",
                                file,
                                "ATOM without residue configured:, ",
                                line,
                            )
                        return None
                    if sb_res.resname != m.group("res") or sb_res.id[1] != int(
                            m.group("pos")):
                        if verbose:
                            print(
                                "Reading pic file",
                                file,
                                "ATOM not in configured residue (",
                                sb_res.resname,
                                str(sb_res.id),
                                "):",
                                line,
                            )
                        return None
                    coord = numpy.array(
                        (
                            float(m.group("x")),
                            float(m.group("y")),
                            float(m.group("z")),
                        ),
                        "f",
                    )
                    struct_builder.init_atom(
                        m.group("atm").strip(),
                        coord,
                        float(m.group("tfac")),
                        float(m.group("occ")),
                        m.group("alc"),
                        m.group("atm"),
                        int(m.group("ser")),
                        m.group("elm").strip(),
                    )

                    # reset because prev does not link to this residue
                    # (chainBreak)
                    pr = []

            elif line.startswith("BFAC: "):
                m = bfac_re.match(line)
                if m:
                    for bfac_pair in m.groups():
                        if bfac_pair is not None:
                            m2 = bfac2_re.match(bfac_pair)
                            bfacs[m2.group(1)] = float(m2.group(2))
                # else:
                #    print f"Reading pic file {file} B-factor fail: {line}"
            else:
                m = Edron.edron_re.match(line)
                if m and sb_res is not None:
                    if m["a4"] is None:
                        process_hedron(
                            m["a1"],
                            m["a2"],
                            m["a3"],
                            m["len12"],
                            m["angle"],
                            m["len23"],
                            sb_res.internal_coord,
                        )
                    else:
                        process_dihedron(
                            m["a1"],
                            m["a2"],
                            m["a3"],
                            m["a4"],
                            float(m["dihedral"]),
                            sb_res.internal_coord,
                        )

                elif m:
                    print(
                        "PIC file: ",
                        file,
                        " error: no residue info before reading (di/h)edron: ",
                        line,
                    )
                    return None
                elif line.strip():
                    if verbose:
                        print(
                            "Reading PIC file",
                            file,
                            "parse fail on: .",
                            line,
                            ".",
                        )
                    return None

    # reached end of input
    finish_chain()

    # print(report_PIC(struct_builder.get_structure()))
    return struct_builder.get_structure()