Ejemplo n.º 1
0
    def _build_structure(self, structure_id, filehandle):
        """Build the structure from the atom records of an mmCIF file (PRIVATE).

        Only lines belonging to the ``_atom_site.`` and
        ``_atom_site_anisotrop.`` categories are read.  Record lines are
        split on whitespace instead of being tokenized with shlex.

        Arguments:
         - structure_id - identifier handed to the structure builder
         - filehandle - iterable yielding the lines of the mmCIF file

        Raises:
         - KeyError if a required ``_atom_site`` column is absent
         - ValueError if a coordinate or sequence number is not numeric
         - PDBConstructionException on an invalid model number, B factor
           or occupancy

        The result is not returned; it is accumulated in
        ``self._structure_builder``.
        """
        # two special chars as placeholders in the mmCIF format
        # for item values that cannot be explicitly assigned
        # see: pdbx/mmcif syntax web page
        _unassigned = {".", "?"}

        # Read only _atom_site. and atom_site_anisotrop entries
        read_atom, read_aniso = False, False
        _fields, _records = [], []
        _anisof, _anisors = [], []
        for line in filehandle:
            if line.startswith("_atom_site."):
                read_atom = True
                _fields.append(line.strip())
            elif line.startswith("_atom_site_anisotrop."):
                read_aniso = True
                _anisof.append(line.strip())
            elif read_atom and line.startswith("#"):
                # a "#" line closes the category currently being read
                read_atom = False
            elif read_aniso and line.startswith("#"):
                read_aniso = False
            elif read_atom:
                _records.append(line.strip())
            elif read_aniso:
                _anisors.append(line.strip())

        # Dumping the shlex module here since this particular
        # category should be rather straightforward.
        # Quite a performance boost..
        # Transpose rows of tokens into one column per field name.
        _record_tbl = zip(*map(str.split, _records))
        _anisob_tbl = zip(*map(str.split, _anisors))

        mmcif_dict = dict(zip(_fields, _record_tbl))
        mmcif_dict.update(dict(zip(_anisof, _anisob_tbl)))

        # Build structure object
        atom_id_list = mmcif_dict["_atom_site.label_atom_id"]
        residue_id_list = mmcif_dict["_atom_site.label_comp_id"]

        try:
            element_list = mmcif_dict["_atom_site.type_symbol"]
        except KeyError:
            element_list = None

        chain_id_list = mmcif_dict["_atom_site.auth_asym_id"]

        x_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_x"]]
        y_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_y"]]
        z_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_z"]]
        alt_list = mmcif_dict["_atom_site.label_alt_id"]
        icode_list = mmcif_dict["_atom_site.pdbx_PDB_ins_code"]
        b_factor_list = mmcif_dict["_atom_site.B_iso_or_equiv"]
        occupancy_list = mmcif_dict["_atom_site.occupancy"]
        fieldname_list = mmcif_dict["_atom_site.group_PDB"]

        try:
            serial_list = [
                int(n) for n in mmcif_dict["_atom_site.pdbx_PDB_model_num"]
            ]
        except KeyError:
            # No model number column
            serial_list = None
        except ValueError:
            # Invalid model number (malformed file)
            raise PDBConstructionException("Invalid model number")

        try:
            aniso_u11 = mmcif_dict["_atom_site_anisotrop.U[1][1]"]
            aniso_u12 = mmcif_dict["_atom_site_anisotrop.U[1][2]"]
            aniso_u13 = mmcif_dict["_atom_site_anisotrop.U[1][3]"]
            aniso_u22 = mmcif_dict["_atom_site_anisotrop.U[2][2]"]
            aniso_u23 = mmcif_dict["_atom_site_anisotrop.U[2][3]"]
            aniso_u33 = mmcif_dict["_atom_site_anisotrop.U[3][3]"]
            aniso_flag = 1
        except KeyError:
            # no anisotropic B factors
            aniso_flag = 0

        # if auth_seq_id is present, we use this.
        # Otherwise label_seq_id is used.
        if "_atom_site.auth_seq_id" in mmcif_dict:
            seq_id_list = mmcif_dict["_atom_site.auth_seq_id"]
        else:
            seq_id_list = mmcif_dict["_atom_site.label_seq_id"]

        # Now loop over atoms and build the structure
        current_chain_id = None
        current_residue_id = None
        current_resname = None
        structure_builder = self._structure_builder
        structure_builder.init_structure(structure_id)
        structure_builder.init_seg(" ")

        # Historically, Biopython PDB parser uses model_id to mean array index
        # so serial_id means the Model ID specified in the file
        current_model_id = -1
        current_serial_id = -1
        for i in range(len(atom_id_list)):

            # set the line_counter for 'ATOM' lines only and not
            # as a global line counter found in the PDBParser()
            # this number should match the '_atom_site.id' index in the MMCIF
            structure_builder.set_line_counter(i)

            x = x_list[i]
            y = y_list[i]
            z = z_list[i]
            resname = residue_id_list[i]
            chainid = chain_id_list[i]
            altloc = alt_list[i]
            if altloc in _unassigned:
                altloc = " "
            int_resseq = int(seq_id_list[i])
            icode = icode_list[i]
            if icode in _unassigned:
                icode = " "
            # Remove occasional " from quoted atom names (e.g. xNA)
            name = atom_id_list[i].strip('"')

            # occupancy & B factor
            try:
                tempfactor = float(b_factor_list[i])
            except ValueError:
                raise PDBConstructionException("Invalid or missing B factor")

            try:
                occupancy = float(occupancy_list[i])
            except ValueError:
                raise PDBConstructionException("Invalid or missing occupancy")

            fieldname = fieldname_list[i]
            if fieldname == "HETATM":
                hetatm_flag = "H"
            else:
                hetatm_flag = " "

            resseq = (hetatm_flag, int_resseq, icode)

            if serial_list is not None:
                # model column exists; use it
                serial_id = serial_list[i]
                if current_serial_id != serial_id:
                    # if serial changes, update it and start new model
                    current_serial_id = serial_id
                    current_model_id += 1
                    structure_builder.init_model(current_model_id,
                                                 current_serial_id)
                    current_chain_id = None
                    current_residue_id = None
                    current_resname = None
            elif current_model_id < 0:
                # No explicit model column: open a single model, once.
                # Calling init_model for every atom would request the same
                # model id again and again.  Index 0 matches the index the
                # first model gets when the column is present.
                current_model_id = 0
                structure_builder.init_model(current_model_id)

            if current_chain_id != chainid:
                current_chain_id = chainid
                structure_builder.init_chain(current_chain_id)
                current_residue_id = None
                current_resname = None

            if current_residue_id != resseq or current_resname != resname:
                current_residue_id = resseq
                current_resname = resname
                structure_builder.init_residue(resname, hetatm_flag,
                                               int_resseq, icode)

            coord = numpy.array((x, y, z), "f")
            element = element_list[i] if element_list else None
            structure_builder.init_atom(name,
                                        coord,
                                        tempfactor,
                                        occupancy,
                                        altloc,
                                        name,
                                        element=element)
            # NOTE(review): anisotropic rows are paired with atoms purely by
            # position (row i <-> atom i), not via an id column - confirm
            # this holds for the files being parsed.
            if aniso_flag == 1 and i < len(aniso_u11):
                u = (
                    aniso_u11[i],
                    aniso_u12[i],
                    aniso_u13[i],
                    aniso_u22[i],
                    aniso_u23[i],
                    aniso_u33[i],
                )
                mapped_anisou = [float(_) for _ in u]
                anisou_array = numpy.array(mapped_anisou, "f")
                structure_builder.set_anisou(anisou_array)
Ejemplo n.º 2
0
    def _build_structure(self, structure_id):
        """Build the structure from the parsed mmCIF dictionary (PRIVATE).

        Reads the ``_atom_site`` columns (and, when present, the
        ``_atom_site_anisotrop`` columns) from ``self._mmcif_dict`` and
        feeds them atom by atom to ``self._structure_builder``.  Afterwards
        the unit cell and space group are set on a best-effort basis.

        Arguments:
         - structure_id - identifier handed to the structure builder

        Raises:
         - KeyError if a required ``_atom_site`` column is absent
         - ValueError if a coordinate or sequence number is not numeric
         - PDBConstructionException on an invalid model number, B factor
           or occupancy
        """
        # two special chars as placeholders in the mmCIF format
        # for item values that cannot be explicitly assigned
        # see: pdbx/mmcif syntax web page
        _unassigned = {".", "?"}

        mmcif_dict = self._mmcif_dict
        atom_id_list = mmcif_dict["_atom_site.label_atom_id"]
        residue_id_list = mmcif_dict["_atom_site.label_comp_id"]
        try:
            element_list = mmcif_dict["_atom_site.type_symbol"]
        except KeyError:
            element_list = None
        chain_id_list = mmcif_dict["_atom_site.auth_asym_id"]
        x_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_x"]]
        y_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_y"]]
        z_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_z"]]
        alt_list = mmcif_dict["_atom_site.label_alt_id"]
        icode_list = mmcif_dict["_atom_site.pdbx_PDB_ins_code"]
        b_factor_list = mmcif_dict["_atom_site.B_iso_or_equiv"]
        occupancy_list = mmcif_dict["_atom_site.occupancy"]
        fieldname_list = mmcif_dict["_atom_site.group_PDB"]
        try:
            serial_list = [
                int(n) for n in mmcif_dict["_atom_site.pdbx_PDB_model_num"]
            ]
        except KeyError:
            # No model number column
            serial_list = None
        except ValueError:
            # Invalid model number (malformed file)
            raise PDBConstructionException("Invalid model number")
        try:
            aniso_u11 = mmcif_dict["_atom_site_anisotrop.U[1][1]"]
            aniso_u12 = mmcif_dict["_atom_site_anisotrop.U[1][2]"]
            aniso_u13 = mmcif_dict["_atom_site_anisotrop.U[1][3]"]
            aniso_u22 = mmcif_dict["_atom_site_anisotrop.U[2][2]"]
            aniso_u23 = mmcif_dict["_atom_site_anisotrop.U[2][3]"]
            aniso_u33 = mmcif_dict["_atom_site_anisotrop.U[3][3]"]
            aniso_flag = 1
        except KeyError:
            # no anisotropic B factors
            aniso_flag = 0
        # if auth_seq_id is present, we use this.
        # Otherwise label_seq_id is used.
        if "_atom_site.auth_seq_id" in mmcif_dict:
            seq_id_list = mmcif_dict["_atom_site.auth_seq_id"]
        else:
            seq_id_list = mmcif_dict["_atom_site.label_seq_id"]
        # Now loop over atoms and build the structure
        current_chain_id = None
        current_residue_id = None
        current_resname = None
        structure_builder = self._structure_builder
        structure_builder.init_structure(structure_id)
        structure_builder.init_seg(" ")
        # Historically, Biopython PDB parser uses model_id to mean array index
        # so serial_id means the Model ID specified in the file
        current_model_id = -1
        current_serial_id = -1
        for i in range(len(atom_id_list)):

            # set the line_counter for 'ATOM' lines only and not
            # as a global line counter found in the PDBParser()
            # this number should match the '_atom_site.id' index in the MMCIF
            structure_builder.set_line_counter(i)

            x = x_list[i]
            y = y_list[i]
            z = z_list[i]
            resname = residue_id_list[i]
            chainid = chain_id_list[i]
            altloc = alt_list[i]
            if altloc in _unassigned:
                altloc = " "
            int_resseq = int(seq_id_list[i])
            icode = icode_list[i]
            if icode in _unassigned:
                icode = " "
            name = atom_id_list[i]
            # occupancy & B factor
            try:
                tempfactor = float(b_factor_list[i])
            except ValueError:
                raise PDBConstructionException("Invalid or missing B factor")
            try:
                occupancy = float(occupancy_list[i])
            except ValueError:
                raise PDBConstructionException("Invalid or missing occupancy")
            fieldname = fieldname_list[i]
            if fieldname == "HETATM":
                # waters get their own hetero flag
                if resname == "HOH" or resname == "WAT":
                    hetatm_flag = "W"
                else:
                    hetatm_flag = "H"
            else:
                hetatm_flag = " "

            resseq = (hetatm_flag, int_resseq, icode)

            if serial_list is not None:
                # model column exists; use it
                serial_id = serial_list[i]
                if current_serial_id != serial_id:
                    # if serial changes, update it and start new model
                    current_serial_id = serial_id
                    current_model_id += 1
                    structure_builder.init_model(current_model_id,
                                                 current_serial_id)
                    current_chain_id = None
                    current_residue_id = None
                    current_resname = None
            elif current_model_id < 0:
                # No explicit model column: open a single model, once.
                # Calling init_model for every atom would request the same
                # model id again and again.  Index 0 matches the index the
                # first model gets when the column is present.
                current_model_id = 0
                structure_builder.init_model(current_model_id)

            if current_chain_id != chainid:
                current_chain_id = chainid
                structure_builder.init_chain(current_chain_id)
                current_residue_id = None
                current_resname = None

            if current_residue_id != resseq or current_resname != resname:
                current_residue_id = resseq
                current_resname = resname
                structure_builder.init_residue(resname, hetatm_flag,
                                               int_resseq, icode)

            coord = numpy.array((x, y, z), "f")
            element = element_list[i].upper() if element_list else None
            structure_builder.init_atom(name,
                                        coord,
                                        tempfactor,
                                        occupancy,
                                        altloc,
                                        name,
                                        element=element)
            # NOTE(review): anisotropic rows are paired with atoms purely by
            # position (row i <-> atom i), not via an id column - confirm
            # this holds for the files being parsed.
            if aniso_flag == 1 and i < len(aniso_u11):
                u = (
                    aniso_u11[i],
                    aniso_u12[i],
                    aniso_u13[i],
                    aniso_u22[i],
                    aniso_u23[i],
                    aniso_u33[i],
                )
                mapped_anisou = [float(_) for _ in u]
                anisou_array = numpy.array(mapped_anisou, "f")
                structure_builder.set_anisou(anisou_array)
        # Now try to set the cell
        try:
            a = float(mmcif_dict["_cell.length_a"][0])
            b = float(mmcif_dict["_cell.length_b"][0])
            c = float(mmcif_dict["_cell.length_c"][0])
            alpha = float(mmcif_dict["_cell.angle_alpha"][0])
            beta = float(mmcif_dict["_cell.angle_beta"][0])
            gamma = float(mmcif_dict["_cell.angle_gamma"][0])
            cell = numpy.array((a, b, c, alpha, beta, gamma), "f")
            spacegroup = mmcif_dict["_symmetry.space_group_name_H-M"][0]
            # Drop surrounding quotes only when a matching pair is really
            # there; slicing unconditionally would eat the first and last
            # character of an unquoted space group name.
            if (len(spacegroup) >= 2 and spacegroup[0] == spacegroup[-1]
                    and spacegroup[0] in "'\""):
                spacegroup = spacegroup[1:-1]
            structure_builder.set_symmetry(spacegroup, cell)
        except Exception:
            # Deliberately best-effort: a missing or malformed cell or
            # space group must not abort an otherwise successful parse.
            pass  # no cell found, so just ignore
Ejemplo n.º 3
0
    def init_residue(self, resname, field, resseq, icode):
        """Create a new Residue object.

        Arguments:
         - resname - string, e.g. "ASN"
         - field - hetero flag, "W" for waters, "H" for
           hetero residues, otherwise blank.
         - resseq - int, sequence identifier
         - icode - string, insertion code

        The residue that subsequent calls should work on is stored in
        ``self.residue``.  If a non-hetero residue with the same id is
        already in the current chain, a PDBConstructionWarning is issued
        and the residues are grouped in a DisorderedResidue (point
        mutation) or the existing residue is reused.

        Raises PDBConstructionException if the already present residue has
        blank altlocs, i.e. is not completely disordered; ``self.residue``
        is set to None in that case.
        """
        if field == "H":
            # The hetero field consists of H_ + the residue name (e.g. H_FUC)
            field = "H_" + resname
        res_id = (field, resseq, icode)
        if field == " " and self.chain.has_id(res_id):
            # There already is a residue with the id (field, resseq, icode).
            # This only makes sense in the case of a point mutation.
            warnings.warn(
                "WARNING: Residue ('%s', %i, '%s') "
                "redefined at line %i." %
                (field, resseq, icode, self.line_counter),
                PDBConstructionWarning)
            duplicate_residue = self.chain[res_id]
            if duplicate_residue.is_disordered() == 2:
                # The residue in the chain is a DisorderedResidue object.
                # So just add the last Residue object.
                if duplicate_residue.disordered_has_id(resname):
                    # The residue was already made: reuse it.
                    self.residue = duplicate_residue
                    duplicate_residue.disordered_select(resname)
                else:
                    # Make a new residue and add it to the already
                    # present DisorderedResidue
                    new_residue = Residue(res_id, resname, self.segid)
                    duplicate_residue.disordered_add(new_residue)
                    self.residue = duplicate_residue
                # Both cases are fully handled here.  Without returning,
                # the "already made" case fell through to the code at the
                # bottom, which replaced self.residue and tried to add a
                # second residue with an id the chain already contains.
                return
            if resname == duplicate_residue.resname:
                warnings.warn(
                    "WARNING: Residue ('%s', %i, '%s','%s')"
                    " already defined with the same name at line  %i."
                    %
                    (field, resseq, icode, resname, self.line_counter),
                    PDBConstructionWarning)
                self.residue = duplicate_residue
                return
            # Make a new DisorderedResidue object and put all
            # the Residue objects with the id (field, resseq, icode) in it.
            # These residues each should have non-blank altlocs for all their atoms.
            # If not, the PDB file probably contains an error.
            if not self._is_completely_disordered(duplicate_residue):
                # if this exception is ignored, a residue will be missing
                self.residue = None
                raise PDBConstructionException(
                    "Blank altlocs in duplicate residue %s ('%s', %i, '%s')"
                    % (resname, field, resseq, icode))
            self.chain.detach_child(res_id)
            new_residue = Residue(res_id, resname, self.segid)
            disordered_residue = DisorderedResidue(res_id)
            self.chain.add(disordered_residue)
            disordered_residue.disordered_add(duplicate_residue)
            disordered_residue.disordered_add(new_residue)
            self.residue = disordered_residue
            return
        # Ordinary case: a residue id not seen before in this chain, or a
        # hetero/water residue.
        self.residue = Residue(res_id, resname, self.segid)
        self.chain.add(self.residue)
Ejemplo n.º 4
0
    def _parse_coordinates(self, coords_trailer):
        """Parse the atomic data in the PDB file (PRIVATE).

        Arguments:
         - coords_trailer - list of lines, starting at the coordinate
           section of the file

        Every ATOM/HETATM, ANISOU, SIGUIJ, SIGATM, MODEL and ENDMDL record
        is forwarded to ``self.structure_builder``.  Parsing stops at the
        first END or CONECT record.

        Returns the not yet parsed part of ``coords_trailer``, beginning
        with that END/CONECT line, or an empty list if the input ran out
        first.  ``self.line_counter`` is advanced by the number of lines
        consumed.

        Raises PDBConstructionException if the coordinates of an atom are
        invalid or missing.  Other problems are reported through
        ``self._handle_PDB_exception``.
        """
        allowed_records = {
            "ATOM  ",
            "HETATM",
            "MODEL ",
            "ENDMDL",
            "TER   ",
            "ANISOU",
            # These are older 2.3 format specs:
            "SIGATM",
            "SIGUIJ",
            # bookkeeping records after coordinates:
            "MASTER",
        }

        structure_builder = self.structure_builder
        current_model_id = 0
        # Flag we have an open model
        model_open = 0
        current_chain_id = None
        current_segid = None
        current_residue_id = None
        current_resname = None

        # The offset comes straight from enumerate() so that blank lines
        # are counted as well; otherwise the reported line numbers and the
        # slice returned at END/CONECT drift after every blank line.
        for local_line_counter, raw_line in enumerate(coords_trailer):
            line = raw_line.rstrip("\n")
            record_type = line[0:6]
            global_line_counter = self.line_counter + local_line_counter + 1
            structure_builder.set_line_counter(global_line_counter)
            if not line.strip():
                continue  # skip empty lines
            elif record_type == "ATOM  " or record_type == "HETATM":
                # Initialize the Model - there was no explicit MODEL record
                if not model_open:
                    structure_builder.init_model(current_model_id)
                    current_model_id += 1
                    model_open = 1
                fullname = line[12:16]
                # get rid of whitespace in atom names
                split_list = fullname.split()
                if len(split_list) != 1:
                    # atom name has internal spaces, e.g. " N B ", so
                    # we do not strip spaces
                    name = fullname
                else:
                    # atom name is like " CA ", so we can strip spaces
                    name = split_list[0]
                altloc = line[16]
                resname = line[17:20]
                chainid = line[21]
                try:
                    serial_number = int(line[6:11])
                except Exception:
                    serial_number = 0
                resseq = int(line[22:26].split()[0])  # sequence identifier
                icode = line[26]  # insertion code
                if record_type == "HETATM":  # hetero atom flag
                    if resname == "HOH" or resname == "WAT":
                        hetero_flag = "W"
                    else:
                        hetero_flag = "H"
                else:
                    hetero_flag = " "
                residue_id = (hetero_flag, resseq, icode)
                # atomic coordinates
                try:
                    x = float(line[30:38])
                    y = float(line[38:46])
                    z = float(line[46:54])
                except Exception:
                    # Should we allow parsing to continue in permissive mode?
                    # If so, what coordinates should we default to?  Easier to abort!
                    raise PDBConstructionException(
                        "Invalid or missing coordinate(s) at line %i." %
                        global_line_counter) from None
                coord = numpy.array((x, y, z), "f")

                if not self.is_pqr:
                    # occupancy & B factor
                    try:
                        occupancy = float(line[54:60])
                    except Exception:
                        self._handle_PDB_exception(
                            "Invalid or missing occupancy",
                            global_line_counter)
                        occupancy = None  # Rather than arbitrary zero or one
                    if occupancy is not None and occupancy < 0:
                        # TODO - Should this be an error in strict mode?
                        # This uses fixed text so the warning occurs once only:
                        warnings.warn(
                            "Negative occupancy in one or more atoms",
                            PDBConstructionWarning,
                        )
                    try:
                        bfactor = float(line[60:66])
                    except Exception:
                        self._handle_PDB_exception(
                            "Invalid or missing B factor", global_line_counter)
                        bfactor = 0.0  # PDB uses a default of zero if missing
                else:
                    # PQR file: attempt to parse charge and radius fields
                    try:
                        pqr_charge = float(line[54:62])
                    except Exception:
                        self._handle_PDB_exception("Invalid or missing charge",
                                                   global_line_counter)
                        pqr_charge = None  # Rather than arbitrary zero or one
                    try:
                        radius = float(line[62:70])
                    except Exception:
                        self._handle_PDB_exception("Invalid or missing radius",
                                                   global_line_counter)
                        radius = None
                    if radius is not None and radius < 0:
                        # Reported through the exception handler; if that
                        # returns, the negative radius is discarded.
                        message = "Negative atom radius"
                        self._handle_PDB_exception(message,
                                                   global_line_counter)
                        radius = None

                segid = line[72:76]
                element = line[76:78].strip().upper()
                if current_segid != segid:
                    current_segid = segid
                    structure_builder.init_seg(current_segid)
                if current_chain_id != chainid:
                    # a new chain always starts a new residue as well
                    current_chain_id = chainid
                    structure_builder.init_chain(current_chain_id)
                    current_residue_id = residue_id
                    current_resname = resname
                    try:
                        structure_builder.init_residue(resname, hetero_flag,
                                                       resseq, icode)
                    except PDBConstructionException as message:
                        self._handle_PDB_exception(message,
                                                   global_line_counter)
                elif current_residue_id != residue_id or current_resname != resname:
                    current_residue_id = residue_id
                    current_resname = resname
                    try:
                        structure_builder.init_residue(resname, hetero_flag,
                                                       resseq, icode)
                    except PDBConstructionException as message:
                        self._handle_PDB_exception(message,
                                                   global_line_counter)

                if not self.is_pqr:
                    # init atom with pdb fields
                    try:
                        structure_builder.init_atom(
                            name,
                            coord,
                            bfactor,
                            occupancy,
                            altloc,
                            fullname,
                            serial_number,
                            element,
                        )
                    except PDBConstructionException as message:
                        self._handle_PDB_exception(message,
                                                   global_line_counter)
                else:
                    # init atom with pqr fields
                    try:
                        structure_builder.init_atom(
                            name,
                            coord,
                            pqr_charge,
                            radius,
                            altloc,
                            fullname,
                            serial_number,
                            element,
                            pqr_charge,
                            radius,
                            self.is_pqr,
                        )
                    except PDBConstructionException as message:
                        self._handle_PDB_exception(message,
                                                   global_line_counter)
            elif record_type == "ANISOU":
                # Six consecutive 7-column fields in columns 29-70.  The
                # third one starts at index 42, exactly as in the SIGUIJ
                # branch below; starting at 43 loses its first column.
                anisou = [
                    float(x) for x in (
                        line[28:35],
                        line[35:42],
                        line[42:49],
                        line[49:56],
                        line[56:63],
                        line[63:70],
                    )
                ]
                # U's are scaled by 10^4
                anisou_array = (numpy.array(anisou, "f") / 10000.0).astype("f")
                structure_builder.set_anisou(anisou_array)
            elif record_type == "MODEL ":
                try:
                    serial_num = int(line[10:14])
                except Exception:
                    self._handle_PDB_exception(
                        "Invalid or missing model serial number",
                        global_line_counter)
                    serial_num = 0
                structure_builder.init_model(current_model_id, serial_num)
                current_model_id += 1
                model_open = 1
                current_chain_id = None
                current_residue_id = None
            elif record_type == "END   " or record_type == "CONECT":
                # End of atomic data, return the trailer
                self.line_counter += local_line_counter
                return coords_trailer[local_line_counter:]
            elif record_type == "ENDMDL":
                model_open = 0
                current_chain_id = None
                current_residue_id = None
            elif record_type == "SIGUIJ":
                # standard deviation of anisotropic B factor
                siguij = [
                    float(x) for x in (
                        line[28:35],
                        line[35:42],
                        line[42:49],
                        line[49:56],
                        line[56:63],
                        line[63:70],
                    )
                ]
                # U sigma's are scaled by 10^4
                siguij_array = (numpy.array(siguij, "f") / 10000.0).astype("f")
                structure_builder.set_siguij(siguij_array)
            elif record_type == "SIGATM":
                # standard deviation of atomic positions
                # Same column layout as the x, y, z, occupancy and B factor
                # fields of the ATOM branch above; the second field is
                # 8 columns wide (38:46), not 7.
                sigatm = [
                    float(x) for x in (
                        line[30:38],
                        line[38:46],
                        line[46:54],
                        line[54:60],
                        line[60:66],
                    )
                ]
                sigatm_array = numpy.array(sigatm, "f")
                structure_builder.set_sigatm(sigatm_array)
            elif record_type not in allowed_records:
                warnings.warn(
                    "Ignoring unrecognized record '{}' at line {}".format(
                        record_type, global_line_counter),
                    PDBConstructionWarning,
                )
        # EOF (does not end in END or CONECT)
        self.line_counter = self.line_counter + len(coords_trailer)
        return []
Ejemplo n.º 5
0
 def _build_structure(self, structure_id):
     """Build the structure from the ``_atom_site`` table of the mmCIF dict.

     The per-atom columns of ``self._mmcif_dict`` are walked in file order
     and replayed into ``self._structure_builder``: a model is started when
     the model number changes (or once, if the file has no model column),
     a chain when the chain id changes, and a residue when the chain or the
     sequence id changes.  Anisotropic U values and the unit cell / space
     group are forwarded to the builder when the dictionary provides them.

     structure_id is handed unchanged to ``init_structure``.

     Raises KeyError when a mandatory ``_atom_site`` column is missing and
     PDBConstructionException for an invalid model number, B factor or
     occupancy.
     """
     mmcif_dict = self._mmcif_dict
     atom_id_list = mmcif_dict["_atom_site.label_atom_id"]
     residue_id_list = mmcif_dict["_atom_site.label_comp_id"]
     try:
         element_list = mmcif_dict["_atom_site.type_symbol"]
     except KeyError:
         element_list = None
     chain_id_list = mmcif_dict["_atom_site.label_asym_id"]
     # Real lists, not map(): the columns are indexed per atom below and
     # map() returns a one-shot iterator on Python 3.
     x_list = [float(x) for x in mmcif_dict["_atom_site.Cartn_x"]]
     y_list = [float(y) for y in mmcif_dict["_atom_site.Cartn_y"]]
     z_list = [float(z) for z in mmcif_dict["_atom_site.Cartn_z"]]
     alt_list = mmcif_dict["_atom_site.label_alt_id"]
     b_factor_list = mmcif_dict["_atom_site.B_iso_or_equiv"]
     occupancy_list = mmcif_dict["_atom_site.occupancy"]
     fieldname_list = mmcif_dict["_atom_site.group_PDB"]
     try:
         serial_list = [
             int(n) for n in mmcif_dict["_atom_site.pdbx_PDB_model_num"]
         ]
     except KeyError:
         # No model number column
         serial_list = None
     except ValueError:
         # Invalid model number (malformed file)
         raise PDBConstructionException("Invalid model number")
     try:
         # Column order is the order in which the values are handed to
         # set_anisou: U11, U12, U13, U22, U23, U33.
         aniso_columns = (
             mmcif_dict["_atom_site.aniso_U[1][1]"],
             mmcif_dict["_atom_site.aniso_U[1][2]"],
             mmcif_dict["_atom_site.aniso_U[1][3]"],
             mmcif_dict["_atom_site.aniso_U[2][2]"],
             mmcif_dict["_atom_site.aniso_U[2][3]"],
             mmcif_dict["_atom_site.aniso_U[3][3]"],
         )
     except KeyError:
         # no anisotropic B factors
         aniso_columns = None
     # if auth_seq_id is present, we use this.
     # Otherwise label_seq_id is used (and only then is it required).
     if "_atom_site.auth_seq_id" in mmcif_dict:
         seq_id_list = mmcif_dict["_atom_site.auth_seq_id"]
     else:
         seq_id_list = mmcif_dict["_atom_site.label_seq_id"]
     # Now loop over atoms and build the structure
     structure_builder = self._structure_builder
     structure_builder.init_structure(structure_id)
     structure_builder.init_seg(" ")
     # Historically, Biopython PDB parser uses model_id to mean array index
     # so serial_id means the Model ID specified in the file
     current_model_id = 0
     current_serial_id = None
     # Explicit flag rather than a sentinel serial, so that a model
     # numbered 0 is still opened and a file without a model column gets
     # exactly one model.
     model_open = False
     current_chain_id = None
     current_residue_id = None
     for i, name in enumerate(atom_id_list):
         resname = residue_id_list[i]
         chainid = chain_id_list[i]
         altloc = alt_list[i]
         if altloc == ".":
             altloc = " "
         resseq = seq_id_list[i]
         # occupancy & B factor
         try:
             tempfactor = float(b_factor_list[i])
         except ValueError:
             raise PDBConstructionException("Invalid or missing B factor")
         try:
             occupancy = float(occupancy_list[i])
         except ValueError:
             raise PDBConstructionException("Invalid or missing occupancy")
         hetatm_flag = "H" if fieldname_list[i] == "HETATM" else " "
         if serial_list is not None:
             # model column exists; use it
             serial_id = serial_list[i]
             if not model_open or current_serial_id != serial_id:
                 # if serial changes, update it and start new model
                 current_serial_id = serial_id
                 structure_builder.init_model(current_model_id,
                                              current_serial_id)
                 current_model_id += 1
                 model_open = True
                 # Forget the previous chain/residue so the first atom of
                 # the new model opens its own chain even when the chain
                 # id repeats across models.
                 current_chain_id = None
                 current_residue_id = None
         elif not model_open:
             # no explicit model column; initialize single model once
             structure_builder.init_model(current_model_id)
             model_open = True
         chain_changed = current_chain_id != chainid
         if chain_changed:
             current_chain_id = chainid
             structure_builder.init_chain(current_chain_id)
         if chain_changed or current_residue_id != resseq:
             current_residue_id = resseq
             icode, int_resseq = self._get_icode(resseq)
             structure_builder.init_residue(resname, hetatm_flag,
                                            int_resseq, icode)
         coord = numpy.array((x_list[i], y_list[i], z_list[i]), 'f')
         element = element_list[i] if element_list else None
         structure_builder.init_atom(name,
                                     coord,
                                     tempfactor,
                                     occupancy,
                                     altloc,
                                     name,
                                     element=element)
         if aniso_columns is not None:
             anisou_array = numpy.array(
                 [float(column[i]) for column in aniso_columns], 'f')
             structure_builder.set_anisou(anisou_array)
     # Now try to set the cell.  A missing or malformed entry only means
     # that no symmetry information is recorded; it is not an error.
     try:
         cell = numpy.array(
             [float(mmcif_dict[key]) for key in (
                 "_cell.length_a",
                 "_cell.length_b",
                 "_cell.length_c",
                 "_cell.angle_alpha",
                 "_cell.angle_beta",
                 "_cell.angle_gamma",
             )], 'f')
         spacegroup = mmcif_dict["_symmetry.space_group_name_H-M"]
         spacegroup = spacegroup[1:-1]  # get rid of quotes!!
     except (KeyError, ValueError, TypeError):
         pass  # no usable cell found, so just ignore
     else:
         structure_builder.set_symmetry(spacegroup, cell)
Ejemplo n.º 6
0
 def _parse_coordinates(self, coords_trailer):
     """Parse the atomic data in the PDB file.

     coords_trailer is the sequence of lines to scan.  Every line updates
     the builder's line counter; ATOM and HETATM records are decoded from
     their fixed columns and replayed into ``self.structure_builder``
     (model, segment, chain, residue, atom).  Other record types are not
     handled by this variant.

     Invalid coordinates raise PDBConstructionException.  An unreadable
     occupancy or B factor, and construction errors raised by the builder,
     are passed to ``self._handle_PDB_exception`` together with the line
     number.
     """
     structure_builder = self.structure_builder
     current_model_id = 0
     # Flag we have an open model
     model_open = False
     current_chain_id = None
     current_segid = None
     current_residue_id = None
     current_resname = None
     # The enumerate index is the local line counter, so the reported line
     # number advances with every record instead of staying constant.
     for local_line_counter, line in enumerate(coords_trailer):
         record_type = line[0:6]
         global_line_counter = self.line_counter + local_line_counter + 1
         structure_builder.set_line_counter(global_line_counter)
         if record_type == 'ATOM  ' or record_type == 'HETATM':
             # Initialize the Model - there was no explicit MODEL record
             if not model_open:
                 structure_builder.init_model(current_model_id)
                 current_model_id += 1
                 model_open = True
             fullname = line[12:16]
             # get rid of whitespace in atom names
             split_list = fullname.split()
             if len(split_list) != 1:
                 # atom name has internal spaces, e.g. " N B ", so
                 # we do not strip spaces
                 name = fullname
             else:
                 # atom name is like " CA ", so we can strip spaces
                 name = split_list[0]
             altloc = line[16:17]
             resname = line[17:20]
             chainid = line[21:22]
             try:
                 serial_number = int(line[6:11])
             except ValueError:
                 serial_number = 0
             resseq = int(line[22:26].split()[0])  # sequence identifier
             icode = line[26:27]  # insertion code
             if record_type == 'HETATM':  # hetero atom flag
                 if resname == "HOH" or resname == "WAT":
                     hetero_flag = "W"
                 else:
                     hetero_flag = "H"
             else:
                 hetero_flag = " "
             residue_id = (hetero_flag, resseq, icode)
             # atomic coordinates
             try:
                 x = float(line[30:38])
                 y = float(line[38:46])
                 z = float(line[46:54])
             except ValueError:
                 # Should we allow parsing to continue in permissive mode?
                 # If so what coordinates should we default to?
                 # Easier to abort!
                 raise PDBConstructionException(
                     "Invalid or missing coordinate(s) at line %i."
                     % global_line_counter)
             coord = numpy.array((x, y, z), 'f')
             # occupancy & B factor
             try:
                 occupancy = float(line[54:60])
             except ValueError:
                 self._handle_PDB_exception("Invalid or missing occupancy",
                                            global_line_counter)
                 occupancy = 0.0  # Is one or zero a good default?
             try:
                 bfactor = float(line[60:66])
             except ValueError:
                 self._handle_PDB_exception("Invalid or missing B factor",
                                            global_line_counter)
                 # The PDB use a default of zero if the data is missing
                 bfactor = 0.0
             segid = line[72:76]
             element = line[76:78].strip()
             if current_segid != segid:
                 current_segid = segid
                 structure_builder.init_seg(current_segid)
             chain_changed = current_chain_id != chainid
             if chain_changed:
                 current_chain_id = chainid
                 structure_builder.init_chain(current_chain_id)
             if (chain_changed or current_residue_id != residue_id
                     or current_resname != resname):
                 current_residue_id = residue_id
                 current_resname = resname
                 try:
                     structure_builder.init_residue(resname, hetero_flag,
                                                    resseq, icode)
                 except PDBConstructionException as message:
                     self._handle_PDB_exception(message,
                                                global_line_counter)
             # init atom
             try:
                 structure_builder.init_atom(name, coord, bfactor,
                                             occupancy, altloc, fullname,
                                             serial_number, element)
             except PDBConstructionException as message:
                 self._handle_PDB_exception(message, global_line_counter)
Ejemplo n.º 7
0
 def _parse_coordinates(self, coords_trailer):
     """Parse the atomic data in the PDB file.

     coords_trailer is the list of lines to scan.  Records are dispatched
     on their first six characters: ATOM/HETATM build models, segments,
     chains, residues and atoms through ``self.structure_builder``;
     ANISOU, SIGUIJ and SIGATM attach their values to the builder; MODEL
     and ENDMDL open and close models.  Any other record type is skipped.

     Returns the remaining lines, starting with the END or CONECT record
     that stopped the scan, or an empty list when the input is exhausted.
     ``self.line_counter`` is advanced by the number of lines consumed.

     Invalid coordinates raise PDBConstructionException.  An unreadable
     occupancy, B factor or model serial number, and construction errors
     raised by the builder, are passed to ``self._handle_PDB_exception``
     together with the line number.
     """
     structure_builder = self.structure_builder
     current_model_id = 0
     # Flag we have an open model
     model_open = False
     current_chain_id = None
     current_segid = None
     current_residue_id = None
     current_resname = None
     for local_line_counter, line in enumerate(coords_trailer):
         record_type = line[0:6]
         global_line_counter = self.line_counter + local_line_counter + 1
         structure_builder.set_line_counter(global_line_counter)
         if record_type == "ATOM  " or record_type == "HETATM":
             # Initialize the Model - there was no explicit MODEL record
             if not model_open:
                 structure_builder.init_model(current_model_id)
                 current_model_id += 1
                 model_open = True
             fullname = line[12:16]
             # get rid of whitespace in atom names
             split_list = fullname.split()
             if len(split_list) != 1:
                 # atom name has internal spaces, e.g. " N B ", so
                 # we do not strip spaces
                 name = fullname
             else:
                 # atom name is like " CA ", so we can strip spaces
                 name = split_list[0]
             altloc = line[16]
             resname = line[17:20]
             chainid = line[21]
             try:
                 serial_number = int(line[6:11])
             except ValueError:
                 serial_number = 0
             resseq = int(line[22:26].split()[0])  # sequence identifier
             icode = line[26]  # insertion code
             if record_type == "HETATM":  # hetero atom flag
                 if resname == "HOH" or resname == "WAT":
                     hetero_flag = "W"
                 else:
                     hetero_flag = "H"
             else:
                 hetero_flag = " "
             residue_id = (hetero_flag, resseq, icode)
             # atomic coordinates
             try:
                 x = float(line[30:38])
                 y = float(line[38:46])
                 z = float(line[46:54])
             except ValueError:
                 # Should we allow parsing to continue in permissive mode?
                 # If so, what coordinates should we default to?
                 # Easier to abort!
                 raise PDBConstructionException(
                     "Invalid or missing coordinate(s) at line %i." %
                     global_line_counter)
             coord = numpy.array((x, y, z), "f")
             # occupancy & B factor
             try:
                 occupancy = float(line[54:60])
             except ValueError:
                 self._handle_PDB_exception("Invalid or missing occupancy",
                                            global_line_counter)
                 occupancy = None  # Rather than arbitrary zero or one
             try:
                 bfactor = float(line[60:66])
             except ValueError:
                 self._handle_PDB_exception("Invalid or missing B factor",
                                            global_line_counter)
                 # The PDB use a default of zero if the data is missing
                 bfactor = 0.0
             segid = line[72:76]
             element = line[76:78].strip()
             if current_segid != segid:
                 current_segid = segid
                 structure_builder.init_seg(current_segid)
             chain_changed = current_chain_id != chainid
             if chain_changed:
                 current_chain_id = chainid
                 structure_builder.init_chain(current_chain_id)
             if (chain_changed or current_residue_id != residue_id
                     or current_resname != resname):
                 current_residue_id = residue_id
                 current_resname = resname
                 try:
                     structure_builder.init_residue(resname, hetero_flag,
                                                    resseq, icode)
                 except PDBConstructionException as message:
                     self._handle_PDB_exception(message,
                                                global_line_counter)
             # init atom
             try:
                 structure_builder.init_atom(name, coord, bfactor,
                                             occupancy, altloc, fullname,
                                             serial_number, element)
             except PDBConstructionException as message:
                 self._handle_PDB_exception(message, global_line_counter)
         elif record_type == "ANISOU":
             # Six contiguous 7-column fields (same layout as SIGUIJ).
             # Built as a list: numpy cannot consume a map() iterator.
             anisou = [float(line[start:end]) for start, end in (
                 (28, 35), (35, 42), (42, 49),
                 (49, 56), (56, 63), (63, 70))]
             # U's are scaled by 10^4
             anisou_array = (numpy.array(anisou, "f") / 10000.0).astype("f")
             structure_builder.set_anisou(anisou_array)
         elif record_type == "MODEL ":
             try:
                 serial_num = int(line[10:14])
             except ValueError:
                 self._handle_PDB_exception(
                     "Invalid or missing model serial number",
                     global_line_counter)
                 serial_num = 0
             structure_builder.init_model(current_model_id, serial_num)
             current_model_id += 1
             model_open = True
             current_chain_id = None
             current_residue_id = None
         elif record_type == "END   " or record_type == "CONECT":
             # End of atomic data, return the trailer
             self.line_counter += local_line_counter
             return coords_trailer[local_line_counter:]
         elif record_type == "ENDMDL":
             model_open = False
             current_chain_id = None
             current_residue_id = None
         elif record_type == "SIGUIJ":
             # standard deviation of anisotropic B factor
             siguij = [float(line[start:end]) for start, end in (
                 (28, 35), (35, 42), (42, 49),
                 (49, 56), (56, 63), (63, 70))]
             # U sigma's are scaled by 10^4
             siguij_array = (numpy.array(siguij, "f") / 10000.0).astype("f")
             structure_builder.set_siguij(siguij_array)
         elif record_type == "SIGATM":
             # standard deviation of atomic positions; the fields share
             # the column layout of x, y, z, occupancy and B factor on
             # ATOM records
             sigatm = [float(line[start:end]) for start, end in (
                 (30, 38), (38, 46), (46, 54), (54, 60), (60, 66))]
             sigatm_array = numpy.array(sigatm, "f")
             structure_builder.set_sigatm(sigatm_array)
     # EOF (does not end in END or CONECT): every line was consumed
     self.line_counter += len(coords_trailer)
     return []