Example #1
0
    def __init__(self, PERMISSIVE=1, get_header=0, structure_builder=None):
        if structure_builder != None:
            self.structure_builder = structure_builder
        else:
            self.structure_builder = StructureBuilder()
        self.header = None
        self.trailer = None
        self.line_counter = 0
        self.PERMISSIVE = PERMISSIVE

        self.structure_reference = {}
        self.authors = {}
        self.current_model_id = 0
        self.current_chain_id = 0
        self.current_residue_id = 0
        self.current_atom_id = 0

        self.header_dict = {
            "name": "",
            "head": "",
            "deposition_date": "1900-01-01",
            "release_date": "1909-01-08",
            "structure_method": "unknown",
            "resolution": None,
            "structure_reference": "unknown",
            "journal_reference": "unknown",
            "journal": "unknown",
            "keywords": "",
            "author": "",  # andere Anordnung im als im normalen PDB-File, gruening, b.a. <-> b.a.gruening
            "compound": {"1": {"misc": ""}},
            "source": {"1": {"misc": ""}},
        }
Example #2
0
    def __init__(self, PERMISSIVE=1, get_header=0, structure_builder=None):
        if structure_builder != None:
            self.structure_builder = structure_builder
        else:
            self.structure_builder = StructureBuilder()
        self.header = None
        self.trailer = None
        self.line_counter = 0
        self.PERMISSIVE = PERMISSIVE

        self.structure_reference = {}
        self.authors = {}
        self.current_model_id = 0
        self.current_chain_id = 0
        self.current_residue_id = 0
        self.current_atom_id = 0

        self.header_dict = {
            'name': "",
            'head': '',
            'deposition_date': "1900-01-01",
            'release_date': "1909-01-08",
            'structure_method': "unknown",
            'resolution': None,
            'structure_reference': "unknown",
            'journal_reference': "unknown",
            'journal': "unknown",
            'keywords': "",
            'author':
            "",  #andere Anordnung im als im normalen PDB-File, gruening, b.a. <-> b.a.gruening
            'compound': {
                '1': {
                    'misc': ''
                }
            },
            'source': {
                '1': {
                    'misc': ''
                }
            },
        }
Example #3
0
class PDBMLParser:
    def __init__(self, PERMISSIVE=1, get_header=0, structure_builder=None):
        if structure_builder != None:
            self.structure_builder = structure_builder
        else:
            self.structure_builder = StructureBuilder()
        self.header = None
        self.trailer = None
        self.line_counter = 0
        self.PERMISSIVE = PERMISSIVE

        self.structure_reference = {}
        self.authors = {}
        self.current_model_id = 0
        self.current_chain_id = 0
        self.current_residue_id = 0
        self.current_atom_id = 0

        self.header_dict = {
            "name": "",
            "head": "",
            "deposition_date": "1900-01-01",
            "release_date": "1909-01-08",
            "structure_method": "unknown",
            "resolution": None,
            "structure_reference": "unknown",
            "journal_reference": "unknown",
            "journal": "unknown",
            "keywords": "",
            "author": "",  # andere Anordnung im als im normalen PDB-File, gruening, b.a. <-> b.a.gruening
            "compound": {"1": {"misc": ""}},
            "source": {"1": {"misc": ""}},
        }

    def get_structure(self, id, file):
        """Return the structure.

        Arguments:
        o id - string, the id that will be used for the structure
        o file - name of the PDB file OR an open filehandle
        """
        self.header = None
        self.trailer = None
        # Make a StructureBuilder instance (pass id of structure as parameter)
        self.structure_builder.init_structure(id)

        self._parse(file)
        self.structure_builder.set_header(self.header)
        # Return the Structure instance
        return self.structure_builder.get_structure()

    def get_header(self):
        """Return the header."""
        return self.header

    def _parse(self, filepath):

        # get an iterable
        context = etree.iterparse(filepath, events=("start", "end"))
        structure_builder = self.structure_builder
        # turn it into an iterator
        context = iter(context)

        # get the root element
        event, root = context.next()

        header_only = False
        # TODO: its not a good solution :(
        # Save the structure state for each atom, its used to add anisotrop properties afterwards
        atom_id_structure_mapping = {}

        for event, elem in context:
            if event == "end":  # and elem.tag == "record":
                if elem.tag == "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}atom_siteCategory":
                    elem.tail = None
                    elem.clear()

                if elem.tag == "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}atom_site_anisotrop":
                    # res = chain.__getitem__(("", id, ""))
                    # atom = res.__getitem__()
                    pass
                if elem.tag == "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}atom_site":
                    if header_only:
                        elem.clear()
                    else:
                        atom_id = int(elem.get("id"))
                        # SET the AtomID as line_counter
                        structure_builder.set_line_counter(atom_id)
                        resname = elem.find("{http://pdbml.pdb.org/schema/pdbx-v32.xsd}auth_comp_id").text

                        if elem.find("{http://pdbml.pdb.org/schema/pdbx-v32.xsd}group_PDB").text == "ATOM":
                            if resname == "HOH" or resname == "WAT":
                                hetero_flag = "W"
                            else:
                                hetero_flag = "H"
                        else:
                            hetero_flag = " "

                        sequence_identifier = elem.findtext("{http://pdbml.pdb.org/schema/pdbx-v32.xsd}auth_seq_id")
                        model = int(elem.findtext("{http://pdbml.pdb.org/schema/pdbx-v32.xsd}pdbx_PDB_model_num"))
                        if self.current_model_id != model:
                            self.current_model_id = model
                            structure_builder.init_model(self.current_model_id)
                            self.current_chain_id = 0
                        seg_id = elem.findtext("{http://pdbml.pdb.org/schema/pdbx-v32.xsd}label_seq_id")
                        structure_builder.init_seg(seg_id)

                        chain = elem.find("{http://pdbml.pdb.org/schema/pdbx-v32.xsd}auth_asym_id").text
                        if self.current_chain_id != chain:
                            self.current_chain_id = chain
                            structure_builder.init_chain(self.current_chain_id)
                            self.current_residue_id = 0
                        """ 
                            Initiate a new Residue object. 

                            Arguments: 
                            o resname - string, e.g. "ASN" 
                            o field - hetero flag, "W" for waters, "H" for  
                                hetero residues, otherwise blank. 
                            o resseq - int, sequence identifier
                            o icode - string, insertion code 
                        """
                        if self.current_residue_id != sequence_identifier:
                            self.current_residue_id = sequence_identifier
                            try:
                                structure_builder.init_residue(resname, hetero_flag, sequence_identifier, " ")
                            except PDBConstructionException, message:
                                self._handle_PDB_exception(message, atom_id)
                            residue_container = (resname, hetero_flag, sequence_identifier, " ")

                        """ 
                            Initiate a new Atom object. 

                            Arguments: 
                            o name - string, atom name, e.g. CA, spaces should be stripped 
                            o coord - Numeric array (Float0, size 3), atomic coordinates 
                            o b_factor - float, B factor 
                            o occupancy - float 
                            o altloc - string, alternative location specifier 
                            o fullname - string, atom name including spaces, e.g. " CA " 
                        """
                        name = elem.findtext("{http://pdbml.pdb.org/schema/pdbx-v32.xsd}label_atom_id")
                        x, y, z = (
                            float(elem.findtext("{http://pdbml.pdb.org/schema/pdbx-v32.xsd}Cartn_x")),
                            float(elem.findtext("{http://pdbml.pdb.org/schema/pdbx-v32.xsd}Cartn_y")),
                            float(elem.findtext("{http://pdbml.pdb.org/schema/pdbx-v32.xsd}Cartn_z")),
                        )
                        coord = numpy.array((x, y, z), "f")
                        b_factor = float(
                            elem.findtext("{http://pdbml.pdb.org/schema/pdbx-v32.xsd}B_iso_or_equiv", "0.0")
                        )
                        occupancy = float(elem.findtext("{http://pdbml.pdb.org/schema/pdbx-v32.xsd}occupancy", "0.0"))
                        altloc = elem.findtext("{http://pdbml.pdb.org/schema/pdbx-v32.xsd}label_alt_id")
                        if not altloc:
                            altloc = ""
                        fullname = name
                        # TODO: current_atom_id wird nicht gespeichert
                        if not name:
                            print "Error: no name"
                            sys.exit()
                        try:
                            structure_builder.init_atom(
                                name, coord, b_factor, occupancy, altloc, fullname, serial_number=atom_id
                            )
                        except PDBConstructionException, message:
                            self._handle_PDB_exception(message, atom_id)

                        atom_container = (name, coord, b_factor, occupancy, altloc, fullname, atom_id)
                        elem.clear()
                        elem.tail = None

                        atom_id_structure_mapping[atom_id] = {
                            "model": self.current_model_id,
                            "seg_id": seg_id,
                            "chain_id": self.current_chain_id,
                            "residue": residue_container,
                            "atom": atom_container,
                        }

                        """

                        # TODO:
                            recordtype ANISOU
                                anisou=map(float, (line[28:35], line[35:42], line[43:49], line[49:56], line[56:63], line[63:70])) 
                                # U's are scaled by 10^4 
                                anisou_array=(numpy.array(anisou, 'f')/10000.0).astype('f') 
                                structure_builder.set_anisou(anisou_array) 
                            recordtype SIGUIJ
                                # standard deviation of anisotropic B factor 
                                siguij=map(float, (line[28:35], line[35:42], line[42:49], line[49:56], line[56:63], line[63:70])) 
                                # U sigma's are scaled by 10^4 
                                siguij_array=(numpy.array(siguij, 'f')/10000.0).astype('f')    
                                structure_builder.set_siguij(siguij_array)
                            recordtype SIGATM
                                # standard deviation of atomic positions 
                                sigatm=map(float, (line[30:38], line[38:45], line[46:54], line[54:60], line[60:66]))
                                sigatm_array=numpy.array(sigatm, 'f')
                                structure_builder.set_sigatm(sigatm_array)
                        """

                elif elem.tag == "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}entity_polyCategory":
                    for sub in elem.findall("{http://pdbml.pdb.org/schema/pdbx-v32.xsd}entity_poly"):
                        self.header_dict["compound"][sub.get("entity_id", "primary")].update(
                            {"chain": sub.findtext("{http://pdbml.pdb.org/schema/pdbx-v32.xsd}pdbx_strand_id").lower()}
                        )
                    elem.clear()
                elif elem.tag == "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}entityCategory":
                    for subelem in elem.findall("{http://pdbml.pdb.org/schema/pdbx-v32.xsd}entity"):
                        cid = subelem.get("id")
                        if self.header_dict["compound"].has_key(cid):
                            self.header_dict["compound"][cid].update(
                                {
                                    "molecule": subelem.findtext(
                                        "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}pdbx_description"
                                    ).lower()
                                }
                            )
                        else:
                            self.header_dict["compound"][cid] = {"misc": ""}
                            self.header_dict["compound"][cid].update(
                                {
                                    "molecule": subelem.findtext(
                                        "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}pdbx_description"
                                    ).lower()
                                }
                            )
                        temp = subelem.findtext("{http://pdbml.pdb.org/schema/pdbx-v32.xsd}pdbx_mutation")
                        if temp:
                            self.header_dict["compound"][cid].update({"mutation": temp.lower()})
                    elem.clear()
Example #4
0
class PDBMLParser:
    def __init__(self, PERMISSIVE=1, get_header=0, structure_builder=None):
        if structure_builder != None:
            self.structure_builder = structure_builder
        else:
            self.structure_builder = StructureBuilder()
        self.header = None
        self.trailer = None
        self.line_counter = 0
        self.PERMISSIVE = PERMISSIVE

        self.structure_reference = {}
        self.authors = {}
        self.current_model_id = 0
        self.current_chain_id = 0
        self.current_residue_id = 0
        self.current_atom_id = 0

        self.header_dict = {
            'name': "",
            'head': '',
            'deposition_date': "1900-01-01",
            'release_date': "1909-01-08",
            'structure_method': "unknown",
            'resolution': None,
            'structure_reference': "unknown",
            'journal_reference': "unknown",
            'journal': "unknown",
            'keywords': "",
            'author':
            "",  #andere Anordnung im als im normalen PDB-File, gruening, b.a. <-> b.a.gruening
            'compound': {
                '1': {
                    'misc': ''
                }
            },
            'source': {
                '1': {
                    'misc': ''
                }
            },
        }

    def get_structure(self, id, file):
        """Return the structure.

        Arguments:
        o id - string, the id that will be used for the structure
        o file - name of the PDB file OR an open filehandle
        """
        self.header = None
        self.trailer = None
        # Make a StructureBuilder instance (pass id of structure as parameter)
        self.structure_builder.init_structure(id)

        self._parse(file)
        self.structure_builder.set_header(self.header)
        # Return the Structure instance
        return self.structure_builder.get_structure()

    def get_header(self):
        """Return the header."""
        return self.header

    def _parse(self, filepath):

        # get an iterable
        context = etree.iterparse(filepath, events=("start", "end"))
        structure_builder = self.structure_builder
        # turn it into an iterator
        context = iter(context)

        # get the root element
        event, root = context.next()

        header_only = False
        # TODO: its not a good solution :(
        # Save the structure state for each atom, its used to add anisotrop properties afterwards
        atom_id_structure_mapping = {}

        for event, elem in context:
            if event == "end":  # and elem.tag == "record":
                if elem.tag == "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}atom_siteCategory":
                    elem.tail = None
                    elem.clear()

                if elem.tag == "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}atom_site_anisotrop":
                    #res = chain.__getitem__(("", id, ""))
                    #atom = res.__getitem__()
                    pass
                if elem.tag == "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}atom_site":
                    if header_only:
                        elem.clear()
                    else:
                        atom_id = int(elem.get("id"))
                        # SET the AtomID as line_counter
                        structure_builder.set_line_counter(atom_id)
                        resname = elem.find(
                            "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}auth_comp_id"
                        ).text

                        if elem.find(
                                "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}group_PDB"
                        ).text == "ATOM":
                            if resname == "HOH" or resname == "WAT":
                                hetero_flag = "W"
                            else:
                                hetero_flag = "H"
                        else:
                            hetero_flag = " "

                        sequence_identifier = elem.findtext(
                            "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}auth_seq_id"
                        )
                        model = int(
                            elem.findtext(
                                "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}pdbx_PDB_model_num"
                            ))
                        if self.current_model_id != model:
                            self.current_model_id = model
                            structure_builder.init_model(self.current_model_id)
                            self.current_chain_id = 0
                        seg_id = elem.findtext(
                            "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}label_seq_id"
                        )
                        structure_builder.init_seg(seg_id)

                        chain = elem.find(
                            "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}auth_asym_id"
                        ).text
                        if self.current_chain_id != chain:
                            self.current_chain_id = chain
                            structure_builder.init_chain(self.current_chain_id)
                            self.current_residue_id = 0
                        """ 
                            Initiate a new Residue object. 

                            Arguments: 
                            o resname - string, e.g. "ASN" 
                            o field - hetero flag, "W" for waters, "H" for  
                                hetero residues, otherwise blank. 
                            o resseq - int, sequence identifier
                            o icode - string, insertion code 
                        """
                        if self.current_residue_id != sequence_identifier:
                            self.current_residue_id = sequence_identifier
                            try:
                                structure_builder.init_residue(
                                    resname, hetero_flag, sequence_identifier,
                                    " ")
                            except PDBConstructionException, message:
                                self._handle_PDB_exception(message, atom_id)
                            residue_container = (resname, hetero_flag,
                                                 sequence_identifier, " ")
                        """ 
                            Initiate a new Atom object. 

                            Arguments: 
                            o name - string, atom name, e.g. CA, spaces should be stripped 
                            o coord - Numeric array (Float0, size 3), atomic coordinates 
                            o b_factor - float, B factor 
                            o occupancy - float 
                            o altloc - string, alternative location specifier 
                            o fullname - string, atom name including spaces, e.g. " CA " 
                        """
                        name = elem.findtext(
                            "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}label_atom_id"
                        )
                        x, y, z = float(
                            elem.findtext(
                                "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}Cartn_x"
                            )
                        ), float(
                            elem.findtext(
                                "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}Cartn_y"
                            )
                        ), float(
                            elem.findtext(
                                "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}Cartn_z"
                            ))
                        coord = numpy.array((x, y, z), 'f')
                        b_factor = float(
                            elem.findtext(
                                "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}B_iso_or_equiv",
                                "0.0"))
                        occupancy = float(
                            elem.findtext(
                                "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}occupancy",
                                "0.0"))
                        altloc = elem.findtext(
                            "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}label_alt_id"
                        )
                        if not altloc:
                            altloc = ""
                        fullname = name
                        #TODO: current_atom_id wird nicht gespeichert
                        if not name:
                            print "Error: no name"
                            sys.exit()
                        try:
                            structure_builder.init_atom(name,
                                                        coord,
                                                        b_factor,
                                                        occupancy,
                                                        altloc,
                                                        fullname,
                                                        serial_number=atom_id)
                        except PDBConstructionException, message:
                            self._handle_PDB_exception(message, atom_id)

                        atom_container = (name, coord, b_factor, occupancy,
                                          altloc, fullname, atom_id)
                        elem.clear()
                        elem.tail = None

                        atom_id_structure_mapping[atom_id] = {
                            "model": self.current_model_id,
                            "seg_id": seg_id,
                            "chain_id": self.current_chain_id,
                            "residue": residue_container,
                            "atom": atom_container
                        }
                        """

                        # TODO:
                            recordtype ANISOU
                                anisou=map(float, (line[28:35], line[35:42], line[43:49], line[49:56], line[56:63], line[63:70])) 
                                # U's are scaled by 10^4 
                                anisou_array=(numpy.array(anisou, 'f')/10000.0).astype('f') 
                                structure_builder.set_anisou(anisou_array) 
                            recordtype SIGUIJ
                                # standard deviation of anisotropic B factor 
                                siguij=map(float, (line[28:35], line[35:42], line[42:49], line[49:56], line[56:63], line[63:70])) 
                                # U sigma's are scaled by 10^4 
                                siguij_array=(numpy.array(siguij, 'f')/10000.0).astype('f')    
                                structure_builder.set_siguij(siguij_array)
                            recordtype SIGATM
                                # standard deviation of atomic positions 
                                sigatm=map(float, (line[30:38], line[38:45], line[46:54], line[54:60], line[60:66]))
                                sigatm_array=numpy.array(sigatm, 'f')
                                structure_builder.set_sigatm(sigatm_array)
                        """

                elif elem.tag == "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}entity_polyCategory":
                    for sub in elem.findall(
                            "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}entity_poly"
                    ):
                        self.header_dict["compound"][sub.get(
                            "entity_id", "primary"
                        )].update({
                            "chain":
                            sub.findtext(
                                "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}pdbx_strand_id"
                            ).lower()
                        })
                    elem.clear()
                elif elem.tag == "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}entityCategory":
                    for subelem in elem.findall(
                            "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}entity"
                    ):
                        cid = subelem.get("id")
                        if self.header_dict["compound"].has_key(cid):
                            self.header_dict["compound"][cid].update({
                                "molecule":
                                subelem.findtext(
                                    "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}pdbx_description"
                                ).lower()
                            })
                        else:
                            self.header_dict["compound"][cid] = {"misc": ''}
                            self.header_dict["compound"][cid].update({
                                "molecule":
                                subelem.findtext(
                                    "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}pdbx_description"
                                ).lower()
                            })
                        temp = subelem.findtext(
                            "{http://pdbml.pdb.org/schema/pdbx-v32.xsd}pdbx_mutation"
                        )
                        if temp:
                            self.header_dict["compound"][cid].update(
                                {"mutation": temp.lower()})
                    elem.clear()