Example #1
0
    def load_unit_cell(self, ucell_map):
        """Called by the implementation of load_metadata to load the
        unit cell parameters for the structure.

        ucell_map is a mapping which must provide the keys "a", "b", "c",
        "alpha", "beta" and "gamma".  If any of them is missing, a debug
        message naming the first missing key is logged and the structure is
        left unchanged.  An optional "space_group" entry is forwarded to the
        UnitCell constructor when present.

        On success the new UnitCell is stored in self.struct.unit_cell.
        """
        required_keys = ("a", "b", "c", "alpha", "beta", "gamma")

        ## the "in" operator replaces the deprecated dict.has_key()
        for key in required_keys:
            if key not in ucell_map:
                ConsoleOutput.debug("ucell_map missing: %s" % (key))
                return

        ## collect the constructor arguments once instead of duplicating
        ## the whole call for the with/without space group cases
        cell_args = dict((key, ucell_map[key]) for key in required_keys)
        if "space_group" in ucell_map:
            cell_args["space_group"] = ucell_map["space_group"]

        self.struct.unit_cell = UnitCell.UnitCell(**cell_args)
Example #2
0
    def load_unit_cell(self, ucell_map):
        """Called by the implementation of load_metadata to load the unit cell
        parameters for the structure.

        ucell_map is a mapping which must provide the keys "a", "b", "c",
        "alpha", "beta" and "gamma".  If any of them is missing, a debug
        message naming the first missing key is logged and the structure is
        left unchanged.  An optional "space_group" entry is forwarded to the
        UnitCell constructor when present.

        On success the new UnitCell is stored in self.struct.unit_cell.
        """
        required_keys = ("a", "b", "c", "alpha", "beta", "gamma")

        ## the "in" operator replaces the deprecated dict.has_key()
        for key in required_keys:
            if key not in ucell_map:
                ConsoleOutput.debug("ucell_map missing: %s" % (key))
                return

        ## collect the constructor arguments once instead of duplicating
        ## the whole call for the with/without space group cases
        cell_args = dict((key, ucell_map[key]) for key in required_keys)
        if "space_group" in ucell_map:
            cell_args["space_group"] = ucell_map["space_group"]

        self.struct.unit_cell = UnitCell.UnitCell(**cell_args)
Example #3
0
    def read_end_finalize(self):
        """Called for final cleanup after structure source reading is done.

        Depending on which of the calc_sequence, library_bonds and
        distance_bonds attributes are set to True, this fills in the
        sequence of every chain whose sequence is still empty, adds the
        bonds defined in the library, and adds the bonds found by covalent
        distance calculations.
        """
        ConsoleOutput.debug("read_end_finalize()")

        ## derive a sequence for every chain which does not have one yet
        if self.calc_sequence is True:
            for model in self.struct.iter_models():
                for chain in model.iter_chains():
                    if len(chain.sequence) != 0:
                        ## the chain already carries a sequence; keep it
                        continue
                    chain.sequence.set_from_fragments(
                        chain.iter_fragments())

        ## bonds taken from the library definitions
        if self.library_bonds is True:
            self.struct.add_bonds_from_library()

        ## bonds inferred from covalent distances
        if self.distance_bonds is True:
            self.struct.add_bonds_from_covalent_distance()
    def read_end_finalize(self):
        """Called for final cleanup after structure source reading is done.

        Depending on which of the calc_sequence, library_bonds and
        distance_bonds attributes are set to True, this fills in the
        sequence of every chain whose sequence is still empty, adds the
        bonds defined in the library, and adds the bonds found by covalent
        distance calculations.
        """
        ConsoleOutput.debug("read_end_finalize()")

        ## derive a sequence for every chain which does not have one yet
        if self.calc_sequence is True:
            for model in self.struct.iter_models():
                for chain in model.iter_chains():
                    if len(chain.sequence) != 0:
                        ## the chain already carries a sequence; keep it
                        continue
                    chain.sequence.set_from_fragments(chain.iter_fragments())

        ## bonds taken from the library definitions
        if self.library_bonds is True:
            self.struct.add_bonds_from_library()

        ## bonds inferred from covalent distances
        if self.distance_bonds is True:
            self.struct.add_bonds_from_covalent_distance()
Example #5
0
    def name_service(self):
        """Runs the name service on all atoms needing to be named.  This is
        a complicated function which corrects most commonly found errors and
        omissions from PDB files.

        The atoms queued in self.name_service_list are handled in two passes:

        1. Polymer atoms (residue names which Library reports as amino acids
           or nucleic acids) are grouped into chains and fragments, given
           the next unused chain_id, and added to self.struct.

        2. All remaining atoms are grouped by (chain_id, res_name), given a
           chain_id (their own if it is still unused, otherwise the next
           unused one) and sequentially numbered fragment_ids, and added to
           self.struct.

        Returns immediately, leaving self.name_service_list in place, when
        the list is empty.  Otherwise self.name_service_list is deleted once
        its atoms have been split into fragments.

        Raises StructureBuilderError when no unused chain_id is left.
        """
        if len(self.name_service_list) == 0:
            return

        ## returns the next available chain_id in self.struct; a non-empty
        ## suggested chain_id is returned as-is when no such chain exists
        ## XXX: it's possible to run out of chain IDs!
        def next_chain_id(suggest_chain_id):
            if suggest_chain_id != "":
                chain = self.struct.get_chain(suggest_chain_id)
                if not chain:
                    return suggest_chain_id

            for chain_id in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789":
                chain = self.struct.get_chain(chain_id)
                if not chain:
                    return chain_id

            raise StructureBuilderError("name_service exhausted new chain_ids")

        ## NAME SERVICE FOR POLYMER ATOMS

        ## what if we are given a list of atoms with res_name, frag_id,
        ## and model_id where the frag_id are sequential?  they can be
        ## sequential several ways using insertion codes, but large breaks
        ## often denote chain breaks

        ## I need to handle the special case of a list of polymer residues
        ## which do not have chain_ids.   This requires a first pass over
        ## the atom list using different rules than what I use for sorting
        ## out non-polymers

        current_polymer_type = None
        current_polymer_model_id = None
        current_polymer_chain_id = None
        current_polymer_frag_id = None
        current_polymer_res_name = None
        current_polymer_name_dict = None

        ## polymer_model_dict[model_id] = [chain1, chain2, ...]
        ## chain = [frag1, frag2, ...]; frag = [atm1, atm2, ...]
        polymer_model_dict = {}
        current_frag = None
        current_frag_list = None

        ## iterate over a copy because handled atoms are removed from
        ## self.name_service_list inside the loop
        for atm in self.name_service_list[:]:

            ## determine the polymer type of the atom
            if Library.library_is_amino_acid(atm.res_name):
                polymer_type = "protein"
            elif Library.library_is_nucleic_acid(atm.res_name):
                polymer_type = "dna"
            else:
                ## if the atom is not a polymer, we definitely have a break
                ## in this chain; the atom stays in the name service list
                ## for the non-polymer pass below
                current_polymer_type = None
                current_polymer_model_id = None
                current_polymer_chain_id = None
                current_polymer_frag_id = None
                current_polymer_res_name = None
                current_polymer_name_dict = None
                current_frag = None
                current_frag_list = None
                continue

            fragment_id = Structure.FragmentID(atm.fragment_id)

            ## now we deal with conditions which can terminate the current
            ## polymer chain
            ## NOTE(review): current_polymer_frag_id is only set when a chain
            ## is started, so the last test compares against the first
            ## fragment of the chain, not the previous one -- confirm that
            ## this is intended
            if polymer_type!=current_polymer_type or \
               atm.model_id!=current_polymer_model_id or \
               atm.chain_id!=current_polymer_chain_id or \
               fragment_id<current_polymer_frag_id:

                current_polymer_type = polymer_type
                current_polymer_model_id = atm.model_id
                current_polymer_chain_id = atm.chain_id
                current_polymer_frag_id = Structure.FragmentID(atm.fragment_id)
                current_polymer_res_name = atm.res_name
                current_polymer_name_dict = {atm.name: True}

                ## create new fragment
                current_frag = [atm]
                current_frag_list = [current_frag]

                ## create new fragment list (chain)
                try:
                    model = polymer_model_dict[atm.model_id]
                except KeyError:
                    model = [current_frag_list]
                    polymer_model_dict[atm.model_id] = model
                else:
                    model.append(current_frag_list)

                ## we have now dealt with the atom, so it can be removed
                ## from the name service list
                self.name_service_list.remove(atm)
                continue

            ## if we get here, then we know this atom is destined for the
            ## current chain, and the algorithm needs to place the atom
            ## in the current fragment, or create a new fragment for it
            ## to go into; the conditions for it going into the current
            ## fragment are: it has to have the same res_name, and its
            ## atom name cannot conflict with the names of atoms already
            ## in the fragment
            if atm.res_name != current_polymer_res_name or \
               atm.name in current_polymer_name_dict:

                current_polymer_res_name = atm.res_name
                current_polymer_name_dict = {atm.name: True}

                ## create new fragment and add it to the current fragment list
                current_frag = [atm]
                current_frag_list.append(current_frag)

                ## we have now dealt with the atom, so it can be removed
                ## from the name service list
                self.name_service_list.remove(atm)
                continue

            ## okay, put it in the current fragment
            current_frag.append(atm)
            self.name_service_list.remove(atm)

        ## now assign chain_ids and add the atoms to the structure
        model_ids = sorted(polymer_model_dict)
        model_list = [polymer_model_dict[model_id] for model_id in model_ids]

        ## the largest number of polymer chains found in any one model
        num_chains = 0
        for chain_list in polymer_model_dict.itervalues():
            num_chains = max(num_chains, len(chain_list))

        for chain_index in xrange(num_chains):
            ## get next available chain_id
            chain_id = next_chain_id("")

            ## assign the chain_id to all the atoms in the chain
            ## TODO: check fragment_id too
            for model in model_list:
                try:
                    frag_list = model[chain_index]
                except IndexError:
                    ## this model holds fewer polymer chains than the
                    ## largest model; skip it instead of failing, as the
                    ## non-polymer pass below does for missing fragments
                    continue

                for frag in frag_list:
                    for atm in frag:
                        atm.chain_id = chain_id
                        self.struct.add_atom(atm, True)

        ## free the memory used by the polymer naming service
        del polymer_model_dict
        del model_list

        ## NAME SERVICE FOR NON-POLYMER ATOMS
        ## cr = (chain_id, res_name)
        ##
        ## cr_dict[cr_key] = model_dict
        ##
        ## model_dict[model] = frag_list
        ##
        ## frag_list = [ frag1, frag2, frag3, ...]
        ##
        ## frag = [atm1, atm2, atm3, ...]

        cr_dict = {}
        cr_key_list = []

        frag_id = None
        frag = None
        name_dict = {}

        ## split atoms into fragments
        for atm in self.name_service_list:
            atm_id = (atm.name, atm.alt_loc)
            atm_frag_id = (atm.model_id, atm.chain_id, atm.fragment_id,
                           atm.res_name)

            ## if the atom fragment id matches the current fragment id
            ## and doesn't conflict with any other atom name in the fragment
            ## then add it to the fragment
            if atm_frag_id == frag_id and atm_id not in name_dict:
                frag.append(atm)
                name_dict[atm_id] = True

            else:
                cr_key = (atm.chain_id, atm.res_name)

                ### debug
                if frag:
                    ConsoleOutput.debug(
                        "name_service: fragment detected in cr=%s" %
                        (str(cr_key)))
                    for a in frag:
                        ConsoleOutput.debug("  " + str(a))
                ### /debug

                try:
                    model_dict = cr_dict[cr_key]
                except KeyError:
                    model_dict = cr_dict[cr_key] = {}
                    ## cr_key_list remembers the order of first appearance
                    cr_key_list.append(cr_key)

                try:
                    frag_list = model_dict[atm.model_id]
                except KeyError:
                    frag_list = model_dict[atm.model_id] = []

                name_dict = {atm_id: True}
                frag_id = atm_frag_id
                frag = [atm]
                frag_list.append(frag)

        ## free self.name_service_list and other vars to save some memory
        del self.name_service_list

        new_chain_id = None
        fragment_id_num = None

        for cr_key in cr_key_list:
            ### debug
            ConsoleOutput.debug("name_service: chain_id / res_name keys")
            ConsoleOutput.debug("  cr_key: chain_id='%s' res_name='%s'" %
                                (cr_key[0], cr_key[1]))
            ### /debug

            ## get the next chain ID, use the cfr group's
            ## loaded chain_id if possible
            chain_id = next_chain_id(cr_key[0])

            ## if we are not out of chain IDs, use the new chain ID and
            ## reset the fragment_id
            if chain_id is not None:
                new_chain_id = chain_id
                fragment_id_num = 0

            elif new_chain_id is None or fragment_id_num is None:
                ConsoleOutput.fatal(
                    "name_service: unable to assign any chain ids")

            ## get model dictionary
            model_dict = cr_dict[cr_key]

            ## inspect the model dictionary to determine the number
            ## of fragments in each model -- they should be the same
            ## and have a 1:1 correspondence; if not, match up the
            ## fragments as much as possible
            max_frags = -1
            for frag_list in model_dict.itervalues():
                frag_list_len = len(frag_list)

                if max_frags == -1:
                    max_frags = frag_list_len
                    continue

                if max_frags != frag_list_len:
                    strx = "name_service: model fragments not identical"
                    ConsoleOutput.debug(strx)
                    ConsoleOutput.warning(strx)
                    max_frags = max(max_frags, frag_list_len)

            ## now iterate through the fragment lists in parallel and assign
            ## the new chain_id and fragment_id
            for i in xrange(max_frags):
                fragment_id_num += 1

                for frag_list in model_dict.itervalues():
                    try:
                        frag = frag_list[i]
                    except IndexError:
                        ## this model has fewer fragments than max_frags
                        continue

                    ## assign new chain_id and fragment_id, then place the
                    ## atom in the structure
                    for atm in frag:
                        atm.chain_id = new_chain_id
                        atm.fragment_id = str(fragment_id_num)
                        self.struct.add_atom(atm, True)

            ## logging
            ConsoleOutput.warning(
                "name_service(): added chain_id=%s, res_name=%s, num_residues=%d"
                % (new_chain_id, cr_key[1], fragment_id_num))
Example #6
0
    def name_service(self):
        """Runs the name service on all atoms needing to be named. This is a
        complicated function which corrects most commonly found errors and
        omissions from PDB files.

        The atoms queued in self.name_service_list are handled in two passes:

        1. Polymer atoms (residue names which Library reports as amino acids
           or nucleic acids) are grouped into chains and fragments, given
           the next unused chain_id, and added to self.struct.

        2. All remaining atoms are grouped by (chain_id, res_name), given a
           chain_id (their own if it is still unused, otherwise the next
           unused one) and sequentially numbered fragment_ids, and added to
           self.struct.

        Returns immediately, leaving self.name_service_list in place, when
        the list is empty. Otherwise self.name_service_list is deleted once
        its atoms have been split into fragments.

        Raises StructureBuilderError when no unused chain_id is left.
        """
        if len(self.name_service_list) == 0:
            return

        ## returns the next available chain_id in self.struct; a non-empty
        ## suggested chain_id is returned as-is when no such chain exists
        ## XXX: it's possible to run out of chain IDs!
        def next_chain_id(suggest_chain_id):
            if suggest_chain_id != "":
                chain = self.struct.get_chain(suggest_chain_id)
                if not chain:
                    return suggest_chain_id

            ## TODO: Add the following alphanumeric string to Constants.py, 2010-09-21
            for chain_id in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789":
                chain = self.struct.get_chain(chain_id)
                if not chain:
                    return chain_id

            raise StructureBuilderError("name_service exhausted new chain_ids")


        ## NAME SERVICE FOR POLYMER ATOMS

        ## What if we are given a list of atoms with res_name, frag_id, and
        ## model_id where the frag_id are sequential? They can be sequential
        ## several ways using insertion codes, but large breaks often denote
        ## chain breaks.

        ## We need to handle the special case of a list of polymer residues
        ## which do not have chain_ids. This requires a first pass over the
        ## atom list using different rules than what we use for sorting out
        ## non-polymers.

        current_polymer_type      = None
        current_polymer_model_id  = None
        current_polymer_chain_id  = None
        current_polymer_frag_id   = None
        current_polymer_res_name  = None
        current_polymer_name_dict = None

        ## polymer_model_dict[model_id] = [chain1, chain2, ...]
        ## chain = [frag1, frag2, ...]; frag = [atm1, atm2, ...]
        polymer_model_dict = {}
        current_frag       = None
        current_frag_list  = None

        ## iterate over a copy because handled atoms are removed from
        ## self.name_service_list inside the loop
        for atm in self.name_service_list[:]:
            ## determine the polymer type of the atom
            if Library.library_is_amino_acid(atm.res_name):
                polymer_type = "protein"
            elif Library.library_is_nucleic_acid(atm.res_name):
                polymer_type = "dna"
            else:
                ## if the atom is not a polymer, we definitely have a break
                ## in this chain; the atom stays in the name service list
                ## for the non-polymer pass below
                current_polymer_type      = None
                current_polymer_model_id  = None
                current_polymer_chain_id  = None
                current_polymer_frag_id   = None
                current_polymer_res_name  = None
                current_polymer_name_dict = None
                current_frag              = None
                current_frag_list         = None
                continue

            fragment_id = Structure.FragmentID(atm.fragment_id)

            ## now we deal with conditions which can terminate the current
            ## polymer chain
            ## NOTE(review): current_polymer_frag_id is only set when a chain
            ## is started, so the last test compares against the first
            ## fragment of the chain, not the previous one -- confirm that
            ## this is intended
            if polymer_type!=current_polymer_type or \
               atm.model_id!=current_polymer_model_id or \
               atm.chain_id!=current_polymer_chain_id or \
               fragment_id<current_polymer_frag_id:

                current_polymer_type      = polymer_type
                current_polymer_model_id  = atm.model_id
                current_polymer_chain_id  = atm.chain_id
                current_polymer_frag_id   = Structure.FragmentID(atm.fragment_id)
                current_polymer_res_name  = atm.res_name
                current_polymer_name_dict = {atm.name: True}

                ## create new fragment
                current_frag = [atm]
                current_frag_list = [current_frag]

                ## create new fragment list (chain)
                try:
                    model = polymer_model_dict[atm.model_id]
                except KeyError:
                    model = [current_frag_list]
                    polymer_model_dict[atm.model_id] = model
                else:
                    model.append(current_frag_list)

                ## we have now dealt with the atom, so it can be removed from
                ## the name service list
                self.name_service_list.remove(atm)
                continue

            ## if we get here, then we know this atom is destined for the
            ## current chain, and the algorithm needs to place the atom
            ## in the current fragment, or create a new fragment for it
            ## to go into; the conditions for it going into the current
            ## fragment are: it has to have the same res_name, and its
            ## atom name cannot conflict with the names of atoms already
            ## in the fragment
            if atm.res_name != current_polymer_res_name or \
               atm.name in current_polymer_name_dict:
                current_polymer_res_name  = atm.res_name
                current_polymer_name_dict = {atm.name: True}

                ## create new fragment and add it to the current fragment list
                current_frag = [atm]
                current_frag_list.append(current_frag)

                ## we have now dealt with the atom, so it can be removed
                ## from the name service list
                self.name_service_list.remove(atm)
                continue

            ## okay, put it in the current fragment
            current_frag.append(atm)
            self.name_service_list.remove(atm)

        ## now assign chain_ids and add the atoms to the structure
        model_ids = sorted(polymer_model_dict)
        model_list = [polymer_model_dict[model_id] for model_id in model_ids]

        ## the largest number of polymer chains found in any one model
        num_chains = 0
        for chain_list in polymer_model_dict.itervalues():
            num_chains = max(num_chains, len(chain_list))

        for chain_index in xrange(num_chains):
            ## get next available chain_id
            chain_id = next_chain_id("")

            ## assign the chain_id to all the atoms in the chain
            ## TODO: check fragment_id too, 2010-09-22
            for model in model_list:
                try:
                    frag_list = model[chain_index]
                except IndexError:
                    ## this model holds fewer polymer chains than the
                    ## largest model; skip it instead of failing, as the
                    ## non-polymer pass below does for missing fragments
                    continue

                for frag in frag_list:
                    for atm in frag:
                        atm.chain_id = chain_id
                        self.struct.add_atom(atm, True)

        ## free the memory used by the polymer naming service
        del polymer_model_dict
        del model_list


        ## NAME SERVICE FOR NON-POLYMER ATOMS
        ## cr = (chain_id, res_name)
        ##
        ## cr_dict[cr_key] = model_dict
        ##
        ## model_dict[model] = frag_list
        ##
        ## frag_list = [ frag1, frag2, frag3, ...]
        ##
        ## frag = [atm1, atm2, atm3, ...]
        cr_dict      = {}
        cr_key_list  = []

        frag_id   = None
        frag      = None
        name_dict = {}

        ## split atoms into fragments
        for atm in self.name_service_list:
            atm_id      = (atm.name, atm.alt_loc)
            atm_frag_id = (atm.model_id, atm.chain_id, atm.fragment_id, atm.res_name)

            ## if the atom fragment id matches the current fragment id
            ## and doesn't conflict with any other atom name in the fragment
            ## then add it to the fragment
            if atm_frag_id == frag_id and atm_id not in name_dict:
                frag.append(atm)
                name_dict[atm_id] = True

            else:
                cr_key = (atm.chain_id, atm.res_name)

                ### debug
                if frag:
                    msg = "name_service: fragment detected in cr=%s" % (
                        str(cr_key))
                    ConsoleOutput.debug(msg)
                    for a in frag:
                        ConsoleOutput.debug("  " + str(a))
                ### /debug

                try:
                    model_dict = cr_dict[cr_key]
                except KeyError:
                    model_dict = cr_dict[cr_key] = {}
                    ## cr_key_list remembers the order of first appearance
                    cr_key_list.append(cr_key)

                try:
                    frag_list = model_dict[atm.model_id]
                except KeyError:
                    frag_list = model_dict[atm.model_id] = []

                name_dict = {atm_id: True}
                frag_id   = atm_frag_id
                frag      = [atm]
                frag_list.append(frag)

        ## free self.name_service_list and other vars to save some memory
        del self.name_service_list

        new_chain_id    = None
        fragment_id_num = None

        for cr_key in cr_key_list:
            ### debug
            msg  = "name_service: chain_id / res_name keys\n"
            msg += "  cr_key: chain_id='%s' res_name='%s'" % (
                cr_key[0], cr_key[1])
            ConsoleOutput.debug(msg)
            ### /debug

            ## get the next chain ID, use the cfr group's
            ## loaded chain_id if possible
            chain_id = next_chain_id(cr_key[0])

            ## if we are not out of chain IDs, use the new chain ID and
            ## reset the fragment_id
            if chain_id is not None:
                new_chain_id    = chain_id
                fragment_id_num = 0

            elif new_chain_id is None or fragment_id_num is None:
                ConsoleOutput.fatal("name_service: unable to assign any chain ids")

            ## get model dictionary
            model_dict = cr_dict[cr_key]

            ## inspect the model dictionary to determine the number of
            ## fragments in each model -- they should be the same
            ## and have a 1:1 correspondence; if not, match up the
            ## fragments as much as possible
            max_frags = -1
            for frag_list in model_dict.itervalues():
                frag_list_len = len(frag_list)

                if max_frags == -1:
                    max_frags = frag_list_len
                    continue

                if max_frags != frag_list_len:
                    strx = "name_service: model fragments not identical"
                    ConsoleOutput.debug(strx)
                    ConsoleOutput.warning(strx)
                    max_frags = max(max_frags, frag_list_len)

            ## now iterate through the fragment lists in parallel and assign
            ## the new chain_id and fragment_id
            for i in xrange(max_frags):
                fragment_id_num += 1

                for frag_list in model_dict.itervalues():
                    try:
                        frag = frag_list[i]
                    except IndexError:
                        ## this model has fewer fragments than max_frags
                        continue

                    ## assign new chain_id and fragment_id, then place the
                    ## atom in the structure
                    for atm in frag:
                        atm.chain_id = new_chain_id
                        atm.fragment_id = str(fragment_id_num)
                        self.struct.add_atom(atm, True)

            ## logging
            ConsoleOutput.warning("name_service(): added chain_id=%s, res_name=%s, num_residues=%d" % (
                new_chain_id, cr_key[1], fragment_id_num))