def load_unit_cell(self, ucell_map):
    """Called by the implementation of load_metadata to load the unit cell
    parameters for the structure.

    ucell_map is a mapping which must hold the keys "a", "b", "c", "alpha",
    "beta" and "gamma"; the key "space_group" is optional.  The values are
    passed unchanged, as keyword arguments, to UnitCell.UnitCell and the
    resulting object is stored in self.struct.unit_cell.

    If any of the six required keys is missing, the first missing key is
    reported through ConsoleOutput.debug and the structure is left
    untouched.
    """
    unit_cell_args = {}

    ## every cell parameter is required; give up on the first missing one
    for key in ("a", "b", "c", "alpha", "beta", "gamma"):
        if key not in ucell_map:
            ConsoleOutput.debug("ucell_map missing: %s" % (key))
            return
        unit_cell_args[key] = ucell_map[key]

    ## the space group is only passed on when the source provided one, so
    ## UnitCell.UnitCell falls back on its own default otherwise
    if "space_group" in ucell_map:
        unit_cell_args["space_group"] = ucell_map["space_group"]

    self.struct.unit_cell = UnitCell.UnitCell(**unit_cell_args)
def load_unit_cell(self, ucell_map):
    """Called by the implementation of load_metadata to load the unit cell
    parameters for the structure.

    ucell_map is a mapping which must hold the keys "a", "b", "c", "alpha",
    "beta" and "gamma"; the key "space_group" is optional.  The values are
    passed unchanged, as keyword arguments, to UnitCell.UnitCell and the
    resulting object is stored in self.struct.unit_cell.

    If any of the six required keys is missing, the first missing key is
    reported through ConsoleOutput.debug and the structure is left
    untouched.
    """
    unit_cell_args = {}

    ## every cell parameter is required; give up on the first missing one
    for key in ("a", "b", "c", "alpha", "beta", "gamma"):
        if key not in ucell_map:
            ConsoleOutput.debug("ucell_map missing: %s" % (key))
            return
        unit_cell_args[key] = ucell_map[key]

    ## the space group is only passed on when the source provided one, so
    ## UnitCell.UnitCell falls back on its own default otherwise
    if "space_group" in ucell_map:
        unit_cell_args["space_group"] = ucell_map["space_group"]

    self.struct.unit_cell = UnitCell.UnitCell(**unit_cell_args)
def read_end_finalize(self):
    """Called for final cleanup after structure source reading is done.

    Three optional post-processing steps are run, each one only when its
    builder flag is exactly True:

      self.calc_sequence  -- every chain whose sequence is empty gets its
                             sequence set from its own fragments
      self.library_bonds  -- bonds are added to the structure as defined
                             in the monomer library
      self.distance_bonds -- bonds are added to the structure from
                             covalent distance calculations
    """
    ConsoleOutput.debug("read_end_finalize()")

    ## fill in the sequence of every chain which does not have one yet
    if self.calc_sequence is True:
        for model in self.struct.iter_models():
            for chain in model.iter_chains():
                ## chains which already carry a sequence are left alone
                if len(chain.sequence) != 0:
                    continue
                fragment_iter = chain.iter_fragments()
                chain.sequence.set_from_fragments(fragment_iter)

    ## bonds taken from the monomer library definitions
    if self.library_bonds is True:
        self.struct.add_bonds_from_library()

    ## bonds derived from covalent distance calculations
    if self.distance_bonds is True:
        self.struct.add_bonds_from_covalent_distance()
def read_end_finalize(self):
    """Called for final cleanup after structure source reading is done.

    Three optional post-processing steps are run, each one only when its
    builder flag is exactly True:

      self.calc_sequence  -- every chain whose sequence is empty gets its
                             sequence set from its own fragments
      self.library_bonds  -- bonds are added to the structure as defined
                             in the monomer library
      self.distance_bonds -- bonds are added to the structure from
                             covalent distance calculations
    """
    ConsoleOutput.debug("read_end_finalize()")

    ## fill in the sequence of every chain which does not have one yet
    if self.calc_sequence is True:
        for model in self.struct.iter_models():
            for chain in model.iter_chains():
                ## chains which already carry a sequence are left alone
                if len(chain.sequence) != 0:
                    continue
                fragment_iter = chain.iter_fragments()
                chain.sequence.set_from_fragments(fragment_iter)

    ## bonds taken from the monomer library definitions
    if self.library_bonds is True:
        self.struct.add_bonds_from_library()

    ## bonds derived from covalent distance calculations
    if self.distance_bonds is True:
        self.struct.add_bonds_from_covalent_distance()
def name_service(self):
    """Runs the name service on all atoms needing to be named.  This is a
    complicated function which corrects most commonly found errors and
    omissions from PDB files.

    The atoms queued in self.name_service_list are handled in two passes:

      1. Polymer atoms (residues the monomer Library reports as amino or
         nucleic acids) are split into chains.  Every chain is given the
         next unused chain_id and its atoms are added to self.struct.

      2. The remaining non-polymer atoms are grouped by
         (chain_id, res_name) and split into fragments.  Every group gets
         an unused chain_id (the one it was loaded with, when that is
         still free), its fragments are renumbered starting from 1, and
         the atoms are added to self.struct.

    If the list is empty the method returns immediately.  Otherwise the
    attribute self.name_service_list is deleted once the non-polymer atoms
    have been split into fragments.

    Raises StructureBuilderError when no unused chain_id is left.
    """
    if len(self.name_service_list) == 0:
        return

    ## returns the next available chain_id in self.struct
    ## XXX: it's possible to run out of chain IDs!
    def next_chain_id(suggest_chain_id):
        if suggest_chain_id != "":
            chain = self.struct.get_chain(suggest_chain_id)
            if not chain:
                return suggest_chain_id

        for chain_id in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789":
            chain = self.struct.get_chain(chain_id)
            if not chain:
                return chain_id

        raise StructureBuilderError("name_service exhausted new chain_ids")

    ## NAME SERVICE FOR POLYMER ATOMS

    ## What if we are given a list of atoms with res_name, frag_id, and
    ## model_id where the frag_id are sequential?  They can be sequential
    ## several ways using insertion codes, but large breaks often denote
    ## chain breaks.

    ## We need to handle the special case of a list of polymer residues
    ## which do not have chain_ids.  This requires a first pass over the
    ## atom list using different rules than what is used for sorting out
    ## non-polymers.

    current_polymer_type = None
    current_polymer_model_id = None
    current_polymer_chain_id = None
    current_polymer_frag_id = None
    current_polymer_res_name = None
    current_polymer_name_dict = None

    ## polymer_model_dict[model_id] = [chain1, chain2, ...]
    ## chain = [frag1, frag2, ...]
    ## frag = [atm1, atm2, ...]
    polymer_model_dict = {}
    current_frag = None
    current_frag_list = None

    ## atoms left over for the non-polymer pass, in their original order;
    ## collecting them here avoids a linear list.remove() per polymer atom
    non_polymer_atoms = []

    for atm in self.name_service_list:
        ## determine the polymer type of the atom
        if Library.library_is_amino_acid(atm.res_name):
            polymer_type = "protein"
        elif Library.library_is_nucleic_acid(atm.res_name):
            polymer_type = "dna"
        else:
            ## if the atom is not a polymer, we definitely have a break
            ## in this chain
            current_polymer_type = None
            current_polymer_model_id = None
            current_polymer_chain_id = None
            current_polymer_frag_id = None
            current_polymer_res_name = None
            current_polymer_name_dict = None
            current_frag = None
            current_frag_list = None
            non_polymer_atoms.append(atm)
            continue

        fragment_id = Structure.FragmentID(atm.fragment_id)

        ## now we deal with conditions which can terminate the current
        ## polymer chain
        ## NOTE(review): current_polymer_frag_id is only set when a chain
        ## starts, so the last test compares against the first fragment of
        ## the chain, not the previous one -- confirm this is intended
        if (polymer_type != current_polymer_type or
                atm.model_id != current_polymer_model_id or
                atm.chain_id != current_polymer_chain_id or
                fragment_id < current_polymer_frag_id):

            current_polymer_type = polymer_type
            current_polymer_model_id = atm.model_id
            current_polymer_chain_id = atm.chain_id
            current_polymer_frag_id = fragment_id
            current_polymer_res_name = atm.res_name
            current_polymer_name_dict = {atm.name: True}

            ## create new fragment
            current_frag = [atm]
            current_frag_list = [current_frag]

            ## create new fragment list (chain) in the atom's model
            polymer_model_dict.setdefault(atm.model_id, []).append(
                current_frag_list)
            continue

        ## if we get here, then we know this atom is destined for the
        ## current chain, and the algorithm needs to place the atom
        ## in the current fragment, or create a new fragment for it
        ## to go into; the conditions for it going into the current
        ## fragment are: it has to have the same res_name, and its
        ## atom name cannot conflict with the recorded atom names of
        ## the fragment
        if (atm.res_name != current_polymer_res_name or
                atm.name in current_polymer_name_dict):

            current_polymer_res_name = atm.res_name
            current_polymer_name_dict = {atm.name: True}

            ## create new fragment and add it to the current fragment list
            current_frag = [atm]
            current_frag_list.append(current_frag)
            continue

        ## okay, put it in the current fragment
        current_frag.append(atm)

    ## the polymer atoms have been dealt with, so only the non-polymer
    ## atoms stay in the name service list (updated in place)
    self.name_service_list[:] = non_polymer_atoms
    del non_polymer_atoms

    ## now assign chain_ids and add the atoms to the structure
    model_list = [polymer_model_dict[model_id]
                  for model_id in sorted(polymer_model_dict)]

    num_chains = 0
    for chain_list in model_list:
        num_chains = max(num_chains, len(chain_list))

    for chain_index in range(num_chains):
        ## get next available chain_id
        chain_id = next_chain_id("")

        ## assign the chain_id to all the atoms in the chain
        ## TODO: check fragment_id too
        for chain_list in model_list:
            ## models may hold different numbers of polymer chains; a
            ## model without a chain at this index is skipped
            if chain_index >= len(chain_list):
                continue

            for frag in chain_list[chain_index]:
                for atm in frag:
                    atm.chain_id = chain_id
                    self.struct.add_atom(atm, True)

    ## free the memory used by the polymer naming service
    del polymer_model_dict
    del model_list

    ## NAME SERVICE FOR NON-POLYMER ATOMS
    ## cr = (chain_id, res_name)
    ##
    ## cr_dict[cr_key] = model_dict
    ##
    ## model_dict[model] = frag_list
    ##
    ## frag_list = [ frag1, frag2, frag3, ...]
    ##
    ## frag = [atm1, atm2, atm3, ...]

    cr_dict = {}
    cr_key_list = []

    frag_id = None
    frag = None
    name_dict = {}

    ## split atoms into fragments
    for atm in self.name_service_list:
        atm_id = (atm.name, atm.alt_loc)
        atm_frag_id = (atm.model_id, atm.chain_id, atm.fragment_id,
                       atm.res_name)

        ## if the atom fragment id matches the current fragment id
        ## and doesn't conflict with any other atom name in the fragment
        ## then add it to the fragment
        if atm_frag_id == frag_id and atm_id not in name_dict:
            frag.append(atm)
            name_dict[atm_id] = True
            continue

        cr_key = (atm.chain_id, atm.res_name)

        ### debug
        if frag:
            ConsoleOutput.debug(
                "name_service: fragment detected in cr=%s" % (str(cr_key)))
            for a in frag:
                ConsoleOutput.debug(" " + str(a))
        ### /debug

        ## cr_key_list remembers the order in which the groups were
        ## first seen
        try:
            model_dict = cr_dict[cr_key]
        except KeyError:
            model_dict = cr_dict[cr_key] = {}
            cr_key_list.append(cr_key)

        frag_list = model_dict.setdefault(atm.model_id, [])

        ## start a new fragment with this atom
        name_dict = {atm_id: True}
        frag_id = atm_frag_id
        frag = [atm]
        frag_list.append(frag)

    ## free self.name_service_list and other vars to save some memory
    del self.name_service_list

    new_chain_id = None
    fragment_id_num = None

    for cr_key in cr_key_list:
        ### debug
        ConsoleOutput.debug("name_service: chain_id / res_name keys")
        ConsoleOutput.debug(" cr_key: chain_id='%s' res_name='%s'" % (
            cr_key[0], cr_key[1]))
        ### /debug

        ## get the next chain ID, use the cr group's
        ## loaded chain_id if possible
        chain_id = next_chain_id(cr_key[0])

        ## if we are not out of chain IDs, use the new chain ID and
        ## reset the fragment_id
        ## NOTE(review): next_chain_id() raises StructureBuilderError
        ## instead of returning None, so the fallback branch below is
        ## not reached as the code stands
        if chain_id is not None:
            new_chain_id = chain_id
            fragment_id_num = 0
        elif new_chain_id is None or fragment_id_num is None:
            ConsoleOutput.fatal(
                "name_service: unable to assign any chain ids")

        ## get model dictionary
        model_dict = cr_dict[cr_key]

        ## inspect the model dictionary to determine the number of
        ## fragments in each model -- they should be the same
        ## and have a 1:1 correspondence; if not, match up the
        ## fragments as much as possible
        max_frags = -1
        for frag_list in model_dict.values():
            frag_list_len = len(frag_list)

            if max_frags == -1:
                max_frags = frag_list_len
                continue

            if max_frags != frag_list_len:
                strx = "name_service: model fragments not identical"
                ConsoleOutput.debug(strx)
                ConsoleOutput.warning(strx)
                max_frags = max(max_frags, frag_list_len)

        ## now iterate through the fragment lists in parallel and assign
        ## the new chain_id and fragment_id
        for i in range(max_frags):
            fragment_id_num += 1

            for frag_list in model_dict.values():
                ## this model has fewer fragments than the largest one
                if i >= len(frag_list):
                    continue

                ## assign new chain_id and fragment_id, then place the
                ## atom in the structure
                for atm in frag_list[i]:
                    atm.chain_id = new_chain_id
                    atm.fragment_id = str(fragment_id_num)
                    self.struct.add_atom(atm, True)

        ## logging
        ConsoleOutput.warning(
            "name_service(): added chain_id=%s, res_name=%s, num_residues=%d" % (
                new_chain_id, cr_key[1], fragment_id_num))
def name_service(self):
    """Runs the name service on all atoms needing to be named.  This is a
    complicated function which corrects most commonly found errors and
    omissions from PDB files.

    The atoms queued in self.name_service_list are handled in two passes:

      1. Polymer atoms (residues the monomer Library reports as amino or
         nucleic acids) are split into chains.  Every chain is given the
         next unused chain_id and its atoms are added to self.struct.

      2. The remaining non-polymer atoms are grouped by
         (chain_id, res_name) and split into fragments.  Every group gets
         an unused chain_id (the one it was loaded with, when that is
         still free), its fragments are renumbered starting from 1, and
         the atoms are added to self.struct.

    If the list is empty the method returns immediately.  Otherwise the
    attribute self.name_service_list is deleted once the non-polymer atoms
    have been split into fragments.

    Raises StructureBuilderError when no unused chain_id is left.
    """
    if len(self.name_service_list) == 0:
        return

    ## returns the next available chain_id in self.struct
    ## XXX: it's possible to run out of chain IDs!
    def next_chain_id(suggest_chain_id):
        if suggest_chain_id != "":
            chain = self.struct.get_chain(suggest_chain_id)
            if not chain:
                return suggest_chain_id

        ## TODO: Add the following alphanumeric string to Constants.py, 2010-09-21
        for chain_id in "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789":
            chain = self.struct.get_chain(chain_id)
            if not chain:
                return chain_id

        raise StructureBuilderError("name_service exhausted new chain_ids")

    ## NAME SERVICE FOR POLYMER ATOMS

    ## What if we are given a list of atoms with res_name, frag_id, and
    ## model_id where the frag_id are sequential? They can be sequential
    ## several ways using insertion codes, but large breaks often denote
    ## chain breaks.

    ## We need to handle the special case of a list of polymer residues
    ## which do not have chain_ids. This requires a first pass over the
    ## atom list using different rules than what we use for sorting out
    ## non-polymers.

    current_polymer_type = None
    current_polymer_model_id = None
    current_polymer_chain_id = None
    current_polymer_frag_id = None
    current_polymer_res_name = None
    current_polymer_name_dict = None

    ## polymer_model_dict[model_id] = [chain1, chain2, ...]
    ## chain = [frag1, frag2, ...]
    ## frag = [atm1, atm2, ...]
    polymer_model_dict = {}
    current_frag = None
    current_frag_list = None

    ## atoms left over for the non-polymer pass, in their original order;
    ## collecting them here avoids a linear list.remove() per polymer atom
    non_polymer_atoms = []

    for atm in self.name_service_list:
        ## determine the polymer type of the atom
        if Library.library_is_amino_acid(atm.res_name):
            polymer_type = "protein"
        elif Library.library_is_nucleic_acid(atm.res_name):
            polymer_type = "dna"
        else:
            ## if the atom is not a polymer, we definitely have a break
            ## in this chain
            current_polymer_type = None
            current_polymer_model_id = None
            current_polymer_chain_id = None
            current_polymer_frag_id = None
            current_polymer_res_name = None
            current_polymer_name_dict = None
            current_frag = None
            current_frag_list = None
            non_polymer_atoms.append(atm)
            continue

        fragment_id = Structure.FragmentID(atm.fragment_id)

        ## now we deal with conditions which can terminate the current
        ## polymer chain
        ## NOTE(review): current_polymer_frag_id is only set when a chain
        ## starts, so the last test compares against the first fragment of
        ## the chain, not the previous one -- confirm this is intended
        if (polymer_type != current_polymer_type or
                atm.model_id != current_polymer_model_id or
                atm.chain_id != current_polymer_chain_id or
                fragment_id < current_polymer_frag_id):

            current_polymer_type = polymer_type
            current_polymer_model_id = atm.model_id
            current_polymer_chain_id = atm.chain_id
            current_polymer_frag_id = fragment_id
            current_polymer_res_name = atm.res_name
            current_polymer_name_dict = {atm.name: True}

            ## create new fragment
            current_frag = [atm]
            current_frag_list = [current_frag]

            ## create new fragment list (chain) in the atom's model
            polymer_model_dict.setdefault(atm.model_id, []).append(
                current_frag_list)
            continue

        ## if we get here, then we know this atom is destined for the
        ## current chain, and the algorithm needs to place the atom
        ## in the current fragment, or create a new fragment for it
        ## to go into; the conditions for it going into the current
        ## fragment are: it has to have the same res_name, and its
        ## atom name cannot conflict with the recorded atom names of
        ## the fragment
        if (atm.res_name != current_polymer_res_name or
                atm.name in current_polymer_name_dict):

            current_polymer_res_name = atm.res_name
            current_polymer_name_dict = {atm.name: True}

            ## create new fragment and add it to the current fragment list
            current_frag = [atm]
            current_frag_list.append(current_frag)
            continue

        ## okay, put it in the current fragment
        current_frag.append(atm)

    ## the polymer atoms have been dealt with, so only the non-polymer
    ## atoms stay in the name service list (updated in place)
    self.name_service_list[:] = non_polymer_atoms
    del non_polymer_atoms

    ## now assign chain_ids and add the atoms to the structure
    model_list = [polymer_model_dict[model_id]
                  for model_id in sorted(polymer_model_dict)]

    num_chains = 0
    for chain_list in model_list:
        num_chains = max(num_chains, len(chain_list))

    for chain_index in range(num_chains):
        ## get next available chain_id
        chain_id = next_chain_id("")

        ## assign the chain_id to all the atoms in the chain
        ## TODO: check fragment_id too, 2010-09-22
        for chain_list in model_list:
            ## models may hold different numbers of polymer chains; a
            ## model without a chain at this index is skipped
            if chain_index >= len(chain_list):
                continue

            for frag in chain_list[chain_index]:
                for atm in frag:
                    atm.chain_id = chain_id
                    self.struct.add_atom(atm, True)

    ## free the memory used by the polymer naming service
    del polymer_model_dict
    del model_list

    ## NAME SERVICE FOR NON-POLYMER ATOMS
    ## cr = (chain_id, res_name)
    ##
    ## cr_dict[cr_key] = model_dict
    ##
    ## model_dict[model] = frag_list
    ##
    ## frag_list = [ frag1, frag2, frag3, ...]
    ##
    ## frag = [atm1, atm2, atm3, ...]

    cr_dict = {}
    cr_key_list = []

    frag_id = None
    frag = None
    name_dict = {}

    ## split atoms into fragments
    for atm in self.name_service_list:
        atm_id = (atm.name, atm.alt_loc)
        atm_frag_id = (atm.model_id, atm.chain_id, atm.fragment_id,
                       atm.res_name)

        ## if the atom fragment id matches the current fragment id
        ## and doesn't conflict with any other atom name in the fragment
        ## then add it to the fragment
        if atm_frag_id == frag_id and atm_id not in name_dict:
            frag.append(atm)
            name_dict[atm_id] = True
            continue

        cr_key = (atm.chain_id, atm.res_name)

        ### debug
        if frag:
            msg = "name_service: fragment detected in cr=%s" % (
                str(cr_key))
            ConsoleOutput.debug(msg)
            for a in frag:
                ConsoleOutput.debug(" " + str(a))
        ### /debug

        ## cr_key_list remembers the order in which the groups were
        ## first seen
        try:
            model_dict = cr_dict[cr_key]
        except KeyError:
            model_dict = cr_dict[cr_key] = {}
            cr_key_list.append(cr_key)

        frag_list = model_dict.setdefault(atm.model_id, [])

        ## start a new fragment with this atom
        name_dict = {atm_id: True}
        frag_id = atm_frag_id
        frag = [atm]
        frag_list.append(frag)

    ## free self.name_service_list and other vars to save some memory
    del self.name_service_list

    new_chain_id = None
    fragment_id_num = None

    for cr_key in cr_key_list:
        ### debug
        msg = "name_service: chain_id / res_name keys\n"
        msg += " cr_key: chain_id='%s' res_name='%s'" % (
            cr_key[0], cr_key[1])
        ConsoleOutput.debug(msg)
        ### /debug

        ## get the next chain ID, use the cr group's
        ## loaded chain_id if possible
        chain_id = next_chain_id(cr_key[0])

        ## if we are not out of chain IDs, use the new chain ID and
        ## reset the fragment_id
        ## NOTE(review): next_chain_id() raises StructureBuilderError
        ## instead of returning None, so the fallback branch below is
        ## not reached as the code stands
        if chain_id is not None:
            new_chain_id = chain_id
            fragment_id_num = 0
        elif new_chain_id is None or fragment_id_num is None:
            ConsoleOutput.fatal("name_service: unable to assign any chain ids")

        ## get model dictionary
        model_dict = cr_dict[cr_key]

        ## inspect the model dictionary to determine the number of
        ## fragments in each model -- they should be the same
        ## and have a 1:1 correspondence; if not, match up the
        ## fragments as much as possible
        max_frags = -1
        for frag_list in model_dict.values():
            frag_list_len = len(frag_list)

            if max_frags == -1:
                max_frags = frag_list_len
                continue

            if max_frags != frag_list_len:
                strx = "name_service: model fragments not identical"
                ConsoleOutput.debug(strx)
                ConsoleOutput.warning(strx)
                max_frags = max(max_frags, frag_list_len)

        ## now iterate through the fragment lists in parallel and assign
        ## the new chain_id and fragment_id
        for i in range(max_frags):
            fragment_id_num += 1

            for frag_list in model_dict.values():
                ## this model has fewer fragments than the largest one
                if i >= len(frag_list):
                    continue

                ## assign new chain_id and fragment_id, then place the
                ## atom in the structure
                for atm in frag_list[i]:
                    atm.chain_id = new_chain_id
                    atm.fragment_id = str(fragment_id_num)
                    self.struct.add_atom(atm, True)

        ## logging
        ConsoleOutput.warning("name_service(): added chain_id=%s, res_name=%s, num_residues=%d" % (
            new_chain_id, cr_key[1], fragment_id_num))