def combine_ligands_and_hierarchy(pdb_hierarchy, ligands, log=None):
  """
  Append every ligand to the first model of pdb_hierarchy, each one as a
  single-residue chain of its own.

  For each ligand the protein chain (chain.is_protein()) whose mean atom
  position is closest to the ligand's mean atom position donates its chain
  ID; a single blank is used when no protein chain is found.  The new residue
  gets the next residue number after the highest one recorded so far for
  that chain ID.  One summary line per ligand is written to log.

  pdb_hierarchy -- hierarchy whose first model is modified in place
  ligands       -- iterable of atom groups to add
  log           -- writable stream; null_out() is used when omitted
  """
  from iotbx.pdb import hierarchy
  if log is None:
    log = null_out()
  # chain ID -> highest residue number seen (or assigned) for that ID
  highest_resseq = {}
  first_model = pdb_hierarchy.models()[0]
  for lig_index, ligand in enumerate(ligands):
    ligand_center = ligand.atoms().extract_xyz().mean()
    nearest_chain = None
    nearest_dist = sys.maxsize
    for chain in first_model.chains():
      cid = chain.id
      chain_last = chain.residue_groups()[-1].resseq_as_int()
      recorded = highest_resseq.get(cid)
      if recorded is None or recorded < chain_last:
        highest_resseq[cid] = chain_last
      # every chain contributes to the numbering, but only protein chains
      # are candidates for the ligand's chain ID
      if chain.is_protein():
        chain_center = chain.atoms().extract_xyz().mean()
        separation = xyz_distance(chain_center, ligand_center)
        if separation < nearest_dist:
          nearest_dist = separation
          nearest_chain = chain
    target_id = nearest_chain.id if nearest_chain is not None else " "
    ligand_chain = hierarchy.chain(id=target_id)
    ligand_rg = hierarchy.residue_group()
    next_resseq = highest_resseq.get(target_id, 0) + 1
    print(" ligand %d: chain='%s' resseq=%s" %
      (lig_index + 1, target_id, next_resseq), file=log)
    ligand_rg.resseq = next_resseq
    ligand_rg.append_atom_group(ligand)
    ligand_chain.append_residue_group(ligand_rg)
    first_model.append_chain(ligand_chain)
    highest_resseq[target_id] = next_resseq
def combine_ligands_and_hierarchy(pdb_hierarchy, ligands, log=None):
  """
  Add each ligand to the first model of pdb_hierarchy as a new chain holding
  one residue group.

  The chain ID is taken from the protein chain whose mean coordinate is
  nearest to the ligand's mean coordinate (a single blank if there is no
  protein chain).  The residue number is one more than the largest residue
  number recorded for that chain ID, counting ligands already added by this
  call.

  Parameters:
    pdb_hierarchy: hierarchy object; its first model is modified in place.
    ligands: iterable of atom groups to append.
    log: stream receiving one line per ligand; defaults to null_out().
  """
  from iotbx.pdb import hierarchy
  if log is None:
    log = null_out()
  # chain ID -> highest residue number seen or assigned so far
  chain_id_counts = {}
  model = pdb_hierarchy.models()[0]
  for i_lig, ligand in enumerate(ligands):
    xyz_mean = ligand.atoms().extract_xyz().mean()
    best_chain = None
    # sys.maxsize replaces the Python-2-only sys.maxint
    min_dist = sys.maxsize
    for chain in model.chains():
      last_resseq = chain.residue_groups()[-1].resseq_as_int()
      if ((chain.id not in chain_id_counts) or
          (chain_id_counts[chain.id] < last_resseq)):
        chain_id_counts[chain.id] = last_resseq
      # non-protein chains still count for numbering but never donate an ID
      if not chain.is_protein():
        continue
      chain_xyz_mean = chain.atoms().extract_xyz().mean()
      dist = xyz_distance(chain_xyz_mean, xyz_mean)
      if dist < min_dist:
        min_dist = dist
        best_chain = chain
    best_chain_id = " "
    if best_chain is not None:
      best_chain_id = best_chain.id
    new_chain = hierarchy.chain(id=best_chain_id)
    new_rg = hierarchy.residue_group()
    new_resseq = 1
    if best_chain_id in chain_id_counts:
      new_resseq = chain_id_counts[best_chain_id] + 1
    # print function form, matching the other definitions in this file
    print(" ligand %d: chain='%s' resseq=%s" %
      (i_lig+1, best_chain_id, new_resseq), file=log)
    new_rg.resseq = new_resseq
    new_rg.append_atom_group(ligand)
    new_chain.append_residue_group(new_rg)
    model.append_chain(new_chain)
    chain_id_counts[best_chain_id] = new_resseq
def __init__(self, cif_block):
  """
  Build a PDB-style hierarchy from the _atom_site loop of an mmCIF block.

  The result is stored in self.hierarchy.  Atoms are read in file order; a
  new model, chain or residue group is opened whenever the model number,
  auth_asym_id, or residue id / insertion code changes from the previous
  atom.  Atom groups are keyed by (altloc, resname) inside a residue group.

  Parameters:
    cif_block: mmCIF data block; must provide .get(key) and is also passed
      to crystal_symmetry_builder.__init__ and self._wrap_loop_if_needed.

  Raises:
    AssertionError: if a required _atom_site item is absent, an auth_asym_id
      is empty, or anisotropic ids are given without a full set of U or B
      values.
    CifBuilderError: if the anisotropic values cannot be converted to
      floating point.
  """
  crystal_symmetry_builder.__init__(self, cif_block)

  self.hierarchy = hierarchy.root()
  # These items are mandatory for the _atom_site loop, all others are optional
  # NOTE(review): _wrap_loop_if_needed presumably returns None for a missing
  # item -- the fallbacks below rely on that; confirm against the helper.
  type_symbol = self._wrap_loop_if_needed(cif_block, "_atom_site.type_symbol")
  atom_labels = self._wrap_loop_if_needed(cif_block, "_atom_site.auth_atom_id")
  if atom_labels is None:
    # corresponds to chem comp atom name
    atom_labels = self._wrap_loop_if_needed(
      cif_block, "_atom_site.label_atom_id")
  # alternate conformer id
  alt_id = self._wrap_loop_if_needed(cif_block, "_atom_site.label_alt_id")
  # chain id
  label_asym_id = self._wrap_loop_if_needed(
    cif_block, "_atom_site.label_asym_id")
  auth_asym_id = self._wrap_loop_if_needed(cif_block, "_atom_site.auth_asym_id")
  # either asym id may stand in for the other when only one is present
  if label_asym_id is None:
    label_asym_id = auth_asym_id
  if auth_asym_id is None:
    auth_asym_id = label_asym_id
  comp_id = self._wrap_loop_if_needed(cif_block, "_atom_site.auth_comp_id")
  if comp_id is None:
    # residue name
    comp_id = self._wrap_loop_if_needed(cif_block, "_atom_site.label_comp_id")
  entity_id = self._wrap_loop_if_needed(cif_block, "_atom_site.label_entity_id")
  seq_id = self._wrap_loop_if_needed(cif_block, "_atom_site.auth_seq_id")
  if seq_id is None:
    # residue number
    seq_id = self._wrap_loop_if_needed(cif_block, "_atom_site.label_seq_id")
  assert [atom_labels, alt_id, auth_asym_id, comp_id, entity_id,
          seq_id].count(None) == 0, "something is not present"
  assert type_symbol is not None

  atom_site_fp = cif_block.get('_atom_site.phenix_scat_dispersion_real')
  atom_site_fdp = cif_block.get('_atom_site.phenix_scat_dispersion_imag')

  # insertion code
  pdb_ins_code = cif_block.get("_atom_site.pdbx_PDB_ins_code")
  model_ids = cif_block.get("_atom_site.pdbx_PDB_model_num")
  atom_site_id = cif_block.get("_atom_site.id")
  # only permitted values are ATOM or HETATM
  group_PDB = cif_block.get("_atom_site.group_PDB")
  # TODO: read esds
  B_iso_or_equiv = flex.double(
    self._wrap_loop_if_needed(cif_block, "_atom_site.B_iso_or_equiv"))
  cart_x = flex.double(
    self._wrap_loop_if_needed(cif_block, "_atom_site.Cartn_x"))
  cart_y = flex.double(
    self._wrap_loop_if_needed(cif_block, "_atom_site.Cartn_y"))
  cart_z = flex.double(
    self._wrap_loop_if_needed(cif_block, "_atom_site.Cartn_z"))
  occu = flex.double(
    self._wrap_loop_if_needed(cif_block, "_atom_site.occupancy"))
  formal_charge = self._wrap_loop_if_needed(
    cif_block, "_atom_site.pdbx_formal_charge")

  # anisotropic b-factors
  # TODO: read esds
  anisotrop_id = self._wrap_loop_if_needed(cif_block, "_atom_site_anisotrop.id")
  adps = None
  if anisotrop_id is not None:
    u_ij = [
      self._wrap_loop_if_needed(
        cif_block, "_atom_site_anisotrop.U[%s][%s]" % (ij[0], ij[1]))
      for ij in ("11", "22", "33", "12", "13", "23")
    ]
    assert u_ij.count(None) in (0, 6)
    if u_ij.count(None) == 0:
      adps = u_ij
    else:
      assert u_ij.count(None) == 6
      b_ij = [
        self._wrap_loop_if_needed(
          cif_block, "_atom_site_anisotrop.B[%s][%s]" % (ij[0], ij[1]))
        for ij in ("11", "22", "33", "12", "13", "23")
      ]
      assert b_ij.count(None) in (0, 6)
      if b_ij.count(None) == 0:
        adps = adptbx.b_as_u(b_ij)
      # anisotrop ids without either a full U set or a full B set are rejected
      assert not (u_ij.count(None) and b_ij.count(None))
    if adps is not None:
      try:
        adps = [flex.double(adp) for adp in adps]
      except ValueError as e:
        raise CifBuilderError("Error interpreting ADPs: " + str(e))
      adps = flex.sym_mat3_double(*adps)
  # anisotrop id -> ADP, looked up per atom via the stripped atom serial
  py_adps = {}
  if anisotrop_id is not None and adps is not None:
    for an_id, adp in zip(list(anisotrop_id), list(adps)):
      py_adps[an_id] = adp

  current_model_id = None
  current_auth_asym_id = None
  current_residue_id = None
  current_ins_code = None

  for i_atom in range(atom_labels.size()):
    # model(s)
    last_model_id = current_model_id
    current_model_id = model_ids[i_atom]
    assert current_model_id is not None
    if current_model_id != last_model_id:
      model = hierarchy.model(id=current_model_id)
      self.hierarchy.append_model(model)

    # chain(s)
    current_label_asym_id = label_asym_id[i_atom]
    assert current_label_asym_id is not None
    last_auth_asym_id = current_auth_asym_id
    current_auth_asym_id = auth_asym_id[i_atom]
    assert current_auth_asym_id not in [".", "?", " "], "mmCIF file contains " + \
      "record with empty auth_asym_id, which is wrong."
    if (current_auth_asym_id != last_auth_asym_id
        or current_model_id != last_model_id):
      chain = hierarchy.chain(id=current_auth_asym_id)
      model.append_chain(chain)
    else:
      assert current_auth_asym_id == last_auth_asym_id

    # residue_group(s)
    # defined by residue id and insertion code
    last_residue_id = current_residue_id
    current_residue_id = seq_id[i_atom]
    assert current_residue_id is not None
    last_ins_code = current_ins_code
    if pdb_ins_code is not None:
      current_ins_code = pdb_ins_code[i_atom]
      if current_ins_code in ("?", ".", None):
        current_ins_code = " "
    if (current_residue_id != last_residue_id
        or current_ins_code != last_ins_code
        or current_auth_asym_id != last_auth_asym_id
        or current_model_id != last_model_id):
      try:
        resseq = hy36encode(width=4, value=int(current_residue_id))
      except ValueError:
        # non-integer residue id: use it verbatim, but it must already be
        # exactly four characters wide
        resseq = current_residue_id
        assert len(resseq) == 4
      residue_group = hierarchy.residue_group(
        resseq=resseq, icode=current_ins_code)
      chain.append_residue_group(residue_group)
      atom_groups = OrderedDict() # reset atom_groups cache

    # atom_group(s)
    # defined by resname and altloc id
    current_altloc = alt_id[i_atom]
    if current_altloc == "." or current_altloc == "?":
      current_altloc = "" # Main chain atoms
    current_resname = comp_id[i_atom]
    if (current_altloc, current_resname) not in atom_groups:
      atom_group = hierarchy.atom_group(
        altloc=current_altloc, resname=current_resname)
      atom_groups[(current_altloc, current_resname)] = atom_group
      # the blank-altloc group always goes first in the residue group
      if current_altloc == "":
        residue_group.insert_atom_group(0, atom_group)
      else:
        residue_group.append_atom_group(atom_group)
    else:
      atom_group = atom_groups[(current_altloc, current_resname)]

    # atom(s)
    atom = hierarchy.atom()
    atom_group.append_atom(atom)
    atom.set_element(type_symbol[i_atom])
    atom.set_name(
      format_pdb_atom_name(atom_labels[i_atom], type_symbol[i_atom]))
    atom.set_xyz(
      new_xyz=(cart_x[i_atom], cart_y[i_atom], cart_z[i_atom]))
    atom.set_b(B_iso_or_equiv[i_atom])
    atom.set_occ(occu[i_atom])
    # hy36encode should go once the pdb.hierarchy has been
    # modified to no longer store fixed-width strings
    atom.set_serial(
      hy36encode(width=5, value=int(atom_site_id[i_atom])))
    # some code relies on an empty segid being 4 spaces
    atom.set_segid("    ")
    if group_PDB is not None and group_PDB[i_atom] == "HETATM":
      atom.hetero = True
    if formal_charge is not None:
      charge = formal_charge[i_atom]
      if charge not in ("?", "."):
        # the sign may be written before or after the digits
        if charge.endswith("-") or charge.startswith("-"):
          sign = "-"
        else:
          sign = "+"
        charge = charge.strip(" -+")
        charge = int(charge)
        if charge == 0:
          sign = ""
        atom.set_charge("%i%s" % (charge, sign))
    if atom_site_fp is not None:
      fp = atom_site_fp[i_atom]
      if fp not in ("?", "."):
        atom.set_fp(new_fp=float(fp))
    if atom_site_fdp is not None:
      fdp = atom_site_fdp[i_atom]
      if fdp not in ("?", "."):
        atom.set_fdp(new_fdp=float(fdp))
    if anisotrop_id is not None and adps is not None:
      py_u_ij = py_adps.get(atom.serial.strip(), None)
      if py_u_ij is not None:
        atom.set_uij(py_u_ij)

  if len(self.hierarchy.models()) == 1:
    # for compatibility with single-model PDB files
    self.hierarchy.models()[0].id = ""
def __init__(self, pdb_hierarchy, fmodel, ncs_operators, ligands, params,
    log=None):
  """
  Pick the best-fitting ligand as a reference and generate NCS-related
  copies of it, keeping those that fit the map well enough.

  The ligand with the highest map CC (which must also exceed
  params.min_cc_reference) becomes the reference.  Each operator in
  ncs_operators[0].copies is applied to the reference coordinates; a copy is
  skipped when its centre lies within params.min_dist_center of another
  input ligand, and kept in self.new_ligands when its CC exceeds
  params.min_cc.

  Parameters:
    pdb_hierarchy: model hierarchy (stored on self by adopt_init_args).
    fmodel: object exposing .xray_structure; a deep copy is kept as
      self.xray_structure.
    ncs_operators: NCS description providing get_ncs_groups_shifts(),
      get_array_of_str_selections() and indexable groups with .copies.
    ligands: candidate ligand atom groups.
    params: extracted parameters; master_phil() defaults when None.
    log: output stream, sys.stdout when None.

  Raises:
    Sorry: when no ligand has a CC above params.min_cc_reference.
    AssertionError: when the reference ligand is not closest to NCS group 0.
  """
  if log is None:
    log = sys.stdout
  if params is None:
    params = master_phil().fetch().extract()
  # Must run before any other local is created: it is handed locals().
  # NOTE(review): presumably stores each argument as an attribute of self.
  adopt_init_args(self, locals())
  self.xray_structure = fmodel.xray_structure.deep_copy_scatterers()
  from iotbx.pdb import hierarchy
  self.setup_maps()
  best_cc = 0
  best_k = -1
  best_ligand = None
  print("Identifying reference ligand...", file=log)

  def show_map_stats(prefix, stats):
    print(" %s: CC = %5.3f mean = %6.2f" % (prefix, stats.cc,
      stats.map_mean), file=log)

  for k, ligand in enumerate(ligands):
    start = self.get_sites_cc(ligand.atoms())
    show_map_stats("Ligand %d" % (k+1), start)
    if (start.cc > best_cc) and (start.cc > params.min_cc_reference):
      best_ligand = ligand
      best_k = k
      # track the running maximum so the highest-CC ligand really wins
      best_cc = start.cc
  if best_ligand is None:
    raise Sorry("No ligand with acceptable CC (>%.2f) found." %
      params.min_cc_reference)
  best_atoms = best_ligand.atoms()
  other_ligands = [lig for lig in ligands if lig is not best_ligand]
  print("Copy #%d was the best, using that as reference" % (best_k+1),
    file=log)
  print("", file=log)

  # Find the NCS group with the smallest shift for the reference ligand.
  sites_ref = best_ligand.atoms().extract_xyz()
  min_dist = sys.maxsize
  best_group = None
  shifts = ncs_operators.get_ncs_groups_shifts(
    self.xray_structure.sites_cart(), sites_ref)
  for i, s in enumerate(shifts):
    dxyz = xyz_distance(s[0], (0,0,0))
    if dxyz < min_dist:
      best_group = i
      min_dist = dxyz
  array_of_str_selections = ncs_operators.get_array_of_str_selections()[0]
  if best_group is not None:
    print("This appears to be bound to the selection \"%s\"" % \
      array_of_str_selections[best_group], file=log)
  if best_group != 0:
    print('best_group',best_group)
    assert 0 # always have the first ligand in the master

  self.new_ligands = []
  for j, operator in enumerate(ncs_operators[0].copies):
    new_ligand = best_ligand.detached_copy()
    sites_new = operator.r.elems * sites_ref + operator.t.elems
    sites_mean = sites_new.mean()
    for other in other_ligands:
      sites_other_mean = other.atoms().extract_xyz().mean()
      dxyz = xyz_distance(sites_other_mean, sites_mean)
      if dxyz < params.min_dist_center:
        print(" operator %d specifies an existing ligand" % (j+1), file=log)
        break
    else:
      # no existing ligand occupies this site: evaluate the transformed copy
      new_ligand.atoms().set_xyz(sites_new)
      stats_new = self.get_sites_cc(best_atoms, sites_new)
      show_map_stats("NCS op. %2d" % (j+1), stats_new)
      if params.write_sampled_pdbs:
        # the ligand is attached to a residue group before its atom records
        # are formatted
        lig_rg = hierarchy.residue_group()
        lig_rg.resseq = j+1
        lig_rg.append_atom_group(new_ligand)
        with open("ncs_ligand_%d.pdb" % (j+1), "w") as f:
          for atom in new_ligand.atoms():
            f.write(atom.format_atom_record()+"\n")
      # XXX ideally, given multiple high-quality ligand placements, we should
      # probably try sampling NCS operations for all of these and pick the
      # best new CC, rather than assuming that the best starting ligand will
      # superpose best on the density.
      if stats_new.cc > params.min_cc:
        print(" operator %d has acceptable CC (%.3f)" % (j+1, stats_new.cc),
          file=log)
        self.new_ligands.append(new_ligand)
def __init__(self, pdb_hierarchy, fmodel, ncs_operators, ligands, params,
    log=None):
  """
  Pick the best-fitting ligand as a reference and generate NCS-related
  copies of it, keeping those that fit the map well enough.

  The ligand with the highest map CC (which must also exceed
  params.min_cc_reference) becomes the reference.  The operator group whose
  distance_from_center() to the reference coordinates is smallest supplies
  the operators; each is applied to the reference coordinates.  A copy is
  skipped when its centre lies within params.min_dist_center of another
  input ligand, and kept in self.new_ligands when its CC exceeds
  params.min_cc.

  Parameters:
    pdb_hierarchy: model hierarchy (stored on self by adopt_init_args).
    fmodel: object exposing .xray_structure; a deep copy is kept as
      self.xray_structure.
    ncs_operators: iterable of operator groups, each providing
      distance_from_center(), .selection_string and .operators.
    ligands: candidate ligand atom groups.
    params: extracted parameters; master_phil() defaults when None.
    log: output stream, sys.stdout when None.

  Raises:
    Sorry: when no ligand has a CC above params.min_cc_reference, or when
      no operator group is available.
  """
  if log is None:
    log = sys.stdout
  if params is None:
    params = master_phil().fetch().extract()
  # Must run before any other local is created: it is handed locals().
  # NOTE(review): presumably stores each argument as an attribute of self.
  adopt_init_args(self, locals())
  self.xray_structure = fmodel.xray_structure.deep_copy_scatterers()
  from iotbx.pdb import hierarchy
  self.setup_maps()
  best_cc = 0
  best_k = -1
  best_ligand = None
  print("Identifying reference ligand...", file=log)

  def show_map_stats(prefix, stats):
    print(" %s: CC = %5.3f mean = %6.2f" % (prefix, stats.cc,
      stats.map_mean), file=log)

  for k, ligand in enumerate(ligands):
    start = self.get_sites_cc(ligand.atoms())
    show_map_stats("Ligand %d" % (k+1), start)
    if (start.cc > best_cc) and (start.cc > params.min_cc_reference):
      best_ligand = ligand
      best_k = k
      # track the running maximum so the highest-CC ligand really wins
      best_cc = start.cc
  if best_ligand is None:
    raise Sorry("No ligand with acceptable CC (>%.2f) found." %
      params.min_cc_reference)
  best_atoms = best_ligand.atoms()
  other_ligands = [lig for lig in ligands if lig is not best_ligand]
  print("Copy #%d was the best, using that as reference" % (best_k+1),
    file=log)
  print("", file=log)

  # Choose the operator group closest to the reference ligand.
  sites_ref = best_ligand.atoms().extract_xyz()
  # sys.maxsize replaces the Python-2-only sys.maxint
  min_dist = sys.maxsize
  best_group = None
  for op_group in ncs_operators:
    dxyz = op_group.distance_from_center(sites_ref)
    if dxyz < min_dist:
      best_group = op_group
      min_dist = dxyz
  self.new_ligands = []
  if best_group is None:
    # nothing to iterate over; fail with a readable message instead of an
    # AttributeError on None
    raise Sorry("No NCS operator group available for the reference ligand.")
  print("This appears to be bound to the selection \"%s\"" % \
    best_group.selection_string, file=log)

  for j, operator in enumerate(best_group.operators):
    new_ligand = best_ligand.detached_copy()
    sites_new = operator.r.elems * sites_ref + operator.t.elems
    sites_mean = sites_new.mean()
    for other in other_ligands:
      sites_other_mean = other.atoms().extract_xyz().mean()
      dxyz = xyz_distance(sites_other_mean, sites_mean)
      if dxyz < params.min_dist_center:
        print(" operator %d specifies an existing ligand" % (j+1), file=log)
        break
    else:
      # no existing ligand occupies this site: evaluate the transformed copy
      new_ligand.atoms().set_xyz(sites_new)
      stats_new = self.get_sites_cc(best_atoms, sites_new)
      show_map_stats("NCS op. %2d" % (j+1), stats_new)
      if params.write_sampled_pdbs:
        # the ligand is attached to a residue group before its atom records
        # are formatted
        lig_rg = hierarchy.residue_group()
        lig_rg.resseq = j+1
        lig_rg.append_atom_group(new_ligand)
        with open("ncs_ligand_%d.pdb" % (j+1), "w") as f:
          for atom in new_ligand.atoms():
            f.write(atom.format_atom_record()+"\n")
      # XXX ideally, given multiple high-quality ligand placements, we should
      # probably try sampling NCS operations for all of these and pick the
      # best new CC, rather than assuming that the best starting ligand will
      # superpose best on the density.
      if stats_new.cc > params.min_cc:
        print(" operator %d has acceptable CC (%.3f)" % (j+1, stats_new.cc),
          file=log)
        self.new_ligands.append(new_ligand)