Example #1
0
def combine_ligands_and_hierarchy(pdb_hierarchy, ligands, log=None):
  """
  Attach every ligand to the first model of pdb_hierarchy, in place.

  Each ligand (an atom group) is wrapped in a fresh residue group and a
  fresh chain.  The new chain takes the id of the chain for which
  is_protein() is true and whose mean atom position is closest to the
  ligand's mean atom position, or " " when no such chain exists.  The residue
  number is one past the largest last-residue number recorded so far for that
  chain id, or 1 when the id has not been seen.

  :param pdb_hierarchy: hierarchy whose first model receives the new chains
  :param ligands: iterable of atom groups to add
  :param log: file-like object for progress messages (null_out() if None)
  :returns: None; pdb_hierarchy is modified
  """
  from iotbx.pdb import hierarchy
  if log is None:
    log = null_out()
  # chain id -> largest residue number handed out or observed for that id
  highest_resseq = {}
  first_model = pdb_hierarchy.models()[0]
  for ligand_index, ligand in enumerate(ligands):
    ligand_center = ligand.atoms().extract_xyz().mean()
    nearest_chain = None
    nearest_dist = sys.maxsize
    # Chains appended for earlier ligands are part of the model by now, so
    # they take part in the residue-number bookkeeping below.
    for chain in first_model.chains():
      chain_max = chain.residue_groups()[-1].resseq_as_int()
      recorded = highest_resseq.get(chain.id)
      if recorded is None or recorded < chain_max:
        highest_resseq[chain.id] = chain_max
      if chain.is_protein():
        chain_center = chain.atoms().extract_xyz().mean()
        separation = xyz_distance(chain_center, ligand_center)
        if separation < nearest_dist:
          nearest_dist, nearest_chain = separation, chain
    target_id = " " if nearest_chain is None else nearest_chain.id
    ligand_chain = hierarchy.chain(id=target_id)
    ligand_rg = hierarchy.residue_group()
    next_resseq = highest_resseq.get(target_id, 0) + 1
    message = "  ligand %d: chain='%s' resseq=%s" % (
      ligand_index+1, target_id, next_resseq)
    print(message, file=log)
    ligand_rg.resseq = next_resseq
    ligand_rg.append_atom_group(ligand)
    ligand_chain.append_residue_group(ligand_rg)
    first_model.append_chain(ligand_chain)
    highest_resseq[target_id] = next_resseq
Example #2
0
def combine_ligands_and_hierarchy(pdb_hierarchy, ligands, log=None):
  """
  Add each ligand to the first model of pdb_hierarchy as its own chain.

  For every ligand (an atom group) the chain satisfying is_protein() whose
  mean atom position is nearest to the ligand's mean atom position supplies
  the chain id; " " is used when no such chain is found.  The ligand gets a
  residue number one greater than the largest last-residue number recorded
  for that chain id (1 if the id is unknown).

  :param pdb_hierarchy: hierarchy to extend; modified in place
  :param ligands: iterable of atom groups
  :param log: file-like object for progress output (null_out() if None)
  :returns: None
  """
  from iotbx.pdb import hierarchy
  if (log is None):
    log = null_out()
  # chain id -> largest residue number seen or assigned for that id
  chain_id_counts = {}
  model = pdb_hierarchy.models()[0]
  for i_lig, ligand in enumerate(ligands):
    xyz_mean = ligand.atoms().extract_xyz().mean()
    best_chain = None
    # sys.maxsize replaces sys.maxint, which no longer exists in Python 3
    min_dist = sys.maxsize
    for chain in model.chains():
      last_resseq = chain.residue_groups()[-1].resseq_as_int()
      if ((chain.id not in chain_id_counts) or
          (chain_id_counts[chain.id] < last_resseq)):
        chain_id_counts[chain.id] = last_resseq
      # only protein chains are candidates for hosting the ligand
      if (not chain.is_protein()):
        continue
      chain_xyz_mean = chain.atoms().extract_xyz().mean()
      dist = xyz_distance(chain_xyz_mean, xyz_mean)
      if (dist < min_dist):
        min_dist = dist
        best_chain = chain
    best_chain_id = " "
    if (best_chain is not None):
      best_chain_id = best_chain.id
    new_chain = hierarchy.chain(id=best_chain_id)
    new_rg = hierarchy.residue_group()
    new_resseq = 1
    if (best_chain_id in chain_id_counts):
      new_resseq = chain_id_counts[best_chain_id] + 1
    # print() with file= replaces the Python-2-only "print >> log" statement
    print("  ligand %d: chain='%s' resseq=%s" % (i_lig+1,
      best_chain_id, new_resseq), file=log)
    new_rg.resseq = new_resseq
    new_rg.append_atom_group(ligand)
    new_chain.append_residue_group(new_rg)
    model.append_chain(new_chain)
    chain_id_counts[best_chain_id] = new_resseq
Example #3
0
    def __init__(self, cif_block):
        """Populate ``self.hierarchy`` from the ``_atom_site`` items of cif_block.

        The atom records are walked once, in file order; a new model, chain,
        residue group or atom group is opened whenever the corresponding
        identifier changes from one atom to the next.

        Raises AssertionError when one of the required columns is missing or
        malformed, and CifBuilderError when anisotropic displacement values
        cannot be converted to floating point.
        """
        crystal_symmetry_builder.__init__(self, cif_block)

        self.hierarchy = hierarchy.root()
        # These items are mandatory for the _atom_site loop, all others are optional
        # NOTE(review): _wrap_loop_if_needed is defined elsewhere; the None
        # checks below suggest it returns None for an absent item -- confirm.
        type_symbol = self._wrap_loop_if_needed(cif_block,
                                                "_atom_site.type_symbol")
        # Prefer the author-provided atom name, fall back to the label name.
        atom_labels = self._wrap_loop_if_needed(cif_block,
                                                "_atom_site.auth_atom_id")
        if atom_labels is None:
            atom_labels = self._wrap_loop_if_needed(
                cif_block, "_atom_site.label_atom_id"
            )  # corresponds to chem comp atom name
        alt_id = self._wrap_loop_if_needed(
            cif_block, "_atom_site.label_alt_id")  # alternate conformer id
        label_asym_id = self._wrap_loop_if_needed(
            cif_block, "_atom_site.label_asym_id")  # chain id
        auth_asym_id = self._wrap_loop_if_needed(cif_block,
                                                 "_atom_site.auth_asym_id")
        # Each chain-id column stands in for the other when only one exists.
        if label_asym_id is None: label_asym_id = auth_asym_id
        if auth_asym_id is None: auth_asym_id = label_asym_id
        # Residue name: author value first, label value as fallback.
        comp_id = self._wrap_loop_if_needed(cif_block,
                                            "_atom_site.auth_comp_id")
        if comp_id is None:
            comp_id = self._wrap_loop_if_needed(
                cif_block, "_atom_site.label_comp_id")  # residue name
        entity_id = self._wrap_loop_if_needed(cif_block,
                                              "_atom_site.label_entity_id")
        # Residue number: author value first, label value as fallback.
        seq_id = self._wrap_loop_if_needed(cif_block, "_atom_site.auth_seq_id")
        if seq_id is None:
            seq_id = self._wrap_loop_if_needed(
                cif_block, "_atom_site.label_seq_id")  # residue number
        # All of these must have been found, directly or via a fallback.
        assert [atom_labels, alt_id, auth_asym_id, comp_id, entity_id,
                seq_id].count(None) == 0, "someting is not present"
        assert type_symbol is not None

        # Optional per-atom anomalous scattering terms.
        atom_site_fp = cif_block.get('_atom_site.phenix_scat_dispersion_real')
        atom_site_fdp = cif_block.get('_atom_site.phenix_scat_dispersion_imag')

        pdb_ins_code = cif_block.get(
            "_atom_site.pdbx_PDB_ins_code")  # insertion code
        # NOTE(review): model_ids and atom_site_id are indexed for every atom
        # in the loop below without a None check -- confirm both items are
        # guaranteed to be present by the caller.
        model_ids = cif_block.get("_atom_site.pdbx_PDB_model_num")
        atom_site_id = cif_block.get("_atom_site.id")
        # only permitted values are ATOM or HETATM
        group_PDB = cif_block.get("_atom_site.group_PDB")
        # TODO: read esds
        # Numeric columns are converted to flex.double arrays up front.
        B_iso_or_equiv = flex.double(
            self._wrap_loop_if_needed(cif_block, "_atom_site.B_iso_or_equiv"))
        cart_x = flex.double(
            self._wrap_loop_if_needed(cif_block, "_atom_site.Cartn_x"))
        cart_y = flex.double(
            self._wrap_loop_if_needed(cif_block, "_atom_site.Cartn_y"))
        cart_z = flex.double(
            self._wrap_loop_if_needed(cif_block, "_atom_site.Cartn_z"))
        occu = flex.double(
            self._wrap_loop_if_needed(cif_block, "_atom_site.occupancy"))
        formal_charge = self._wrap_loop_if_needed(
            cif_block, "_atom_site.pdbx_formal_charge")
        # anisotropic b-factors
        # TODO: read esds
        anisotrop_id = self._wrap_loop_if_needed(cif_block,
                                                 "_atom_site_anisotrop.id")
        adps = None
        if anisotrop_id is not None:
            # Look for the six U[i][j] columns first ...
            u_ij = [
                self._wrap_loop_if_needed(
                    cif_block,
                    "_atom_site_anisotrop.U[%s][%s]" % (ij[0], ij[1]))
                for ij in ("11", "22", "33", "12", "13", "23")
            ]
            # ... which must be either all present or all absent.
            assert u_ij.count(None) in (0, 6)
            if u_ij.count(None) == 0:
                adps = u_ij
            else:
                assert u_ij.count(None) == 6
                # No U columns: try the six B[i][j] columns instead and
                # convert them with adptbx.b_as_u.
                b_ij = [
                    self._wrap_loop_if_needed(
                        cif_block,
                        "_atom_site_anisotrop.B[%s][%s]" % (ij[0], ij[1]))
                    for ij in ("11", "22", "33", "12", "13", "23")
                ]
                assert b_ij.count(None) in (0, 6)
                if b_ij.count(None) == 0:
                    adps = adptbx.b_as_u(b_ij)
                assert not (u_ij.count(None) and b_ij.count(None)
                            )  # NOTE(review): as written this fails when both the U and the B columns are absent; the original comment read "illegal for both to be present"
            if adps is not None:
                try:
                    adps = [flex.double(adp) for adp in adps]
                except ValueError as e:
                    raise CifBuilderError("Error interpreting ADPs: " + str(e))
                adps = flex.sym_mat3_double(*adps)
        # Map each anisotrop id to its ADP so atoms can look theirs up below.
        py_adps = {}
        if anisotrop_id is not None and adps is not None:
            for an_id, adp in zip(list(anisotrop_id), list(adps)):
                py_adps[an_id] = adp
        # Identifiers of the previously processed atom; a change in any of
        # them opens a new level of the hierarchy.
        current_model_id = None
        current_label_asym_id = None
        current_auth_asym_id = None
        current_residue_id = None
        current_ins_code = None

        for i_atom in range(atom_labels.size()):
            # model(s)
            last_model_id = current_model_id
            current_model_id = model_ids[i_atom]
            assert current_model_id is not None
            if current_model_id != last_model_id:
                model = hierarchy.model(id=current_model_id)
                self.hierarchy.append_model(model)

            # chain(s)
            # label_asym_id is tracked, but the chain boundary test below
            # uses auth_asym_id (and the model id) only.
            last_label_asym_id = current_label_asym_id
            current_label_asym_id = label_asym_id[i_atom]
            assert current_label_asym_id is not None
            last_auth_asym_id = current_auth_asym_id
            current_auth_asym_id = auth_asym_id[i_atom]
            assert current_auth_asym_id not in [".", "?", " "], "mmCIF file contains " + \
              "record with empty auth_asym_id, which is wrong."
            # (repeats the label_asym_id check made a few lines above)
            assert current_label_asym_id is not None
            if (current_auth_asym_id != last_auth_asym_id
                    or current_model_id != last_model_id):
                chain = hierarchy.chain(id=current_auth_asym_id)
                model.append_chain(chain)
            else:
                assert current_auth_asym_id == last_auth_asym_id

            # residue_group(s)
            # defined by residue id and insertion code
            last_residue_id = current_residue_id
            current_residue_id = seq_id[i_atom]
            assert current_residue_id is not None
            last_ins_code = current_ins_code
            if pdb_ins_code is not None:
                current_ins_code = pdb_ins_code[i_atom]
                # unknown / inapplicable insertion codes become a blank
                if current_ins_code in ("?", ".", None): current_ins_code = " "
            if (current_residue_id != last_residue_id
                    or current_ins_code != last_ins_code
                    or current_auth_asym_id != last_auth_asym_id
                    or current_model_id != last_model_id):
                # Integer residue numbers are hy36-encoded to width 4; a
                # non-integer value is used verbatim and must already be
                # exactly four characters long.
                try:
                    resseq = hy36encode(width=4, value=int(current_residue_id))
                except ValueError as e:
                    resseq = current_residue_id
                    assert len(resseq) == 4
                residue_group = hierarchy.residue_group(resseq=resseq,
                                                        icode=current_ins_code)
                chain.append_residue_group(residue_group)
                atom_groups = OrderedDict()  # reset atom_groups cache
            # atom_group(s)
            # defined by resname and altloc id
            current_altloc = alt_id[i_atom]
            if current_altloc == "." or current_altloc == "?":
                current_altloc = ""  # Main chain atoms
            current_resname = comp_id[i_atom]
            # One atom group per (altloc, resname) pair within the current
            # residue group; reuse it if it was already created.
            if (current_altloc, current_resname) not in atom_groups:
                atom_group = hierarchy.atom_group(altloc=current_altloc,
                                                  resname=current_resname)
                atom_groups[(current_altloc, current_resname)] = atom_group
                # the blank-altloc group is always placed first
                if current_altloc == "":
                    residue_group.insert_atom_group(0, atom_group)
                else:
                    residue_group.append_atom_group(atom_group)
            else:
                atom_group = atom_groups[(current_altloc, current_resname)]

            # atom(s)
            atom = hierarchy.atom()
            atom_group.append_atom(atom)
            atom.set_element(type_symbol[i_atom])
            atom.set_name(
                format_pdb_atom_name(atom_labels[i_atom], type_symbol[i_atom]))
            atom.set_xyz(new_xyz=(cart_x[i_atom], cart_y[i_atom],
                                  cart_z[i_atom]))
            atom.set_b(B_iso_or_equiv[i_atom])
            atom.set_occ(occu[i_atom])
            # hy36encode should go once the pdb.hierarchy has been
            # modified to no longer store fixed-width strings
            atom.set_serial(
                hy36encode(width=5, value=int(atom_site_id[i_atom])))
            # some code relies on an empty segid being 4 spaces
            atom.set_segid("    ")
            if group_PDB is not None and group_PDB[i_atom] == "HETATM":
                atom.hetero = True
            if formal_charge is not None:
                charge = formal_charge[i_atom]
                if charge not in ("?", "."):
                    # A leading or trailing "-" marks a negative charge;
                    # anything else is treated as positive.
                    if charge.endswith("-") or charge.startswith("-"):
                        sign = "-"
                    else:
                        sign = "+"
                    charge = charge.strip(" -+")
                    charge = int(charge)
                    # zero is written without a sign
                    if charge == 0: sign = ""
                    # stored as magnitude followed by sign
                    atom.set_charge("%i%s" % (charge, sign))
            if atom_site_fp is not None:
                fp = atom_site_fp[i_atom]
                if fp not in ("?", "."):
                    atom.set_fp(new_fp=float(fp))
            if atom_site_fdp is not None:
                fdp = atom_site_fdp[i_atom]
                if fdp not in ("?", "."):
                    atom.set_fdp(new_fdp=float(fdp))
            if anisotrop_id is not None and adps is not None:
                # NOTE(review): the lookup key is the hy36-encoded serial
                # (stripped), while py_adps is keyed by the raw anisotrop id;
                # presumably these coincide for ordinary serial numbers --
                # confirm for values where hy36 encoding changes the text.
                py_u_ij = py_adps.get(atom.serial.strip(), None)
                if py_u_ij is not None:
                    atom.set_uij(py_u_ij)
        if len(self.hierarchy.models()) == 1:
            # for compatibility with single-model PDB files
            self.hierarchy.models()[0].id = ""
Example #4
0
 def __init__(self,
     pdb_hierarchy,
     fmodel,
     ncs_operators,
     ligands,
     params,
     log=None):
   """
   Pick the best-fitting ligand as a reference and replicate it with the
   NCS operators, collecting acceptable copies in self.new_ligands.

   The reference is the ligand with the highest map CC (as reported by
   self.get_sites_cc) that also exceeds params.min_cc_reference.  Each
   operator in ncs_operators[0].copies is applied to the reference
   coordinates; a copy is skipped when its center lies within
   params.min_dist_center of another input ligand, and kept when its CC
   exceeds params.min_cc.

   :param pdb_hierarchy: stored on self by adopt_init_args
   :param fmodel: object whose xray_structure is deep-copied to
     self.xray_structure
   :param ncs_operators: provides get_ncs_groups_shifts(),
     get_array_of_str_selections() and indexing to groups with .copies
   :param ligands: sequence of atom groups already placed
   :param params: extracted parameters (master_phil defaults if None)
   :param log: file-like object for messages (sys.stdout if None)
   :raises Sorry: if no ligand has a CC above params.min_cc_reference
   :raises AssertionError: if the reference ligand is not associated with
     NCS group index 0
   """
   if (log is None) : log = sys.stdout
   if (params is None):
     params = master_phil().fetch().extract()
   # Must run before any further locals are created: it copies the current
   # locals() (the constructor arguments) onto self.
   adopt_init_args(self, locals())
   self.xray_structure = fmodel.xray_structure.deep_copy_scatterers()
   from iotbx.pdb import hierarchy
   self.setup_maps()
   best_cc = 0
   best_k = -1
   best_ligand = None
   print("Identifying reference ligand...", file=log)
   def show_map_stats(prefix, stats):
     print("   %s: CC = %5.3f  mean = %6.2f" % (prefix, stats.cc,
       stats.map_mean), file=log)
   for k, ligand in enumerate(ligands):
     atoms = ligand.atoms()
     start = self.get_sites_cc(atoms)
     show_map_stats("Ligand %d" % (k+1), start)
     if (start.cc > best_cc) and (start.cc > params.min_cc_reference):
       best_ligand = ligand
       best_k = k
       # Record the new maximum; previously this was "best_cc = best_cc",
       # which left the threshold at 0 so the last acceptable ligand won.
       best_cc = start.cc
   if (best_ligand is None):
     raise Sorry("No ligand with acceptable CC (>%.2f) found." %
       params.min_cc_reference)
   best_atoms = best_ligand.atoms()
   other_ligands = [lig for lig in ligands if lig is not best_ligand]
   print("Copy #%d was the best, using that as reference" % (best_k+1), file=log)
   print("", file=log)
   sites_ref = best_ligand.atoms().extract_xyz()
   # Find the NCS group whose reported shift is smallest in magnitude.
   min_dist = sys.maxsize
   best_group = None
   shifts = ncs_operators.get_ncs_groups_shifts(
     self.xray_structure.sites_cart(),
     sites_ref
     )
   for i, s in enumerate(shifts):
     dxyz = xyz_distance(s[0], (0,0,0))
     if (dxyz < min_dist):
       best_group = i
       min_dist = dxyz
   array_of_str_selections = ncs_operators.get_array_of_str_selections()[0]
   if (best_group is not None):
     print("This appears to be bound to the selection \"%s\"" % \
       array_of_str_selections[best_group], file=log)
   # Only the case where the reference sits on group 0 is supported; anything
   # else (including no group at all) is a hard stop, as in the original.
   if (best_group != 0):
     print('best_group',best_group)
     assert 0
   # always have the first ligand in the master
   self.new_ligands = []
   for j, operator in enumerate(ncs_operators[0].copies):
     new_ligand = best_ligand.detached_copy()
     atoms = new_ligand.atoms()
     sites_new = operator.r.elems * sites_ref + operator.t.elems
     # computed once per operator instead of once per existing ligand
     sites_mean = sites_new.mean()
     for other in other_ligands :
       sites_other_mean = other.atoms().extract_xyz().mean()
       dxyz = xyz_distance(sites_other_mean, sites_mean)
       if (dxyz < params.min_dist_center):
         print("  operator %d specifies an existing ligand" % (j+1), file=log)
         break
     else :
       # no existing ligand occupies this site: evaluate the new copy
       atoms.set_xyz(sites_new)
       stats_new = self.get_sites_cc(best_atoms, sites_new)
       show_map_stats("NCS op. %2d" % (j+1), stats_new)
       if (params.write_sampled_pdbs):
         lig_rg = hierarchy.residue_group()
         lig_rg.resseq = j+1
         lig_rg.append_atom_group(new_ligand)
         # context manager closes the file even if a write fails
         with open("ncs_ligand_%d.pdb" % (j+1), "w") as f:
           for atom in new_ligand.atoms():
             f.write(atom.format_atom_record()+"\n")
       # XXX ideally, given multiple high-quality ligand placements, we should
       # probably try sampling NCS operations for all of these and pick the
       # best new CC, rather than assuming that the best starting ligand will
       # superpose best on the density.
       if (stats_new.cc > params.min_cc):
         print("  operator %d has acceptable CC (%.3f)" % (j+1,
           stats_new.cc), file=log)
         self.new_ligands.append(new_ligand)
Example #5
0
 def __init__(self,
     pdb_hierarchy,
     fmodel,
     ncs_operators,
     ligands,
     params,
     log=None):
   """
   Pick the best-fitting ligand as a reference and replicate it with the
   operators of the nearest NCS group, collecting acceptable copies in
   self.new_ligands.

   The reference is the ligand with the highest map CC (as reported by
   self.get_sites_cc) that also exceeds params.min_cc_reference.  The NCS
   group with the smallest distance_from_center() to the reference
   coordinates supplies the operators.  A transformed copy is skipped when
   its center lies within params.min_dist_center of another input ligand,
   and kept when its CC exceeds params.min_cc.

   :param pdb_hierarchy: stored on self by adopt_init_args
   :param fmodel: object whose xray_structure is deep-copied to
     self.xray_structure
   :param ncs_operators: iterable of groups offering distance_from_center(),
     selection_string and operators
   :param ligands: sequence of atom groups already placed
   :param params: extracted parameters (master_phil defaults if None)
   :param log: file-like object for messages (sys.stdout if None)
   :raises Sorry: if no ligand has a CC above params.min_cc_reference
   """
   if (log is None) : log = sys.stdout
   if (params is None):
     params = master_phil().fetch().extract()
   # Must run before any further locals are created: it copies the current
   # locals() (the constructor arguments) onto self.
   adopt_init_args(self, locals())
   self.xray_structure = fmodel.xray_structure.deep_copy_scatterers()
   from iotbx.pdb import hierarchy
   self.setup_maps()
   best_cc = 0
   best_k = -1
   best_ligand = None
   # print() with file= replaces the Python-2-only "print >> log" statement
   print("Identifying reference ligand...", file=log)
   def show_map_stats(prefix, stats):
     print("   %s: CC = %5.3f  mean = %6.2f" % (prefix, stats.cc,
       stats.map_mean), file=log)
   for k, ligand in enumerate(ligands):
     atoms = ligand.atoms()
     start = self.get_sites_cc(atoms)
     show_map_stats("Ligand %d" % (k+1), start)
     if (start.cc > best_cc) and (start.cc > params.min_cc_reference):
       best_ligand = ligand
       best_k = k
       # Record the new maximum; previously this was "best_cc = best_cc",
       # which left the threshold at 0 so the last acceptable ligand won.
       best_cc = start.cc
   if (best_ligand is None):
     raise Sorry("No ligand with acceptable CC (>%.2f) found." %
       params.min_cc_reference)
   best_atoms = best_ligand.atoms()
   other_ligands = [lig for lig in ligands if lig is not best_ligand]
   print("Copy #%d was the best, using that as reference" % (best_k+1), file=log)
   print("", file=log)
   sites_ref = best_ligand.atoms().extract_xyz()
   # sys.maxsize replaces sys.maxint, which no longer exists in Python 3
   min_dist = sys.maxsize
   best_group = None
   for op_group in ncs_operators:
     dxyz = op_group.distance_from_center(sites_ref)
     if (dxyz < min_dist):
       best_group = op_group
       min_dist = dxyz
   self.new_ligands = []
   if (best_group is None):
     # No NCS group was available, so there are no operators to apply;
     # previously this fell through to an AttributeError on None.
     print("No NCS operator group found; no ligand copies generated.",
       file=log)
     return
   print("This appears to be bound to the selection \"%s\"" % \
     best_group.selection_string, file=log)
   for j, operator in enumerate(best_group.operators):
     new_ligand = best_ligand.detached_copy()
     atoms = new_ligand.atoms()
     sites_new = operator.r.elems * sites_ref + operator.t.elems
     # computed once per operator instead of once per existing ligand
     sites_mean = sites_new.mean()
     for other in other_ligands:
       sites_other_mean = other.atoms().extract_xyz().mean()
       dxyz = xyz_distance(sites_other_mean, sites_mean)
       if (dxyz < params.min_dist_center):
         print("  operator %d specifies an existing ligand" % (j+1), file=log)
         break
     else:
       # no existing ligand occupies this site: evaluate the new copy
       atoms.set_xyz(sites_new)
       stats_new = self.get_sites_cc(best_atoms, sites_new)
       show_map_stats("NCS op. %2d" % (j+1), stats_new)
       if (params.write_sampled_pdbs):
         lig_rg = hierarchy.residue_group()
         lig_rg.resseq = j+1
         lig_rg.append_atom_group(new_ligand)
         # context manager closes the file even if a write fails
         with open("ncs_ligand_%d.pdb" % (j+1), "w") as f:
           for atom in new_ligand.atoms():
             f.write(atom.format_atom_record()+"\n")
       # XXX ideally, given multiple high-quality ligand placements, we should
       # probably try sampling NCS operations for all of these and pick the
       # best new CC, rather than assuming that the best starting ligand will
       # superpose best on the density.
       if (stats_new.cc > params.min_cc):
         print("  operator %d has acceptable CC (%.3f)" % (j+1,
           stats_new.cc), file=log)
         self.new_ligands.append(new_ligand)