Exemple #1
0
def exercise_map_utils():
  """
  Exercise the local density-quality helpers in ``building`` on the model and
  fmodel returned by get_1yjp_pdb_and_fmodel().

  UNSTABLE: the original author noted an apparently stochastic effect in the
  list of atoms reported outside density, so only its non-emptiness is
  asserted.
  """
  pdb_hierarchy, f_model = get_1yjp_pdb_and_fmodel()
  selector = pdb_hierarchy.atom_selection_cache()
  perturbed_atoms = selector.selection(
    "resseq 5 and (name CD or name OE1 or name NE2)")
  residue_atoms = selector.selection("resseq 5")
  # Halve the ADPs of three atoms of residue 5 and recompute F(calc) before
  # any map is calculated.
  f_model.xray_structure.scale_adp(factor=0.5, selection=perturbed_atoms)
  f_model.update_xray_structure(update_f_calc=True)
  difference_maps = building.get_difference_maps(f_model)
  two_fofc_map, fofc_map = difference_maps
  density_report = building.local_density_quality(
    fofc_map=fofc_map,
    two_fofc_map=two_fofc_map,
    atom_selection=residue_atoms,
    xray_structure=f_model.xray_structure)
  captured = StringIO()
  density_report.show_atoms_outside_density(out=captured, two_fofc_cutoff=3.0)
  # Each output line is "<atom label>:<...>"; keep the part before the colon.
  atom_labels = []
  for line in captured.getvalue().splitlines():
    atom_labels.append(line.split(":")[0])
  assert len(atom_labels) > 0
  # XXX the exact comparison below is disabled because the result seems to
  # vary between runs:
  #   atom_labels == ['pdb=" CA  GLN A   5 "', 'pdb=" CB  GLN A   5 "',
  #                   'pdb=" CD  GLN A   5 "', 'pdb=" NE2 GLN A   5 "']
  fc_coefficients = f_model.map_coefficients(map_type="Fc")
  fc_fft_map = fc_coefficients.fft_map(resolution_factor=0.25)
  fc_map = fc_fft_map.apply_sigma_scaling().real_map_unpadded()
  n_below_fofc = density_report.number_of_atoms_below_fofc_map_level()
  assert (n_below_fofc == 3)
  fraction_above = density_report.fraction_of_nearby_grid_points_above_cutoff()
  assert (fraction_above > 0.0)
  # The return value is not checked; the call only has to succeed.
  model_map_stats = building.get_model_map_stats(
    selection=residue_atoms,
    target_map=two_fofc_map,
    model_map=fc_map,
    unit_cell=f_model.xray_structure.unit_cell(),
    sites_cart=f_model.xray_structure.sites_cart(),
    pdb_atoms=pdb_hierarchy.atoms())
Exemple #2
0
def exercise_map_utils():
  """
  Run the ``building`` map-quality utilities against the test model obtained
  from get_1yjp_pdb_and_fmodel() and check a few summary values.

  UNSTABLE 2x (original author's marker): the per-atom output seems to have
  a stochastic component, so the exact atom list is not compared.
  """
  model_hierarchy, fm = get_1yjp_pdb_and_fmodel()
  selection_cache = model_hierarchy.atom_selection_cache()
  sel_scaled = selection_cache.selection(
    "resseq 5 and (name CD or name OE1 or name NE2)")
  sel_residue = selection_cache.selection("resseq 5")
  # Scale the ADPs of the selected atoms by 0.5, then refresh F(calc).
  fm.xray_structure.scale_adp(factor=0.5, selection=sel_scaled)
  fm.update_xray_structure(update_f_calc=True)
  two_fofc_map, fofc_map = building.get_difference_maps(fm)
  quality = building.local_density_quality(
    fofc_map=fofc_map,
    two_fofc_map=two_fofc_map,
    atom_selection=sel_residue,
    xray_structure=fm.xray_structure)
  text_out = StringIO()
  quality.show_atoms_outside_density(out=text_out, two_fofc_cutoff=3.0)
  report_lines = text_out.getvalue().splitlines()
  # Only the text before the first ":" of every line is kept.
  pdb_labels = [entry.split(":")[0] for entry in report_lines]
  assert len(pdb_labels) > 0
  # XXX disabled because the outcome appears to be non-deterministic:
  #   pdb_labels == ['pdb=" CA  GLN A   5 "', 'pdb=" CB  GLN A   5 "',
  #                  'pdb=" CD  GLN A   5 "', 'pdb=" NE2 GLN A   5 "']
  fc_fft = fm.map_coefficients(map_type="Fc").fft_map(resolution_factor=0.25)
  fc_map = fc_fft.apply_sigma_scaling().real_map_unpadded()
  assert (quality.number_of_atoms_below_fofc_map_level() == 3)
  assert (quality.fraction_of_nearby_grid_points_above_cutoff() > 0.0)
  # Called for coverage only; the result is not inspected.
  cell = fm.xray_structure.unit_cell()
  sites = fm.xray_structure.sites_cart()
  atoms = model_hierarchy.atoms()
  map_stats_result = building.get_model_map_stats(
    selection=sel_residue,
    target_map=two_fofc_map,
    model_map=fc_map,
    unit_cell=cell,
    sites_cart=sites,
    pdb_atoms=atoms)
Exemple #3
0
def filter_before_build (
    pdb_hierarchy,
    fmodel,
    geometry_restraints_manager,
    selection=None,
    params=None,
    verbose=True,
    log=sys.stdout) :
  """
  Pick residues suitable for building alternate conformations - by default,
  this means no MolProbity/geometry outliers, good fit to map, no missing
  atoms, and no pre-existing alternates, but with significant difference
  density nearby.

  :param pdb_hierarchy: model hierarchy; its atom count must equal the number
    of scatterers in fmodel.xray_structure (asserted).  The i_seq of its
    atoms is reset by this function.
  :param fmodel: object providing xray_structure and the difference maps
  :param geometry_restraints_manager: passed through to the MolProbity
    validation
  :param selection: optional flex.bool over all atoms; residues that are not
    completely selected are skipped (default: every atom selected)
  :param params: extracted filtering parameters (default: extracted from
    filter_params_str)
  :param verbose: if true, print the full validation report and the
    per-residue "no difference density" messages
  :param log: file-like object that receives all output
  :returns: list holding the single atom group of every accepted residue
  :raises Sorry: if no residue passes the filtering criteria
  """
  from mmtbx.validation import molprobity
  from mmtbx.rotamer import rotamer_eval
  import mmtbx.monomer_library.server
  from mmtbx import building
  from iotbx.pdb import common_residue_names_get_class
  from scitbx.array_family import flex
  if (selection is None) :
    selection = flex.bool(fmodel.xray_structure.scatterers().size(), True)
  pdb_atoms = pdb_hierarchy.atoms()
  assert (pdb_atoms.size() == fmodel.xray_structure.scatterers().size())
  pdb_atoms.reset_i_seq()
  full_validation = molprobity.molprobity(
    pdb_hierarchy=pdb_hierarchy,
    fmodel=fmodel,
    geometry_restraints_manager=geometry_restraints_manager,
    outliers_only=False,
    rotamer_library="8000")
  if (verbose) :
    full_validation.show(out=log)
  multi_criterion = full_validation.as_multi_criterion_view()
  if (params is None) :
    params = libtbx.phil.parse(filter_params_str).extract()
  mon_lib_srv = mmtbx.monomer_library.server.server()
  two_fofc_map, fofc_map = building.get_difference_maps(fmodel=fmodel)
  residues = []
  make_sub_header("Identifying candidates for building", out=log)
  # TODO parallelize
  for chain in pdb_hierarchy.only_model().chains() :
    if (not chain.is_protein()) :
      continue
    for residue_group in chain.residue_groups() :
      atom_groups = residue_group.atom_groups()
      id_str = residue_group.id_str()
      i_seqs = residue_group.atoms().extract_i_seq()
      # skip residues that are not entirely inside the user selection
      residue_sel = selection.select(i_seqs)
      if (not residue_sel.all_eq(True)) :
        continue
      if (len(atom_groups) > 1) :
        print >> log, "  %s is already multi-conformer" % id_str
        continue
      atom_group = atom_groups[0]
      res_class = common_residue_names_get_class(atom_group.resname)
      if (res_class != "common_amino_acid") :
        print >> log, "  %s: non-standard residue" % id_str
        continue
      missing_atoms = rotamer_eval.eval_residue_completeness(
        residue=atom_group,
        mon_lib_srv=mon_lib_srv,
        ignore_hydrogens=True)
      if (len(missing_atoms) > 0) :
        # residues modeled as pseudo-ALA are allowed by default; partially
        # missing sidechains are more problematic
        stub_allowed = ((building.is_stub_residue(atom_group)) and
                        (not params.ignore_stub_residues))
        if (not stub_allowed) :
          # The format string previously had a single placeholder for two
          # values, which raised TypeError instead of printing the message.
          print >> log, \
            "  %s: missing or incomplete sidechain (%d missing atom(s))" % \
            (id_str, len(missing_atoms))
          continue
      validation = multi_criterion.get_residue_group_data(residue_group)
      is_outlier = is_validation_outlier(validation, params)
      if (is_outlier) :
        print >> log, "  %s" % str(validation)
        continue
      if (params.use_difference_map) :
        i_seqs_no_hd = building.get_non_hydrogen_atom_indices(residue_group)
        map_stats = building.local_density_quality(
          fofc_map=fofc_map,
          two_fofc_map=two_fofc_map,
          atom_selection=i_seqs_no_hd,
          xray_structure=fmodel.xray_structure,
          radius=params.sampling_radius)
        # reject the residue only when both difference-density indicators
        # are zero
        if ((map_stats.number_of_atoms_below_fofc_map_level() == 0) and
            (map_stats.fraction_of_nearby_grid_points_above_cutoff()==0)) :
          if (verbose) :
            print >> log, "  no difference density for %s" % id_str
          continue
      residues.append(residue_group.only_atom_group())
  if (len(residues) == 0) :
    raise Sorry("No residues passed the filtering criteria.")
  print >> log, ""
  print >> log, "Alternate conformations will be tried for %d residue(s):" % \
      len(residues)
  building.show_chain_resseq_ranges(residues, out=log, prefix="  ")
  print >> log, ""
  return residues
Exemple #4
0
 def __init__(self,
     fmodel,
     params,
     mp_params,
     pdb_hierarchy,
     processed_pdb_file,
     selection=None,
     cif_objects=(),
     verbose=True,
     debug=False,
     out=None):
   """
   Collect residue windows from the protein chains of pdb_hierarchy, run
   self.refine_window() on each of them (serially or through easy_mp) and
   store the non-None results, sorted by first selected atom index, in
   self._ensembles.

   :param fmodel: supplies the difference maps, xray_structure and
     f_obs().d_min()
   :param params: settings read here: window_radius (0 < radius <= 4 is
     asserted), min_deviation, exclude_resnames, prefilter.rotameric_only,
     prefilter.use_difference_map, prefilter.sampling_radius, n_trials and
     partial_occupancy
   :param mp_params: parallelization settings (nproc, technology,
     qsub_command)
   :param pdb_hierarchy: model hierarchy to scan for candidate residues
   :param processed_pdb_file: must not be None
   :param selection: optional flex.bool over all atoms; residues that are not
     completely selected are skipped (default: every atom selected)
   :param cif_objects: not used directly in this method; stored on self by
     adopt_init_args
   :param verbose: print the reason whenever a residue is skipped
   :param debug: write a PDB file for every ensemble that was kept
   :param out: output stream (default: sys.stdout)
   :raises Sorry: if no window satisfies the filtering criteria
   """
   if (out is None) : out = sys.stdout
   adopt_init_args(self, locals())
   self.asynchronous_output = False
   from mmtbx.rotamer import rotamer_eval
   from scitbx.array_family import flex
   # The second half of this check used to read an undefined name
   # (pdb_file_names), turning the failure into a NameError.
   assert (processed_pdb_file is not None)
   assert (0 < self.params.window_radius <= 4)
   self.pdb_hierarchy = pdb_hierarchy
   self.processed_pdb_file = processed_pdb_file
   self.get_processed_pdb_file(log=out)
   self.sites_cart = self.pdb_hierarchy.atoms().extract_xyz().deep_copy()
   if (self.selection is None):
     self.selection = flex.bool(self.sites_cart.size(), True)
   self.min_required_deviation = self.params.min_deviation
   if (self.min_required_deviation is Auto):
     self.min_required_deviation = fmodel.f_obs().d_min() / 2
   self._ensembles = []
   self.nproc_1 = self.nproc_2 = 1
   two_fofc_map, fofc_map = mmtbx.building.get_difference_maps(fmodel=fmodel)
   windows = []
   # NOTE(review): 'r' is never used below; kept in case constructing the
   # evaluator has side effects that later code relies on - confirm.
   r = rotamer_eval.RotamerEval(data_version="8000")
   exclude_resnames = []
   if (params.exclude_resnames is not None):
     exclude_resnames = [ n.upper() for n in params.exclude_resnames ]
   for chain in self.pdb_hierarchy.only_model().chains():
     if not chain.is_protein():
       continue
     residues = chain.residue_groups()
     fragments = alt_confs.fragment_single_conformer_chain(residues)
     for fragment_residues in fragments :
       # leave window_radius residues at both ends of the fragment so that
       # every window lies completely inside the fragment
       start = params.window_radius
       end = - params.window_radius
       for i_res, residue in enumerate(fragment_residues[start:end]):
         j_res = i_res + start
         atom_groups = residue.atom_groups()
         main_conf = atom_groups[0]
         if (main_conf.resname.upper() in exclude_resnames):
           continue
         residue_id = main_conf.id_str()
         ag_i_seqs = main_conf.atoms().extract_i_seq()
         if (not self.selection.select(ag_i_seqs).all_eq(True)):
           continue
         if (len(atom_groups) != 1):
           if (self.verbose):
             print("  residue %s already has multiple conformations" %
               residue_id, file=out)
           continue
         ag_i_seqs_no_hd = flex.size_t()
         for atom in main_conf.atoms():
           if (atom.element.strip() not in ["H","D"]):
             ag_i_seqs_no_hd.append(atom.i_seq)
         # XXX this is probably not optimal; what should I do about the
         # adjacent residues?  it would be good to check Ramachandran plot too
         if (self.params.prefilter.rotameric_only):
           # 'hierarchy' was an undefined name here; the model hierarchy is
           # available as self.pdb_hierarchy
           n_outliers = alt_confs.score_rotamers(
             hierarchy=self.pdb_hierarchy,
             selection=ag_i_seqs)
           if (n_outliers > 0):
             if (self.verbose):
               print("  residue %s is a rotamer outlier" % residue_id,
                 file=out)
             continue
         if (self.params.prefilter.use_difference_map):
           map_stats = building.local_density_quality(
             fofc_map=fofc_map,
             two_fofc_map=two_fofc_map,
             atom_selection=ag_i_seqs_no_hd,
             xray_structure=fmodel.xray_structure,
             radius=self.params.prefilter.sampling_radius)
           if ((map_stats.number_of_atoms_in_difference_holes() == 0) and
               (map_stats.fraction_of_nearby_grid_points_above_cutoff()==0)):
             if (self.verbose):
               print("  no difference density for %s" % residue_id, file=out)
             continue
         # gather the atoms of the central residue and of window_radius
         # neighbours on each side
         window_selection = flex.size_t()
         offset = - self.params.window_radius
         while (offset <= self.params.window_radius):
           adjacent_group = fragment_residues[j_res+offset].atom_groups()[0]
           window_selection.extend(adjacent_group.atoms().extract_i_seq())
           offset += 1
         # self.selection equals the 'selection' argument when one was given
         # and is the all-True default otherwise; the bare argument would be
         # None in the default case.
         windows.append(residue_window(
           residue_id_str=residue_id,
           selection=window_selection,
           residue_selection=ag_i_seqs_no_hd,
           sites_reference=self.sites_cart.select(self.selection),
           window_radius=self.params.window_radius))
   if (len(windows) == 0):
     raise Sorry("No peptide segments meeting the filtering criteria could "+
       "be extracted from the selected atoms.")
   else :
     print("%d fragments will be refined." % len(windows), file=out)
   if (self.mp_params.nproc == 1):
     pass
   elif (self.mp_params.technology == "multiprocessing"):
     if (self.params.n_trials == 1) and (len(self.params.partial_occupancy) == 1):
       # only one refinement per window, so parallelize residue iteration
       self.nproc_1 = self.mp_params.nproc
     else :
       # multiple refinements per window, so parallelize at that level
       # FIXME actually, this needs to be smarter - if the number of
       # available processors is greater than the number of refinements per
       # window, it will be more efficient to parallelize the window loop
       self.nproc_2 = self.mp_params.nproc
   else :
     # queuing system, so we can only parallelize residue iteration
     self.nproc_1 = self.mp_params.nproc
     self.out = null_out()
     self.processed_pdb_file = None
   print("", file=out)
   alt_confs.print_trial_header(out)
   ensembles = []
   if (self.nproc_1 == 1):
     self.asynchronous_output = True
     for window in windows :
       ens = self.refine_window(window)
       ensembles.append(ens)
   else :
     ensembles = easy_mp.parallel_map(
       func=self.refine_window,
       iterable=windows,
       processes=self.nproc_1,
       qsub_command=mp_params.qsub_command,
       method=mp_params.technology)
   self._ensembles = [ e for e in ensembles if (e is not None) ]
   # XXX reassert order
   print("", file=out)
   if (len(self._ensembles) == 0):
     print("WARNING: no ensembles passed filtering step", file=out)
     print("", file=out)
   # Order by first selected atom index.  A key function is used because a
   # positional comparison function is rejected by Python 3, and the old
   # boolean-returning comparator was not a valid cmp function anyway.
   self._ensembles.sort(key=lambda ens: ens.selection[0])
   # restore the attribute cleared above for queuing systems
   self.processed_pdb_file = processed_pdb_file
   if (debug):
     # 'filtered' was an undefined name; the kept ensembles are meant
     for ens in self._ensembles:
       pdb_out = ens.dump_pdb_file(
         pdb_hierarchy=pdb_hierarchy,
         crystal_symmetry=fmodel.f_obs())
       print("wrote %s" % pdb_out, file=out)
 def __init__ (self,
     fmodel,
     params,
     mp_params,
     pdb_hierarchy,
     processed_pdb_file,
     selection=None,
     cif_objects=(),
     verbose=True,
     debug=False,
     out=None) :
   """
   Collect residue windows from the protein chains of pdb_hierarchy, run
   self.refine_window() on each of them (serially or through easy_mp) and
   store the non-None results, sorted by first selected atom index, in
   self._ensembles.

   :param fmodel: supplies the difference maps, xray_structure and
     f_obs().d_min()
   :param params: settings read here: window_radius (0 < radius <= 4 is
     asserted), min_deviation, exclude_resnames, prefilter.rotameric_only,
     prefilter.use_difference_map, prefilter.sampling_radius, n_trials and
     partial_occupancy
   :param mp_params: parallelization settings (nproc, technology,
     qsub_command)
   :param pdb_hierarchy: model hierarchy to scan for candidate residues
   :param processed_pdb_file: must not be None
   :param selection: optional flex.bool over all atoms; residues that are not
     completely selected are skipped (default: every atom selected)
   :param cif_objects: not used directly in this method; stored on self by
     adopt_init_args
   :param verbose: print the reason whenever a residue is skipped
   :param debug: write a PDB file for every ensemble that was kept
   :param out: output stream (default: sys.stdout)
   :raises Sorry: if no window satisfies the filtering criteria
   """
   if (out is None) : out = sys.stdout
   adopt_init_args(self, locals())
   self.asynchronous_output = False
   from mmtbx.rotamer import rotamer_eval
   from scitbx.array_family import flex
   # The second half of this check used to read an undefined name
   # (pdb_file_names), turning the failure into a NameError.
   assert (processed_pdb_file is not None)
   assert (0 < self.params.window_radius <= 4)
   self.pdb_hierarchy = pdb_hierarchy
   self.processed_pdb_file = processed_pdb_file
   self.get_processed_pdb_file(log=out)
   self.sites_cart = self.pdb_hierarchy.atoms().extract_xyz().deep_copy()
   if (self.selection is None) :
     self.selection = flex.bool(self.sites_cart.size(), True)
   self.min_required_deviation = self.params.min_deviation
   if (self.min_required_deviation is Auto) :
     self.min_required_deviation = fmodel.f_obs().d_min() / 2
   self._ensembles = []
   self.nproc_1 = self.nproc_2 = 1
   two_fofc_map, fofc_map = mmtbx.building.get_difference_maps(fmodel=fmodel)
   windows = []
   # NOTE(review): 'r' is never used below; kept in case constructing the
   # evaluator has side effects that later code relies on - confirm.
   r = rotamer_eval.RotamerEval(data_version="8000")
   exclude_resnames = []
   if (params.exclude_resnames is not None) :
     exclude_resnames = [ n.upper() for n in params.exclude_resnames ]
   for chain in self.pdb_hierarchy.only_model().chains() :
     # the protein test is made on the first conformer of the chain
     first_conformer = chain.conformers()[0]
     if (not first_conformer.is_protein()) :
       continue
     residues = chain.residue_groups()
     fragments = alt_confs.fragment_single_conformer_chain(residues)
     for fragment_residues in fragments :
       # leave window_radius residues at both ends of the fragment so that
       # every window lies completely inside the fragment
       start = params.window_radius
       end = - params.window_radius
       for i_res, residue in enumerate(fragment_residues[start:end]) :
         j_res = i_res + start
         atom_groups = residue.atom_groups()
         main_conf = atom_groups[0]
         if (main_conf.resname.upper() in exclude_resnames) :
           continue
         residue_id = main_conf.id_str()
         ag_i_seqs = main_conf.atoms().extract_i_seq()
         if (not self.selection.select(ag_i_seqs).all_eq(True)) :
           continue
         if (len(atom_groups) != 1) :
           if (self.verbose) :
             print >> out, "  residue %s already has multiple conformations"%\
               residue_id
           continue
         ag_i_seqs_no_hd = flex.size_t()
         for atom in main_conf.atoms() :
           if (atom.element.strip() not in ["H","D"]) :
             ag_i_seqs_no_hd.append(atom.i_seq)
         # XXX this is probably not optimal; what should I do about the
         # adjacent residues?  it would be good to check Ramachandran plot too
         if (self.params.prefilter.rotameric_only) :
           # 'hierarchy' was an undefined name here; the model hierarchy is
           # available as self.pdb_hierarchy
           n_outliers = alt_confs.score_rotamers(
             hierarchy=self.pdb_hierarchy,
             selection=ag_i_seqs)
           if (n_outliers > 0) :
             if (self.verbose) :
               print >> out, "  residue %s is a rotamer outlier" % residue_id
             continue
         if (self.params.prefilter.use_difference_map) :
           map_stats = building.local_density_quality(
             fofc_map=fofc_map,
             two_fofc_map=two_fofc_map,
             atom_selection=ag_i_seqs_no_hd,
             xray_structure=fmodel.xray_structure,
             radius=self.params.prefilter.sampling_radius)
           if ((map_stats.number_of_atoms_in_difference_holes() == 0) and
               (map_stats.fraction_of_nearby_grid_points_above_cutoff()==0)) :
             if (self.verbose) :
               print >> out, "  no difference density for %s" % residue_id
             continue
         # gather the atoms of the central residue and of window_radius
         # neighbours on each side
         window_selection = flex.size_t()
         offset = - self.params.window_radius
         while (offset <= self.params.window_radius) :
           adjacent_group = fragment_residues[j_res+offset].atom_groups()[0]
           window_selection.extend(adjacent_group.atoms().extract_i_seq())
           offset += 1
         # self.selection equals the 'selection' argument when one was given
         # and is the all-True default otherwise; the bare argument would be
         # None in the default case.
         windows.append(residue_window(
           residue_id_str=residue_id,
           selection=window_selection,
           residue_selection=ag_i_seqs_no_hd,
           sites_reference=self.sites_cart.select(self.selection),
           window_radius=self.params.window_radius))
   if (len(windows) == 0) :
     raise Sorry("No peptide segments meeting the filtering criteria could "+
       "be extracted from the selected atoms.")
   else :
     print >> out, "%d fragments will be refined." % len(windows)
   if (self.mp_params.nproc == 1) :
     pass
   elif (self.mp_params.technology == "multiprocessing") :
     if (self.params.n_trials == 1) and (len(self.params.partial_occupancy) == 1) :
       # only one refinement per window, so parallelize residue iteration
       self.nproc_1 = self.mp_params.nproc
     else :
       # multiple refinements per window, so parallelize at that level
       # FIXME actually, this needs to be smarter - if the number of
       # available processors is greater than the number of refinements per
       # window, it will be more efficient to parallelize the window loop
       self.nproc_2 = self.mp_params.nproc
   else :
     # queuing system, so we can only parallelize residue iteration
     self.nproc_1 = self.mp_params.nproc
     self.out = null_out()
     self.processed_pdb_file = None
   print >> out, ""
   alt_confs.print_trial_header(out)
   ensembles = []
   if (self.nproc_1 == 1) :
     self.asynchronous_output = True
     for window in windows :
       ens = self.refine_window(window)
       ensembles.append(ens)
   else :
     ensembles = easy_mp.parallel_map(
       func=self.refine_window,
       iterable=windows,
       processes=self.nproc_1,
       qsub_command=mp_params.qsub_command,
       method=mp_params.technology)
   self._ensembles = [ e for e in ensembles if (e is not None) ]
   # XXX reassert order
   print >> out, ""
   if (len(self._ensembles) == 0) :
     print >> out, "WARNING: no ensembles passed filtering step"
     print >> out, ""
   # Order by first selected atom index.  The old comparator returned a
   # boolean (never a negative value), which is not a valid cmp function,
   # so a key function is used instead.
   self._ensembles.sort(key=lambda ens: ens.selection[0])
   # restore the attribute cleared above for queuing systems
   self.processed_pdb_file = processed_pdb_file
   if (debug) :
     # 'filtered' was an undefined name; the kept ensembles are meant
     for ens in self._ensembles :
       pdb_out = ens.dump_pdb_file(
         pdb_hierarchy=pdb_hierarchy,
         crystal_symmetry=fmodel.f_obs())
       print >> out, "wrote %s" % pdb_out