def exercise_map_utils():
  """
  Exercise the local map-quality helpers in `building` on the atoms selected
  by "resseq 5" of the model returned by get_1yjp_pdb_and_fmodel(), after
  scaling the ADPs of that residue's CD/OE1/NE2 atoms by 0.5.
  """
  #
  # UNSTABLE
  #
  hierarchy, fmodel = get_1yjp_pdb_and_fmodel()
  cache = hierarchy.atom_selection_cache()
  perturbed_sel = cache.selection(
    "resseq 5 and (name CD or name OE1 or name NE2)")
  residue_sel = cache.selection("resseq 5")
  # Perturb the model so that difference density appears around residue 5.
  fmodel.xray_structure.scale_adp(factor=0.5, selection=perturbed_sel)
  fmodel.update_xray_structure(update_f_calc=True)
  two_fofc_map, fofc_map = building.get_difference_maps(fmodel)
  density_quality = building.local_density_quality(
    fofc_map=fofc_map,
    two_fofc_map=two_fofc_map,
    atom_selection=residue_sel,
    xray_structure=fmodel.xray_structure)
  report = StringIO()
  density_quality.show_atoms_outside_density(out=report, two_fofc_cutoff=3.0)
  # Keep only the text before the first ':' of every reported line.
  flagged = []
  for line in report.getvalue().splitlines():
    flagged.append(line.split(":")[0])
  assert len(flagged) > 0
  # XXX there seems to be a stochastic effect here
  #assert (flagged == ['pdb=" CA GLN A 5 "', 'pdb=" CB GLN A 5 "',
  #  'pdb=" CD GLN A 5 "', 'pdb=" NE2 GLN A 5 "']), \
  #  flagged
  fc_coeffs = fmodel.map_coefficients(map_type="Fc")
  fc_fft = fc_coeffs.fft_map(resolution_factor=0.25)
  fc_map = fc_fft.apply_sigma_scaling().real_map_unpadded()
  assert density_quality.number_of_atoms_below_fofc_map_level() == 3
  assert density_quality.fraction_of_nearby_grid_points_above_cutoff() > 0.0
  # The return value is not inspected; the call only has to complete.
  building.get_model_map_stats(
    selection=residue_sel,
    target_map=two_fofc_map,
    model_map=fc_map,
    unit_cell=fmodel.xray_structure.unit_cell(),
    sites_cart=fmodel.xray_structure.sites_cart(),
    pdb_atoms=hierarchy.atoms())
def exercise_map_utils():
  """
  Exercise the local map-quality helpers in `building` on the atoms selected
  by "resseq 5" of the model returned by get_1yjp_pdb_and_fmodel(), after
  scaling the ADPs of that residue's CD/OE1/NE2 atoms by 0.5.
  """
  #
  # UNSTABLE 2x
  #
  hierarchy, fmodel = get_1yjp_pdb_and_fmodel()
  cache = hierarchy.atom_selection_cache()
  perturbed_sel = cache.selection(
    "resseq 5 and (name CD or name OE1 or name NE2)")
  residue_sel = cache.selection("resseq 5")
  # Perturb the model so that difference density appears around residue 5.
  fmodel.xray_structure.scale_adp(factor=0.5, selection=perturbed_sel)
  fmodel.update_xray_structure(update_f_calc=True)
  two_fofc_map, fofc_map = building.get_difference_maps(fmodel)
  density_quality = building.local_density_quality(
    fofc_map=fofc_map,
    two_fofc_map=two_fofc_map,
    atom_selection=residue_sel,
    xray_structure=fmodel.xray_structure)
  report = StringIO()
  density_quality.show_atoms_outside_density(out=report, two_fofc_cutoff=3.0)
  # Keep only the text before the first ':' of every reported line.
  flagged = []
  for line in report.getvalue().splitlines():
    flagged.append(line.split(":")[0])
  assert len(flagged) > 0
  # XXX there seems to be a stochastic effect here
  #assert (flagged == ['pdb=" CA GLN A 5 "', 'pdb=" CB GLN A 5 "',
  #  'pdb=" CD GLN A 5 "', 'pdb=" NE2 GLN A 5 "']), \
  #  flagged
  fc_coeffs = fmodel.map_coefficients(map_type="Fc")
  fc_fft = fc_coeffs.fft_map(resolution_factor=0.25)
  fc_map = fc_fft.apply_sigma_scaling().real_map_unpadded()
  assert density_quality.number_of_atoms_below_fofc_map_level() == 3
  assert density_quality.fraction_of_nearby_grid_points_above_cutoff() > 0.0
  # The return value is not inspected; the call only has to complete.
  building.get_model_map_stats(
    selection=residue_sel,
    target_map=two_fofc_map,
    model_map=fc_map,
    unit_cell=fmodel.xray_structure.unit_cell(),
    sites_cart=fmodel.xray_structure.sites_cart(),
    pdb_atoms=hierarchy.atoms())
def filter_before_build(
    pdb_hierarchy,
    fmodel,
    geometry_restraints_manager,
    selection=None,
    params=None,
    verbose=True,
    log=sys.stdout):
  """
  Pick residues suitable for building alternate conformations - by default,
  this means no MolProbity/geometry outliers, good fit to map, no missing
  atoms, and no pre-existing alternates, but with significant difference
  density nearby.

  :param pdb_hierarchy: model hierarchy; its atom count must equal the number
    of scatterers in fmodel.xray_structure (asserted), and the i_seq of its
    atoms is reset here
  :param fmodel: source of the xray_structure, the difference maps and the
    validation input
  :param geometry_restraints_manager: forwarded to molprobity.molprobity
  :param selection: boolean flex array indexed by atom i_seq; a residue is
    considered only if all of its atoms are selected.  Defaults to all atoms.
  :param params: extracted filtering parameters; when None they are obtained
    by parsing filter_params_str
  :param verbose: also print the full validation report and the
    "no difference density" messages
  :param log: stream receiving all messages
  :returns: list with the single atom group of every accepted residue group
  :raises Sorry: if no residue passes the filters
  """
  from mmtbx.validation import molprobity
  from mmtbx.rotamer import rotamer_eval
  import mmtbx.monomer_library.server
  from mmtbx import building
  from iotbx.pdb import common_residue_names_get_class
  from scitbx.array_family import flex
  if (selection is None):
    selection = flex.bool(fmodel.xray_structure.scatterers().size(), True)
  pdb_atoms = pdb_hierarchy.atoms()
  assert (pdb_atoms.size() == fmodel.xray_structure.scatterers().size())
  pdb_atoms.reset_i_seq()
  full_validation = molprobity.molprobity(
    pdb_hierarchy=pdb_hierarchy,
    fmodel=fmodel,
    geometry_restraints_manager=geometry_restraints_manager,
    outliers_only=False,
    rotamer_library="8000")
  if (verbose):
    full_validation.show(out=log)
  multi_criterion = full_validation.as_multi_criterion_view()
  if (params is None):
    params = libtbx.phil.parse(filter_params_str).extract()
  mon_lib_srv = mmtbx.monomer_library.server.server()
  two_fofc_map, fofc_map = building.get_difference_maps(fmodel=fmodel)
  residues = []
  make_sub_header("Identifying candidates for building", out=log)
  # TODO parallelize
  for chain in pdb_hierarchy.only_model().chains():
    if (not chain.is_protein()):
      continue
    for residue_group in chain.residue_groups():
      atom_groups = residue_group.atom_groups()
      id_str = residue_group.id_str()
      i_seqs = residue_group.atoms().extract_i_seq()
      # Residues that are not completely inside the selection are skipped
      # silently.
      residue_sel = selection.select(i_seqs)
      if (not residue_sel.all_eq(True)):
        continue
      if (len(atom_groups) > 1):
        print >> log, " %s is already multi-conformer" % id_str
        continue
      atom_group = atom_groups[0]
      res_class = common_residue_names_get_class(atom_group.resname)
      if (res_class != "common_amino_acid"):
        print >> log, " %s: non-standard residue" % id_str
        continue
      missing_atoms = rotamer_eval.eval_residue_completeness(
        residue=atom_group,
        mon_lib_srv=mon_lib_srv,
        ignore_hydrogens=True)
      if (len(missing_atoms) > 0):
        # residues modeled as pseudo-ALA are allowed by default; partially
        # missing sidechains are more problematic
        stub_allowed = (building.is_stub_residue(atom_group) and
                        (not params.ignore_stub_residues))
        if (not stub_allowed):
          # The message previously had a single placeholder for two values,
          # which raised TypeError instead of reporting the residue.
          print >> log, \
            " %s: missing or incomplete sidechain (%d missing atoms)" % \
            (id_str, len(missing_atoms))
          continue
      validation = multi_criterion.get_residue_group_data(residue_group)
      if (is_validation_outlier(validation, params)):
        print >> log, " %s" % str(validation)
        continue
      if (params.use_difference_map):
        i_seqs_no_hd = building.get_non_hydrogen_atom_indices(residue_group)
        map_stats = building.local_density_quality(
          fofc_map=fofc_map,
          two_fofc_map=two_fofc_map,
          atom_selection=i_seqs_no_hd,
          xray_structure=fmodel.xray_structure,
          radius=params.sampling_radius)
        # Without any difference-map signal around the residue there is
        # nothing to motivate an alternate conformation.
        if ((map_stats.number_of_atoms_below_fofc_map_level() == 0) and
            (map_stats.fraction_of_nearby_grid_points_above_cutoff() == 0)):
          if (verbose):
            print >> log, " no difference density for %s" % id_str
          continue
      residues.append(residue_group.only_atom_group())
  if (len(residues) == 0):
    raise Sorry("No residues passed the filtering criteria.")
  print >> log, ""
  print >> log, "Alternate conformations will be tried for %d residue(s):" % \
    len(residues)
  building.show_chain_resseq_ranges(residues, out=log, prefix=" ")
  print >> log, ""
  return residues
def __init__(self,
    fmodel,
    params,
    mp_params,
    pdb_hierarchy,
    processed_pdb_file,
    selection=None,
    cif_objects=(),
    verbose=True,
    debug=False,
    out=None):
  """
  Collect residue-centred windows from the protein chains of pdb_hierarchy,
  refine every window with self.refine_window (serially or through
  easy_mp.parallel_map) and store the non-None results in self._ensembles,
  sorted by the first index of their selection.

  :param fmodel: supplies f_obs, xray_structure and the difference maps
  :param params: settings read here: window_radius (must satisfy
    0 < r <= 4), min_deviation, exclude_resnames, prefilter.*, n_trials and
    partial_occupancy
  :param mp_params: parallelization settings (nproc, technology,
    qsub_command)
  :param pdb_hierarchy: model to scan for candidate residues
  :param processed_pdb_file: must not be None
  :param selection: boolean flex array indexed by atom i_seq; only residues
    whose atoms are all selected are considered.  Defaults to all atoms.
  :param cif_objects: not used directly in this method
  :param verbose: report why individual residues are skipped
  :param debug: write a PDB file for every retained ensemble
  :param out: log stream; defaults to sys.stdout
  :raises Sorry: if no window passes the filters
  """
  if (out is None):
    out = sys.stdout
  # Keep this ahead of any new local variable: locals() is passed as-is.
  # Presumably this copies the constructor arguments onto self -- self.params,
  # self.mp_params, self.selection and self.verbose are read below without
  # being assigned explicitly.
  adopt_init_args(self, locals())
  self.asynchronous_output = False
  from mmtbx.rotamer import rotamer_eval
  from scitbx.array_family import flex
  # The original assertion fell back on an undefined name (pdb_file_names),
  # so a missing processed_pdb_file raised NameError instead of failing here.
  assert (processed_pdb_file is not None)
  assert (0 < self.params.window_radius <= 4)
  self.pdb_hierarchy = pdb_hierarchy
  self.processed_pdb_file = processed_pdb_file
  self.get_processed_pdb_file(log=out)
  self.sites_cart = self.pdb_hierarchy.atoms().extract_xyz().deep_copy()
  if (self.selection is None):
    self.selection = flex.bool(self.sites_cart.size(), True)
  self.min_required_deviation = self.params.min_deviation
  if (self.min_required_deviation is Auto):
    self.min_required_deviation = fmodel.f_obs().d_min() / 2
  self._ensembles = []
  self.nproc_1 = self.nproc_2 = 1
  two_fofc_map, fofc_map = mmtbx.building.get_difference_maps(fmodel=fmodel)
  windows = []
  # NOTE(review): r is never used below; the construction is retained in case
  # it is relied upon for a side effect -- confirm and remove if not.
  r = rotamer_eval.RotamerEval(data_version="8000")
  exclude_resnames = []
  if (params.exclude_resnames is not None):
    exclude_resnames = [ n.upper() for n in params.exclude_resnames ]
  window_radius = self.params.window_radius
  for chain in self.pdb_hierarchy.only_model().chains():
    if not chain.is_protein():
      continue
    residues = chain.residue_groups()
    fragments = alt_confs.fragment_single_conformer_chain(residues)
    for fragment_residues in fragments:
      # Leave out window_radius residues at both ends of the fragment so that
      # every candidate has a complete window around it; j_res is the index
      # into the full fragment.
      candidates = fragment_residues[window_radius:-window_radius]
      for j_res, residue in enumerate(candidates, window_radius):
        atom_groups = residue.atom_groups()
        main_conf = atom_groups[0]
        if (main_conf.resname.upper() in exclude_resnames):
          continue
        residue_id = main_conf.id_str()
        ag_i_seqs = main_conf.atoms().extract_i_seq()
        if (not self.selection.select(ag_i_seqs).all_eq(True)):
          continue
        if (len(atom_groups) != 1):
          if (self.verbose):
            print(" residue %s already has multiple conformations" %
              residue_id, file=out)
          continue
        ag_i_seqs_no_hd = flex.size_t()
        for atom in main_conf.atoms():
          if (atom.element.strip() not in ["H","D"]):
            ag_i_seqs_no_hd.append(atom.i_seq)
        # XXX this is probably not optimal; what should I do about the
        # adjacent residues?  it would be good to check Ramachandran plot too
        if (self.params.prefilter.rotameric_only):
          # 'hierarchy' was an undefined name here; the model is
          # pdb_hierarchy.
          n_outliers = alt_confs.score_rotamers(hierarchy=pdb_hierarchy,
            selection=ag_i_seqs)
          if (n_outliers > 0):
            if (self.verbose):
              print(" residue %s is a rotamer outlier" % residue_id,
                file=out)
            continue
        if (self.params.prefilter.use_difference_map):
          # mmtbx.building is the spelling already used above for
          # get_difference_maps.
          map_stats = mmtbx.building.local_density_quality(
            fofc_map=fofc_map,
            two_fofc_map=two_fofc_map,
            atom_selection=ag_i_seqs_no_hd,
            xray_structure=fmodel.xray_structure,
            radius=self.params.prefilter.sampling_radius)
          # NOTE(review): this called number_of_atoms_in_difference_holes();
          # every other caller of local_density_quality in this code uses
          # number_of_atoms_below_fofc_map_level() -- confirm they are the
          # same quantity.
          if ((map_stats.number_of_atoms_below_fofc_map_level() == 0) and
              (map_stats.fraction_of_nearby_grid_points_above_cutoff() == 0)):
            if (self.verbose):
              print(" no difference density for %s" % residue_id, file=out)
            continue
        # Atoms of the first atom group of every residue within
        # window_radius positions of the candidate.
        window_selection = flex.size_t()
        for offset in range(-window_radius, window_radius + 1):
          adjacent_group = fragment_residues[j_res+offset].atom_groups()[0]
          window_selection.extend(adjacent_group.atoms().extract_i_seq())
        # self.selection replaces the raw 'selection' argument, which is None
        # whenever the caller relies on the default.
        # NOTE(review): confirm that the global selection, rather than
        # window_selection, is what residue_window expects here.
        windows.append(residue_window(
          residue_id_str=residue_id,
          selection=window_selection,
          residue_selection=ag_i_seqs_no_hd,
          sites_reference=self.sites_cart.select(self.selection),
          window_radius=self.params.window_radius))
  if (len(windows) == 0):
    raise Sorry("No peptide segments meeting the filtering criteria could "+
      "be extracted from the selected atoms.")
  else:
    print("%d fragments will be refined." % len(windows), file=out)
  if (self.mp_params.nproc == 1):
    pass
  elif (self.mp_params.technology == "multiprocessing"):
    if ((self.params.n_trials == 1) and
        (len(self.params.partial_occupancy) == 1)):
      # only one refinement per window, so parallelize residue iteration
      self.nproc_1 = self.mp_params.nproc
    else:
      # multiple refinements per window, so parallelize at that level
      # FIXME actually, this needs to be smarter - if the number of
      # available processors is greater than the number of refinements per
      # window, it will be more efficient to parallelize the window loop
      self.nproc_2 = self.mp_params.nproc
  else:
    # queuing system, so we can only parallelize residue iteration
    self.nproc_1 = self.mp_params.nproc
    # NOTE(review): the nesting of the next two statements was reconstructed
    # from whitespace-damaged text; they are assumed to belong to this
    # branch -- confirm against the original layout.
    self.out = null_out()
    self.processed_pdb_file = None
  print("", file=out)
  alt_confs.print_trial_header(out)
  ensembles = []
  if (self.nproc_1 == 1):
    self.asynchronous_output = True
    for window in windows:
      ens = self.refine_window(window)
      ensembles.append(ens)
  else:
    ensembles = easy_mp.parallel_map(
      func=self.refine_window,
      iterable=windows,
      processes=self.nproc_1,
      qsub_command=mp_params.qsub_command,
      method=mp_params.technology)
  self._ensembles = [ e for e in ensembles if (e is not None) ]
  # XXX reassert order
  print("", file=out)
  if (len(self._ensembles) == 0):
    print("WARNING: no ensembles passed filtering step", file=out)
    print("", file=out)
  # A two-argument comparator returning a bool is not a valid sort argument
  # (and a TypeError on Python 3); order by the first selected index instead.
  self._ensembles.sort(key=lambda ens: ens.selection[0])
  self.processed_pdb_file = processed_pdb_file
  if (debug):
    # 'filtered' was an undefined name; the retained ensembles are meant.
    for ens in self._ensembles:
      pdb_out = ens.dump_pdb_file(
        pdb_hierarchy=pdb_hierarchy,
        crystal_symmetry=fmodel.f_obs())
      print("wrote %s" % pdb_out, file=out)
def __init__(self,
    fmodel,
    params,
    mp_params,
    pdb_hierarchy,
    processed_pdb_file,
    selection=None,
    cif_objects=(),
    verbose=True,
    debug=False,
    out=None):
  """
  Collect residue-centred windows from the protein chains of pdb_hierarchy,
  refine every window with self.refine_window (serially or through
  easy_mp.parallel_map) and store the non-None results in self._ensembles,
  sorted by the first index of their selection.

  :param fmodel: supplies f_obs, xray_structure and the difference maps
  :param params: settings read here: window_radius (must satisfy
    0 < r <= 4), min_deviation, exclude_resnames, prefilter.*, n_trials and
    partial_occupancy
  :param mp_params: parallelization settings (nproc, technology,
    qsub_command)
  :param pdb_hierarchy: model to scan for candidate residues
  :param processed_pdb_file: must not be None
  :param selection: boolean flex array indexed by atom i_seq; only residues
    whose atoms are all selected are considered.  Defaults to all atoms.
  :param cif_objects: not used directly in this method
  :param verbose: report why individual residues are skipped
  :param debug: write a PDB file for every retained ensemble
  :param out: log stream; defaults to sys.stdout
  :raises Sorry: if no window passes the filters
  """
  if (out is None):
    out = sys.stdout
  # Keep this ahead of any new local variable: locals() is passed as-is.
  # Presumably this copies the constructor arguments onto self -- self.params,
  # self.mp_params, self.selection and self.verbose are read below without
  # being assigned explicitly.
  adopt_init_args(self, locals())
  self.asynchronous_output = False
  from mmtbx.rotamer import rotamer_eval
  from scitbx.array_family import flex
  # The original assertion fell back on an undefined name (pdb_file_names),
  # so a missing processed_pdb_file raised NameError instead of failing here.
  assert (processed_pdb_file is not None)
  assert (0 < self.params.window_radius <= 4)
  self.pdb_hierarchy = pdb_hierarchy
  self.processed_pdb_file = processed_pdb_file
  self.get_processed_pdb_file(log=out)
  self.sites_cart = self.pdb_hierarchy.atoms().extract_xyz().deep_copy()
  if (self.selection is None):
    self.selection = flex.bool(self.sites_cart.size(), True)
  self.min_required_deviation = self.params.min_deviation
  if (self.min_required_deviation is Auto):
    self.min_required_deviation = fmodel.f_obs().d_min() / 2
  self._ensembles = []
  self.nproc_1 = self.nproc_2 = 1
  two_fofc_map, fofc_map = mmtbx.building.get_difference_maps(fmodel=fmodel)
  windows = []
  # NOTE(review): r is never used below; the construction is retained in case
  # it is relied upon for a side effect -- confirm and remove if not.
  r = rotamer_eval.RotamerEval(data_version="8000")
  exclude_resnames = []
  if (params.exclude_resnames is not None):
    exclude_resnames = [ n.upper() for n in params.exclude_resnames ]
  window_radius = self.params.window_radius
  for chain in self.pdb_hierarchy.only_model().chains():
    # Protein check is made on the first conformer of the chain.
    first_conformer = chain.conformers()[0]
    if (not first_conformer.is_protein()):
      continue
    residues = chain.residue_groups()
    fragments = alt_confs.fragment_single_conformer_chain(residues)
    for fragment_residues in fragments:
      # Leave out window_radius residues at both ends of the fragment so that
      # every candidate has a complete window around it; j_res is the index
      # into the full fragment.
      candidates = fragment_residues[window_radius:-window_radius]
      for j_res, residue in enumerate(candidates, window_radius):
        atom_groups = residue.atom_groups()
        main_conf = atom_groups[0]
        if (main_conf.resname.upper() in exclude_resnames):
          continue
        residue_id = main_conf.id_str()
        ag_i_seqs = main_conf.atoms().extract_i_seq()
        if (not self.selection.select(ag_i_seqs).all_eq(True)):
          continue
        if (len(atom_groups) != 1):
          if (self.verbose):
            print >> out, " residue %s already has multiple conformations" % \
              residue_id
          continue
        ag_i_seqs_no_hd = flex.size_t()
        for atom in main_conf.atoms():
          if (atom.element.strip() not in ["H","D"]):
            ag_i_seqs_no_hd.append(atom.i_seq)
        # XXX this is probably not optimal; what should I do about the
        # adjacent residues?  it would be good to check Ramachandran plot too
        if (self.params.prefilter.rotameric_only):
          # 'hierarchy' was an undefined name here; the model is
          # pdb_hierarchy.
          n_outliers = alt_confs.score_rotamers(hierarchy=pdb_hierarchy,
            selection=ag_i_seqs)
          if (n_outliers > 0):
            if (self.verbose):
              print >> out, " residue %s is a rotamer outlier" % residue_id
            continue
        if (self.params.prefilter.use_difference_map):
          # mmtbx.building is the spelling already used above for
          # get_difference_maps.
          map_stats = mmtbx.building.local_density_quality(
            fofc_map=fofc_map,
            two_fofc_map=two_fofc_map,
            atom_selection=ag_i_seqs_no_hd,
            xray_structure=fmodel.xray_structure,
            radius=self.params.prefilter.sampling_radius)
          # NOTE(review): this called number_of_atoms_in_difference_holes();
          # every other caller of local_density_quality in this code uses
          # number_of_atoms_below_fofc_map_level() -- confirm they are the
          # same quantity.
          if ((map_stats.number_of_atoms_below_fofc_map_level() == 0) and
              (map_stats.fraction_of_nearby_grid_points_above_cutoff() == 0)):
            if (self.verbose):
              print >> out, " no difference density for %s" % residue_id
            continue
        # Atoms of the first atom group of every residue within
        # window_radius positions of the candidate.
        window_selection = flex.size_t()
        for offset in range(-window_radius, window_radius + 1):
          adjacent_group = fragment_residues[j_res+offset].atom_groups()[0]
          window_selection.extend(adjacent_group.atoms().extract_i_seq())
        # self.selection replaces the raw 'selection' argument, which is None
        # whenever the caller relies on the default.
        # NOTE(review): confirm that the global selection, rather than
        # window_selection, is what residue_window expects here.
        windows.append(residue_window(
          residue_id_str=residue_id,
          selection=window_selection,
          residue_selection=ag_i_seqs_no_hd,
          sites_reference=self.sites_cart.select(self.selection),
          window_radius=self.params.window_radius))
  if (len(windows) == 0):
    raise Sorry("No peptide segments meeting the filtering criteria could "+
      "be extracted from the selected atoms.")
  else:
    print >> out, "%d fragments will be refined." % len(windows)
  if (self.mp_params.nproc == 1):
    pass
  elif (self.mp_params.technology == "multiprocessing"):
    if ((self.params.n_trials == 1) and
        (len(self.params.partial_occupancy) == 1)):
      # only one refinement per window, so parallelize residue iteration
      self.nproc_1 = self.mp_params.nproc
    else:
      # multiple refinements per window, so parallelize at that level
      # FIXME actually, this needs to be smarter - if the number of
      # available processors is greater than the number of refinements per
      # window, it will be more efficient to parallelize the window loop
      self.nproc_2 = self.mp_params.nproc
  else:
    # queuing system, so we can only parallelize residue iteration
    self.nproc_1 = self.mp_params.nproc
    # NOTE(review): the nesting of the next two statements was reconstructed
    # from whitespace-damaged text; they are assumed to belong to this
    # branch -- confirm against the original layout.
    self.out = null_out()
    self.processed_pdb_file = None
  print >> out, ""
  alt_confs.print_trial_header(out)
  ensembles = []
  if (self.nproc_1 == 1):
    self.asynchronous_output = True
    for window in windows:
      ens = self.refine_window(window)
      ensembles.append(ens)
  else:
    ensembles = easy_mp.parallel_map(
      func=self.refine_window,
      iterable=windows,
      processes=self.nproc_1,
      qsub_command=mp_params.qsub_command,
      method=mp_params.technology)
  self._ensembles = [ e for e in ensembles if (e is not None) ]
  # XXX reassert order
  print >> out, ""
  if (len(self._ensembles) == 0):
    print >> out, "WARNING: no ensembles passed filtering step"
    print >> out, ""
  # A two-argument comparator returning a bool never yields a negative value,
  # so it cannot order the list; sort on the first selected index instead.
  self._ensembles.sort(key=lambda ens: ens.selection[0])
  self.processed_pdb_file = processed_pdb_file
  if (debug):
    # 'filtered' was an undefined name; the retained ensembles are meant.
    for ens in self._ensembles:
      pdb_out = ens.dump_pdb_file(
        pdb_hierarchy=pdb_hierarchy,
        crystal_symmetry=fmodel.f_obs())
      print >> out, "wrote %s" % pdb_out