def __init__(self, fmodel, params, mp_params, pdb_hierarchy,
             processed_pdb_file, selection=None, cif_objects=(),
             verbose=True, debug=False, out=None):
    """Pick residue windows, refine each one, and store the results.

    Walks every protein chain of the (single) model, keeps the residues
    that pass the configured pre-filters, wraps each one in a
    ``residue_window`` spanning ``params.window_radius`` residues on either
    side, runs ``self.refine_window`` on every window (serially or through
    ``easy_mp.parallel_map``) and keeps the non-``None`` results in
    ``self._ensembles``, ordered by the first index of their selection.

    Parameters:
      fmodel: supplies the difference maps, ``f_obs()`` (for ``d_min``) and
        ``xray_structure``.
      params: reads ``window_radius`` (must satisfy 0 < radius <= 4),
        ``min_deviation``, ``exclude_resnames``, ``prefilter.*``,
        ``n_trials`` and ``partial_occupancy``.
      mp_params: reads ``nproc``, ``technology`` and ``qsub_command``.
      pdb_hierarchy: model hierarchy whose atoms are examined.
      processed_pdb_file: must not be None.
      selection: optional per-atom boolean selection restricting which
        residues are considered; defaults to all atoms.
      cif_objects: only stored on the instance here.
      verbose: if true, report why individual residues are skipped.
      debug: if true, dump a PDB file for every retained ensemble.
      out: output stream for log messages; defaults to ``sys.stdout``.

    Raises:
      AssertionError: if ``processed_pdb_file`` is None or the window
        radius is out of range.
      Sorry: if no residue window survives the filtering.
    """
    if out is None:
        out = sys.stdout
    # Must run before any other local is bound: it copies every entry of
    # locals() (i.e. the arguments) onto self.
    adopt_init_args(self, locals())
    self.asynchronous_output = False
    from mmtbx.rotamer import rotamer_eval
    from scitbx.array_family import flex
    # The original assert fell back to an undefined name (pdb_file_names),
    # turning a missing processed_pdb_file into a NameError.
    assert processed_pdb_file is not None, "processed_pdb_file is required"
    assert (0 < self.params.window_radius <= 4)
    self.pdb_hierarchy = pdb_hierarchy
    self.processed_pdb_file = processed_pdb_file
    self.get_processed_pdb_file(log=out)
    self.sites_cart = self.pdb_hierarchy.atoms().extract_xyz().deep_copy()
    if self.selection is None:
        self.selection = flex.bool(self.sites_cart.size(), True)
    self.min_required_deviation = self.params.min_deviation
    if self.min_required_deviation is Auto:
        self.min_required_deviation = fmodel.f_obs().d_min() / 2
    self._ensembles = []
    self.nproc_1 = self.nproc_2 = 1
    two_fofc_map, fofc_map = mmtbx.building.get_difference_maps(fmodel=fmodel)
    windows = []
    # NOTE(review): this object is never used below; kept in case its
    # construction has a wanted side effect -- confirm and remove.
    r = rotamer_eval.RotamerEval(data_version="8000")
    exclude_resnames = []
    if params.exclude_resnames is not None:
        exclude_resnames = [n.upper() for n in params.exclude_resnames]
    radius = self.params.window_radius
    for chain in self.pdb_hierarchy.only_model().chains():
        if not chain.is_protein():
            continue
        fragments = alt_confs.fragment_single_conformer_chain(
            chain.residue_groups())
        for fragment_residues in fragments:
            # Only residues with `radius` neighbours on both sides can be
            # the centre of a window, so trim both ends of the fragment.
            for i_res, residue in enumerate(
                    fragment_residues[radius:-radius]):
                j_res = i_res + radius  # index within the full fragment
                atom_groups = residue.atom_groups()
                main_conf = atom_groups[0]
                if main_conf.resname.upper() in exclude_resnames:
                    continue
                residue_id = main_conf.id_str()
                ag_i_seqs = main_conf.atoms().extract_i_seq()
                # Every atom of the residue must be in the user selection.
                if not self.selection.select(ag_i_seqs).all_eq(True):
                    continue
                if len(atom_groups) != 1:
                    if self.verbose:
                        print(" residue %s already has multiple "
                              "conformations" % residue_id, file=out)
                    continue
                ag_i_seqs_no_hd = flex.size_t()
                for atom in main_conf.atoms():
                    if atom.element.strip() not in ["H", "D"]:
                        ag_i_seqs_no_hd.append(atom.i_seq)
                # XXX this is probably not optimal; what should I do about
                # the adjacent residues?  it would be good to check
                # Ramachandran plot too
                if self.params.prefilter.rotameric_only:
                    # Was `hierarchy=hierarchy`, an undefined name.
                    n_outliers = alt_confs.score_rotamers(
                        hierarchy=self.pdb_hierarchy,
                        selection=ag_i_seqs)
                    if n_outliers > 0:
                        if self.verbose:
                            print(" residue %s is a rotamer outlier" %
                                  residue_id, file=out)
                        continue
                if self.params.prefilter.use_difference_map:
                    map_stats = building.local_density_quality(
                        fofc_map=fofc_map,
                        two_fofc_map=two_fofc_map,
                        atom_selection=ag_i_seqs_no_hd,
                        xray_structure=fmodel.xray_structure,
                        radius=self.params.prefilter.sampling_radius)
                    if ((map_stats.number_of_atoms_in_difference_holes() == 0)
                            and (map_stats.
                                 fraction_of_nearby_grid_points_above_cutoff()
                                 == 0)):
                        if self.verbose:
                            print(" no difference density for %s" %
                                  residue_id, file=out)
                        continue
                # Gather the atoms of the first atom group of every residue
                # in the window centred on this residue.
                window_selection = flex.size_t()
                for offset in range(-radius, radius + 1):
                    adjacent_group = \
                        fragment_residues[j_res + offset].atom_groups()[0]
                    window_selection.extend(
                        adjacent_group.atoms().extract_i_seq())
                # self.selection is identical to the `selection` argument
                # when one was given, but is also valid when it was None.
                # NOTE(review): confirm that the whole-selection sites (and
                # not the window's sites) are what residue_window expects.
                windows.append(residue_window(
                    residue_id_str=residue_id,
                    selection=window_selection,
                    residue_selection=ag_i_seqs_no_hd,
                    sites_reference=self.sites_cart.select(self.selection),
                    window_radius=radius))
    if len(windows) == 0:
        raise Sorry("No peptide segments meeting the filtering criteria could "+
                    "be extracted from the selected atoms.")
    print("%d fragments will be refined." % len(windows), file=out)
    if self.mp_params.nproc == 1:
        pass
    elif self.mp_params.technology == "multiprocessing":
        if ((self.params.n_trials == 1) and
                (len(self.params.partial_occupancy) == 1)):
            # only one refinement per window, so parallelize residue
            # iteration
            self.nproc_1 = self.mp_params.nproc
        else:
            # multiple refinements per window, so parallelize at that level
            # FIXME actually, this needs to be smarter - if the number of
            # available processors is greater than the number of
            # refinements per window, it will be more efficient to
            # parallelize the window loop
            self.nproc_2 = self.mp_params.nproc
    else:
        # queuing system, so we can only parallelize residue iteration
        self.nproc_1 = self.mp_params.nproc
        self.out = null_out()
        # Dropped while the jobs run; restored at the end of __init__.
        self.processed_pdb_file = None
    print("", file=out)
    alt_confs.print_trial_header(out)
    if self.nproc_1 == 1:
        self.asynchronous_output = True
        ensembles = [self.refine_window(window) for window in windows]
    else:
        ensembles = easy_mp.parallel_map(
            func=self.refine_window,
            iterable=windows,
            processes=self.nproc_1,
            qsub_command=mp_params.qsub_command,
            method=mp_params.technology)
    self._ensembles = [e for e in ensembles if e is not None]
    print("", file=out)
    if len(self._ensembles) == 0:
        print("WARNING: no ensembles passed filtering step", file=out)
        print("", file=out)
    # Reassert order.  The original passed a boolean-returning comparison
    # function positionally, which Python 3 rejects and which never sorted
    # correctly on Python 2; a key function works on both.
    self._ensembles.sort(key=lambda ens: ens.selection[0])
    self.processed_pdb_file = processed_pdb_file
    if debug:
        # Was iterating over `filtered`, an undefined name.
        for ens in self._ensembles:
            pdb_out = ens.dump_pdb_file(
                pdb_hierarchy=pdb_hierarchy,
                crystal_symmetry=fmodel.f_obs())
            print("wrote %s" % pdb_out, file=out)
def __init__(self, fmodel, params, mp_params, pdb_hierarchy,
             processed_pdb_file, selection=None, cif_objects=(),
             verbose=True, debug=False, out=None):
    """Pick residue windows, refine each one, and store the results.

    For every chain whose first conformer is protein, keeps the residues
    that pass the configured pre-filters, wraps each one in a
    ``residue_window`` spanning ``params.window_radius`` residues on either
    side, runs ``self.refine_window`` on every window (serially or through
    ``easy_mp.parallel_map``) and keeps the non-``None`` results in
    ``self._ensembles``, ordered by the first index of their selection.

    Parameters:
      fmodel: supplies the difference maps, ``f_obs()`` (for ``d_min``) and
        ``xray_structure``.
      params: reads ``window_radius`` (must satisfy 0 < radius <= 4),
        ``min_deviation``, ``exclude_resnames``, ``prefilter.*``,
        ``n_trials`` and ``partial_occupancy``.
      mp_params: reads ``nproc``, ``technology`` and ``qsub_command``.
      pdb_hierarchy: model hierarchy whose atoms are examined.
      processed_pdb_file: must not be None.
      selection: optional per-atom boolean selection restricting which
        residues are considered; defaults to all atoms.
      cif_objects: only stored on the instance here.
      verbose: if true, report why individual residues are skipped.
      debug: if true, dump a PDB file for every retained ensemble.
      out: output stream for log messages; defaults to ``sys.stdout``.

    Raises:
      AssertionError: if ``processed_pdb_file`` is None or the window
        radius is out of range.
      Sorry: if no residue window survives the filtering.
    """
    if out is None:
        out = sys.stdout
    # Must run before any other local is bound: it copies every entry of
    # locals() (i.e. the arguments) onto self.
    adopt_init_args(self, locals())
    self.asynchronous_output = False
    from mmtbx.rotamer import rotamer_eval
    from scitbx.array_family import flex
    # The original assert fell back to an undefined name (pdb_file_names),
    # turning a missing processed_pdb_file into a NameError.
    assert processed_pdb_file is not None, "processed_pdb_file is required"
    assert (0 < self.params.window_radius <= 4)
    self.pdb_hierarchy = pdb_hierarchy
    self.processed_pdb_file = processed_pdb_file
    self.get_processed_pdb_file(log=out)
    self.sites_cart = self.pdb_hierarchy.atoms().extract_xyz().deep_copy()
    if self.selection is None:
        self.selection = flex.bool(self.sites_cart.size(), True)
    self.min_required_deviation = self.params.min_deviation
    if self.min_required_deviation is Auto:
        self.min_required_deviation = fmodel.f_obs().d_min() / 2
    self._ensembles = []
    self.nproc_1 = self.nproc_2 = 1
    two_fofc_map, fofc_map = mmtbx.building.get_difference_maps(fmodel=fmodel)
    windows = []
    # NOTE(review): this object is never used below; kept in case its
    # construction has a wanted side effect -- confirm and remove.
    r = rotamer_eval.RotamerEval(data_version="8000")
    exclude_resnames = []
    if params.exclude_resnames is not None:
        exclude_resnames = [n.upper() for n in params.exclude_resnames]
    radius = self.params.window_radius
    for chain in self.pdb_hierarchy.only_model().chains():
        # This revision decides "protein or not" from the first conformer.
        first_conformer = chain.conformers()[0]
        if not first_conformer.is_protein():
            continue
        fragments = alt_confs.fragment_single_conformer_chain(
            chain.residue_groups())
        for fragment_residues in fragments:
            # Only residues with `radius` neighbours on both sides can be
            # the centre of a window, so trim both ends of the fragment.
            for i_res, residue in enumerate(
                    fragment_residues[radius:-radius]):
                j_res = i_res + radius  # index within the full fragment
                atom_groups = residue.atom_groups()
                main_conf = atom_groups[0]
                if main_conf.resname.upper() in exclude_resnames:
                    continue
                residue_id = main_conf.id_str()
                ag_i_seqs = main_conf.atoms().extract_i_seq()
                # Every atom of the residue must be in the user selection.
                if not self.selection.select(ag_i_seqs).all_eq(True):
                    continue
                if len(atom_groups) != 1:
                    if self.verbose:
                        print(" residue %s already has multiple "
                              "conformations" % residue_id, file=out)
                    continue
                ag_i_seqs_no_hd = flex.size_t()
                for atom in main_conf.atoms():
                    if atom.element.strip() not in ["H", "D"]:
                        ag_i_seqs_no_hd.append(atom.i_seq)
                # XXX this is probably not optimal; what should I do about
                # the adjacent residues?  it would be good to check
                # Ramachandran plot too
                if self.params.prefilter.rotameric_only:
                    # Was `hierarchy=hierarchy`, an undefined name.
                    n_outliers = alt_confs.score_rotamers(
                        hierarchy=self.pdb_hierarchy,
                        selection=ag_i_seqs)
                    if n_outliers > 0:
                        if self.verbose:
                            print(" residue %s is a rotamer outlier" %
                                  residue_id, file=out)
                        continue
                if self.params.prefilter.use_difference_map:
                    map_stats = building.local_density_quality(
                        fofc_map=fofc_map,
                        two_fofc_map=two_fofc_map,
                        atom_selection=ag_i_seqs_no_hd,
                        xray_structure=fmodel.xray_structure,
                        radius=self.params.prefilter.sampling_radius)
                    if ((map_stats.number_of_atoms_in_difference_holes() == 0)
                            and (map_stats.
                                 fraction_of_nearby_grid_points_above_cutoff()
                                 == 0)):
                        if self.verbose:
                            print(" no difference density for %s" %
                                  residue_id, file=out)
                        continue
                # Gather the atoms of the first atom group of every residue
                # in the window centred on this residue.
                window_selection = flex.size_t()
                for offset in range(-radius, radius + 1):
                    adjacent_group = \
                        fragment_residues[j_res + offset].atom_groups()[0]
                    window_selection.extend(
                        adjacent_group.atoms().extract_i_seq())
                # self.selection is identical to the `selection` argument
                # when one was given, but is also valid when it was None.
                # NOTE(review): confirm that the whole-selection sites (and
                # not the window's sites) are what residue_window expects.
                windows.append(residue_window(
                    residue_id_str=residue_id,
                    selection=window_selection,
                    residue_selection=ag_i_seqs_no_hd,
                    sites_reference=self.sites_cart.select(self.selection),
                    window_radius=radius))
    if len(windows) == 0:
        raise Sorry("No peptide segments meeting the filtering criteria could "+
                    "be extracted from the selected atoms.")
    # Output uses print(..., file=out), matching the other revision of this
    # method in the file; the `print >> out` statement form fails at
    # runtime once print is a function.
    print("%d fragments will be refined." % len(windows), file=out)
    if self.mp_params.nproc == 1:
        pass
    elif self.mp_params.technology == "multiprocessing":
        if ((self.params.n_trials == 1) and
                (len(self.params.partial_occupancy) == 1)):
            # only one refinement per window, so parallelize residue
            # iteration
            self.nproc_1 = self.mp_params.nproc
        else:
            # multiple refinements per window, so parallelize at that level
            # FIXME actually, this needs to be smarter - if the number of
            # available processors is greater than the number of
            # refinements per window, it will be more efficient to
            # parallelize the window loop
            self.nproc_2 = self.mp_params.nproc
    else:
        # queuing system, so we can only parallelize residue iteration
        self.nproc_1 = self.mp_params.nproc
        self.out = null_out()
        # Dropped while the jobs run; restored at the end of __init__.
        self.processed_pdb_file = None
    print("", file=out)
    alt_confs.print_trial_header(out)
    if self.nproc_1 == 1:
        self.asynchronous_output = True
        ensembles = [self.refine_window(window) for window in windows]
    else:
        ensembles = easy_mp.parallel_map(
            func=self.refine_window,
            iterable=windows,
            processes=self.nproc_1,
            qsub_command=mp_params.qsub_command,
            method=mp_params.technology)
    self._ensembles = [e for e in ensembles if e is not None]
    print("", file=out)
    if len(self._ensembles) == 0:
        print("WARNING: no ensembles passed filtering step", file=out)
        print("", file=out)
    # Reassert order.  The original passed a boolean-returning comparison
    # function positionally, which Python 3 rejects and which never sorted
    # correctly on Python 2; a key function works on both.
    self._ensembles.sort(key=lambda ens: ens.selection[0])
    self.processed_pdb_file = processed_pdb_file
    if debug:
        # Was iterating over `filtered`, an undefined name.
        for ens in self._ensembles:
            pdb_out = ens.dump_pdb_file(
                pdb_hierarchy=pdb_hierarchy,
                crystal_symmetry=fmodel.f_obs())
            print("wrote %s" % pdb_out, file=out)