# Sentinel for "caller did not pass log".  A fresh StringIO is created per
# call instead of sharing one default buffer that grows for the lifetime of
# the process; an explicit ``log=None`` is still forwarded unchanged.
_DIGESTER_DEFAULT_LOG = object()

def digester(model,
             standard_geometry_restraints_manager,
             params,
             log=_DIGESTER_DEFAULT_LOG,
             ):
  """
  Wrap a standard geometry restraints manager in a QM-aware manager.

  A new ``manager(params, log=log)`` is created, every instance attribute of
  the standard manager (except names starting with ``__``) is copied onto it,
  and the standard manager itself is kept on
  ``standard_geometry_restraints_manager``.  The QM restraints are then
  initialised and ``run_energies`` is called.

  Parameters
  ----------
  model : passed through to ``run_energies``
  standard_geometry_restraints_manager : object whose attributes are copied
  params : parameters; ``params.qi.qm_restraints`` is iterated here
  log : file-like, optional
      When omitted, output goes to a private, per-call StringIO.

  Returns
  -------
  The newly created QM restraints manager.
  """
  if log is _DIGESTER_DEFAULT_LOG:
    log = StringIO()
  sgrm = standard_geometry_restraints_manager
  qm_grm = manager(params, log=log)
  # Snapshot the items first so the copy is unaffected by later changes.
  for attr, value in list(vars(sgrm).items()):
    if attr.startswith('__'): continue
    setattr(qm_grm, attr, value)
  qm_grm.standard_geometry_restraints_manager = sgrm
  #
  make_header('QM Restraints Initialisation', out=log)
  qm_restraints_initialisation(params, log=log)
  # The program is only run when at least one restraint asks for 'test'.
  # NOTE(review): reconstructed as a for/else (the ``else`` belongs to the
  # loop, not the ``if``) - confirm against the original layout.
  run_program = True
  for qmr in params.qi.qm_restraints:
    if qmr.run_in_macro_cycles == 'test':
      break
  else:
    run_program = False
  run_energies(model,
               params,
               run_program=run_program,
               log=log,
               )
  return qm_grm
def run (args, out=sys.stdout) :
  """
  Command-line driver for mmtbx.validate_ions.

  Loads model and data through ``mmtbx.command_line.load_model_and_data``,
  builds an ion manager, prints the current scattering statistics and
  returns the result of ``manager.validate_ions``.

  args : list of command-line arguments
  out : file-like object receiving all output
  """
  usage_string = """\
mmtbx.validate_ions model.pdb data.mtz [options ...]

Utility to validate ions that have been built into a model, based on local
environment, electron density maps, and atomic properties.
"""
  import mmtbx.ions.identify
  import mmtbx.command_line
  # The usage text was previously built but never handed to the loader;
  # other command-line drivers pass it via the usage_string keyword.
  cmdline = mmtbx.command_line.load_model_and_data(
    args=args,
    master_phil=master_phil(),
    out=out,
    process_pdb_file=True,
    create_fmodel=True,
    set_wavelength_from_model_header=True,
    set_inelastic_form_factors="sasaki",
    prefer_anomalous=True,
    usage_string=usage_string)
  params = cmdline.params
  make_header("Inspecting ions", out=out)
  manager = mmtbx.ions.identify.create_manager(
    pdb_hierarchy=cmdline.pdb_hierarchy,
    fmodel=cmdline.fmodel,
    geometry_restraints_manager=cmdline.geometry,
    wavelength=params.input.wavelength,
    params=params,
    verbose=params.debug,
    nproc=params.nproc,
    log=out)
  manager.show_current_scattering_statistics(out=out)
  return manager.validate_ions(out=out, debug=params.debug)
def build_window_conformers (self, stop_if_none=False) :
  """
  Sample sliding windows and assemble the result into a new hierarchy.

  Prints the starting targets and the sampling time to ``self.out``,
  replaces ``self.pdb_hierarchy`` with the assembled model and clears
  ``self.processed_pdb_file``.

  stop_if_none : when true, raise Sorry if no ensembles were produced

  Returns the number of ensembles reported by the driver.
  """
  self.extract_selection()
  print >> self.out, ""
  self.fmodel.info().show_targets(out=self.out, text="starting model")
  make_header("Sampling sliding windows", out=self.out)
  sampling_started = time.time()
  driver = sliding_window.fragment_refinement_driver(
    fmodel=self.fmodel,
    pdb_hierarchy=self.pdb_hierarchy,
    processed_pdb_file=self.processed_pdb_file,
    params=self.params.sliding_window,
    mp_params=self.params,
    selection=self.selection,
    cif_objects=self.cif_objects,
    debug=self.debug,
    verbose=self.verbose,
    out=self.out)
  sampling_finished = time.time()
  elapsed = sampling_finished - sampling_started
  print >> self.out, "sampling time: %.3fs" % elapsed
  n_ensembles = driver.n_ensembles()
  nothing_generated = (n_ensembles == 0)
  if nothing_generated and stop_if_none :
    raise Sorry("No new conformations generated.")
  self.pdb_hierarchy = driver.assemble(out=self.out)
  # the processed file no longer matches the new hierarchy
  self.processed_pdb_file = None
  return n_ensembles
def __init__(self, params, out=sys.stdout):
  """
  Validate every structure in ``params.structure`` and collect one table
  column per structure.

  params : extracted parameters (output, processing, multiprocessing and
    structure scopes are read here)
  out : file-like object for progress messages
  """
  iotbx.table_one.table.__init__(
    self,
    text_field_separation=params.output.text_field_separation,
    count_anomalous_pairs_separately=\
      params.processing.count_anomalous_pairs_separately)
  self.output_dir = os.getcwd()
  self.params = params
  self.output_files = []
  make_header("Running data analysis and validation", out=out)
  # each worker receives the index of the structure it should process
  structure_indices = range(len(self.params.structure))
  mp_settings = params.multiprocessing
  all_results = easy_mp.parallel_map(
    iterable=structure_indices,
    func=self.run_single_structure,
    processes=mp_settings.nproc,
    method=mp_settings.technology,
    preserve_exception_message=True)
  for structure, single_result in zip(params.structure, all_results):
    print >> out, ""
    print >> out, "Collecting stats for structure %s" % structure.name
    self.add_column(single_result.validation.as_table1_column(
      label=structure.name,
      wavelength=structure.wavelength,
      re_compute_r_factors=params.processing.re_compute_r_factors,
      log=out))
def init_amber(self, params, pdb_hierarchy, log):
  # Set up AMBER restraints when the parameters carry an "amber" scope and
  # params.amber.use_amber is true.  Stores use_amber, amber_structs and
  # sander on self; nothing is returned.
  #
  # NOTE(review): the source layout was flattened; the nesting below (all
  # work happening inside the hasattr check) is a reconstruction - confirm
  # against the original file.
  if hasattr(params, "amber"):
    self.use_amber = params.amber.use_amber
    # read but not used further in this method
    print_amber_energies = params.amber.print_amber_energies
    if (self.use_amber):
      sites_cart = pdb_hierarchy.atoms().extract_xyz()
      compute_gradients = False
      make_header("Initializing AMBER", out=log)
      print >> log, " topology : %s" % params.amber.topology_file_name
      print >> log, " atom order : %s" % params.amber.order_file_name
      if params.amber.coordinate_file_name:
        print >> log, " coordinates : %s" % params.amber.coordinate_file_name
      from amber_adaptbx import interface
      self.amber_structs, sander = interface.get_amber_struct_object(
        params)
      self.sander = sander # used for cleanup
      import amber_adaptbx
      amber_geometry_manager = amber_adaptbx.geometry_manager(
        sites_cart=sites_cart,
        #number_of_restraints=geometry_energy.number_of_restraints,
        gradients_factory=flex.vec3_double,
        amber_structs=self.amber_structs)
      # The energies are evaluated once without gradients; the result is
      # not used again within this method.
      geometry = amber_geometry_manager.energies_sites(
        crystal_symmetry=self.geometry.crystal_symmetry,
        compute_gradients=compute_gradients)
def run(args, out=sys.stdout):
  """
  Entry point of mmtbx.validate_ions.

  Reads the model and reflection data, creates the ion identification
  manager, shows the current scattering statistics and validates the ions.

  args : list of command-line arguments
  out : file-like object that receives the log output

  Returns the value produced by ``manager.validate_ions``.
  """
  usage_string = """\
mmtbx.validate_ions model.pdb data.mtz [options ...]

Utility to validate ions that have been built into a model, based on local
environment, electron density maps, and atomic properties.
"""
  import mmtbx.ions.identify
  import mmtbx.command_line
  # Fix: usage_string used to be assigned and then dropped; forward it to
  # the loader the same way the other drivers do.
  cmdline = mmtbx.command_line.load_model_and_data(
    args=args,
    master_phil=master_phil(),
    out=out,
    process_pdb_file=True,
    create_fmodel=True,
    set_wavelength_from_model_header=True,
    set_inelastic_form_factors="sasaki",
    prefer_anomalous=True,
    usage_string=usage_string)
  params = cmdline.params
  make_header("Inspecting ions", out=out)
  manager = mmtbx.ions.identify.create_manager(
    pdb_hierarchy=cmdline.pdb_hierarchy,
    fmodel=cmdline.fmodel,
    geometry_restraints_manager=cmdline.geometry,
    wavelength=params.input.wavelength,
    params=params,
    verbose=params.debug,
    nproc=params.nproc,
    log=out)
  manager.show_current_scattering_statistics(out=out)
  return manager.validate_ions(out=out, debug=params.debug)
def run(args, out=sys.stdout):
  """
  Run the rigid-bond test on a model.

  Raises Usage when called without arguments or with ``--help``.  Otherwise
  the model (plus any restraint files) is interpreted, a model manager is
  built and its rigid-bond test is printed to ``out``.
  """
  help_requested = (len(args) == 0) or ("--help" in args)
  if help_requested:
    raise Usage("mmtbx.rigid_bond_test model.pdb")
  from mmtbx.monomer_library import pdb_interpretation
  import mmtbx.restraints
  import mmtbx.model
  import iotbx.phil
  cmdline = iotbx.phil.process_command_line_with_files(
    args=args,
    master_phil=master_phil,
    pdb_file_def="model",
    cif_file_def="restraints")
  params = cmdline.work.extract()
  validate_params(params)
  # model file first, followed by every restraint file
  interpretation_args = [params.model] + params.restraints
  processed_pdb_file = pdb_interpretation.run(args=interpretation_args)
  geometry = processed_pdb_file.geometry_restraints_manager(
    show_energies=True)
  restraints_manager = mmtbx.restraints.manager(
    geometry=geometry,
    normalization=True)
  model = mmtbx.model.manager(
    xray_structure=processed_pdb_file.xray_structure(),
    pdb_hierarchy=processed_pdb_file.all_chain_proxies.pdb_hierarchy,
    restraints_manager=restraints_manager,
    log=out)
  make_header("Rigid-bond test", out=out)
  model.show_rigid_bond_test(out=out, use_id_str=True, prefix=" ")
def initialisation(self, params, log=None):
  """
  Report and validate the three Amber input file names.

  Each name from ``params.amber`` is printed to ``log`` and then checked;
  a missing topology, order or coordinate file name raises Sorry, as does
  a topology file name ending in 'rst7'.

  params : parameters with an ``amber`` scope
  log : file-like object for output (passed to print and make_header)
  """
  make_header("Initializing Amber", out=log)
  # NOTE(review): line breaks inside this template were reconstructed.
  error = '''
  no filename for %s provided
    use %s=<filename>.%s
  '''
  amber = params.amber
  print(" topology : %s" % amber.topology_file_name, file=log)
  if not amber.topology_file_name:
    raise Sorry(error % ('topology', 'amber.topology_file_name', 'prmtop'))
  if amber.topology_file_name.endswith('rst7'):
    raise Sorry('possible wrong format - need .prmtop file')
  print(" atom order : %s" % amber.order_file_name, file=log)
  if not amber.order_file_name:
    raise Sorry(error % ('order', 'amber.order_file_name', 'order'))
  # The former guard "if coordinate_file_name or 1:" was always true, so the
  # coordinate file is simply reported and checked unconditionally.
  print(" coordinates : %s" % amber.coordinate_file_name, file=log)
  if not amber.coordinate_file_name:
    raise Sorry(
      error % ('coordinate', 'amber.coordinate_file_name', 'rst7'))
  make_header('...', out=log)
def run_cartesian_dynamics(xray_structure,
                           states_collector,
                           restraints_manager,
                           params,
                           stop_at_diff,
                           log):
  """
  Run cartesian dynamics on ``xray_structure`` and report the resulting
  RMSD from the starting coordinates to ``log``.

  The run settings (temperature, number of steps, time step, ...) are read
  from ``params``; nothing is returned.
  """
  from mmtbx.dynamics import cartesian_dynamics
  make_header("Simple cartesian dynamics", out=log)
  # keep an independent copy of the starting sites for the final comparison
  starting_sites = xray_structure.sites_cart().deep_copy()
  calculator = cartesian_dynamics.gradients_calculator_reciprocal_space(
    restraints_manager=restraints_manager,
    sites_cart=xray_structure.sites_cart(),
    wc=1)
  dynamics_settings = dict(
    xray_structure=xray_structure,
    gradients_calculator=calculator,
    temperature=params.temperature,
    states_collector=states_collector,
    n_steps=params.number_of_steps,
    time_step=params.time_step,
    initial_velocities_zero_fraction=params.initial_velocities_zero_fraction,
    n_print=params.n_print,
    stop_cm_motion=params.stop_cm_motion,
    stop_at_diff=stop_at_diff,
    log=log,
    verbose=1)
  cartesian_dynamics.run(**dynamics_settings)
  final_sites = xray_structure.sites_cart()
  rmsd = final_sites.rms_difference(starting_sites)
  print("", file=log)
  print("RMSD from starting structure: %.3f" % rmsd, file=log)
def run_cartesian_dynamics (
    xray_structure,
    states_collector,
    restraints_manager,
    params,
    stop_at_diff,
    log) :
  """
  Perform a cartesian dynamics run and print the RMSD between the starting
  and final coordinates of ``xray_structure`` to ``log``.

  Run settings are taken from ``params``; the function returns nothing.
  """
  from mmtbx.dynamics import cartesian_dynamics
  make_header("Simple cartesian dynamics", out=log)
  # deep copy: the run below updates xray_structure
  initial_sites = xray_structure.sites_cart().deep_copy()
  grad_calc = cartesian_dynamics.gradients_calculator_reciprocal_space(
    restraints_manager=restraints_manager,
    sites_cart=xray_structure.sites_cart(),
    wc=1)
  settings = dict(
    xray_structure=xray_structure,
    gradients_calculator=grad_calc,
    temperature=params.temperature,
    states_collector=states_collector,
    n_steps=params.number_of_steps,
    time_step=params.time_step,
    initial_velocities_zero_fraction=params.initial_velocities_zero_fraction,
    n_print=params.n_print,
    stop_cm_motion=params.stop_cm_motion,
    stop_at_diff=stop_at_diff,
    log=log,
    verbose=1)
  cartesian_dynamics.run(**settings)
  rmsd = xray_structure.sites_cart().rms_difference(initial_sites)
  print >> log, ""
  print >> log, "RMSD from starting structure: %.3f" % rmsd
def cleanup(self):
  """
  Call the cleanup routine of the sander module that matches the current
  Amber structures: ``sanderles`` when ``is_LES`` is set, ``sander``
  otherwise.  Nothing is done when either ``self.sander`` or
  ``self.amber_structs`` is falsy.
  """
  make_header('Cleaning up - Amber')
  if not (self.sander and self.amber_structs):
    return
  # import lazily so that only the required module has to be available
  if self.amber_structs.is_LES:
    import sanderles as sander_module
  else:
    import sander as sander_module
  sander_module.cleanup()
def run (args, out=sys.stdout) :
  """
  Command-line driver for mmtbx.water_screen.

  Loads model and data, builds an ion manager (the SVM manager class when
  ``params.use_svm`` is set) and analyzes the waters.

  args : list of command-line arguments
  out : file-like object receiving all output

  Returns a tuple ``(results, pdb_hierarchy)``.
  Raises Sorry when the SVM method is requested without explicit elements,
  or when an element name is not recognized.
  """
  usage_string = """
mmtbx.water_screen model.pdb data.mtz [options ...]

Utility to flag waters that may actually be elemental ions, based on local
environment, electron density maps, and atomic properties.
"""
  import mmtbx.ions.identify
  import mmtbx.command_line
  # The usage text was previously built but never handed to the loader;
  # other command-line drivers pass it via the usage_string keyword.
  cmdline = mmtbx.command_line.load_model_and_data(
    args=args,
    master_phil=master_phil(),
    out=out,
    process_pdb_file=True,
    set_wavelength_from_model_header=True,
    set_inelastic_form_factors="sasaki",
    create_fmodel=True,
    prefer_anomalous=True,
    usage_string=usage_string)
  fmodel = cmdline.fmodel
  params = cmdline.params
  if (params.use_svm) :
    if (params.elements is Auto) :
      raise Sorry("You must specify elements to consider when using the SVM "+
        "prediction method.")
  pdb_hierarchy = cmdline.pdb_hierarchy
  geometry = cmdline.geometry
  make_header("Inspecting water molecules", out=out)
  manager_class = None
  if (params.use_svm) :
    manager_class = mmtbx.ions.svm.manager
  manager = mmtbx.ions.identify.create_manager(
    pdb_hierarchy=pdb_hierarchy,
    fmodel=fmodel,
    geometry_restraints_manager=geometry,
    wavelength=params.input.wavelength,
    params=params,
    verbose=params.debug,
    nproc=params.nproc,
    log=out,
    manager_class=manager_class)
  manager.show_current_scattering_statistics(out=out)
  candidates = Auto
  if (params.elements is not Auto) and (params.elements is not None) :
    from cctbx.eltbx import chemical_elements
    lu = chemical_elements.proper_upper_list()
    # accept both comma- and whitespace-separated element lists
    candidates = params.elements.replace(",", " ").split()
    for elem in candidates :
      if (elem.upper() not in lu) :
        raise Sorry("Unrecognized element '%s'" % elem)
  results = manager.analyze_waters(
    out=out,
    debug=params.debug,
    candidates=candidates)
  return results, pdb_hierarchy
def run(args, out=sys.stdout):
  """
  Entry point of mmtbx.water_screen.

  Reads the model and data, creates the ion identification manager (using
  the SVM manager class if ``params.use_svm`` is true), prints the current
  scattering statistics and analyzes the water molecules.

  args : list of command-line arguments
  out : file-like object that receives the log output

  Returns ``(results, pdb_hierarchy)``.
  Raises Sorry if SVM prediction is requested while ``elements`` is Auto,
  or if an element in ``params.elements`` is unknown.
  """
  usage_string = """
mmtbx.water_screen model.pdb data.mtz [options ...]

Utility to flag waters that may actually be elemental ions, based on local
environment, electron density maps, and atomic properties.
"""
  import mmtbx.ions.identify
  import mmtbx.command_line
  # Fix: usage_string used to be assigned and then dropped; forward it to
  # the loader the same way the other drivers do.
  cmdline = mmtbx.command_line.load_model_and_data(
    args=args,
    master_phil=master_phil(),
    out=out,
    process_pdb_file=True,
    set_wavelength_from_model_header=True,
    set_inelastic_form_factors="sasaki",
    create_fmodel=True,
    prefer_anomalous=True,
    usage_string=usage_string)
  fmodel = cmdline.fmodel
  params = cmdline.params
  if (params.use_svm):
    if (params.elements is Auto):
      raise Sorry(
        "You must specify elements to consider when using the SVM " +
        "prediction method.")
  pdb_hierarchy = cmdline.pdb_hierarchy
  geometry = cmdline.geometry
  make_header("Inspecting water molecules", out=out)
  manager_class = None
  if (params.use_svm):
    manager_class = mmtbx.ions.svm.manager
  manager = mmtbx.ions.identify.create_manager(
    pdb_hierarchy=pdb_hierarchy,
    fmodel=fmodel,
    geometry_restraints_manager=geometry,
    wavelength=params.input.wavelength,
    params=params,
    verbose=params.debug,
    nproc=params.nproc,
    log=out,
    manager_class=manager_class)
  manager.show_current_scattering_statistics(out=out)
  candidates = Auto
  if (params.elements is not Auto) and (params.elements is not None):
    from cctbx.eltbx import chemical_elements
    lu = chemical_elements.proper_upper_list()
    # commas are treated like whitespace when splitting the element list
    candidates = params.elements.replace(",", " ").split()
    for elem in candidates:
      if (elem.upper() not in lu):
        raise Sorry("Unrecognized element '%s'" % elem)
  results = manager.analyze_waters(out=out,
                                   debug=params.debug,
                                   candidates=candidates)
  return results, pdb_hierarchy
def _main(args, out=sys.stdout):
  """
  Dump the site properties of a model to a pickle file.

  The pickle is written next to the first input PDB file, using its name
  without extension plus "_sites.pkl".

  Parameters
  ----------
  args : list of str
      Command-line arguments, without the executable name.
  out : file, optional
      Destination for log output.
  """
  usage_string = """\
phenix.python -m mmtbx.ions.svm.dump_sites model.pdb data.mtz [options ...]

Utility to dump information about the properties of water and ion sites in a
model. This properties include local environment, electron density maps, and
atomic properties.
"""
  loader_options = dict(
    args=args,
    master_phil=master_phil(),
    out=out,
    process_pdb_file=True,
    create_fmodel=True,
    prefer_anomalous=True,
    set_wavelength_from_model_header=True,
    set_inelastic_form_factors="sasaki",
    usage_string=usage_string,
  )
  cmdline = load_model_and_data(**loader_options)
  params = cmdline.params
  # this tool always works with the SVM machinery switched on
  params.use_svm = True
  make_header("Inspecting sites", out=out)
  manager = ions.identify.create_manager(
    pdb_hierarchy=cmdline.pdb_hierarchy,
    fmodel=cmdline.fmodel,
    geometry_restraints_manager=cmdline.geometry,
    wavelength=params.input.wavelength,
    params=params,
    verbose=params.debug,
    nproc=params.nproc,
    log=out,
  )
  manager.show_current_scattering_statistics(out=out)
  sites = dump_sites(manager)
  first_pdb = params.input.pdb.file_name[0]
  base_name = os.path.splitext(first_pdb)[0]
  out_name = base_name + "_sites.pkl"
  print("Dumping to", out_name, file=out)
  easy_pickle.dump(out_name, sites)
def _main(args, out=sys.stdout):
  """
  Collect the properties of all sites in a model and pickle them.

  The output file name is derived from the first input PDB file: its
  extension is replaced by "_sites.pkl".

  Parameters
  ----------
  args : list of str
      Arguments from the command line, excluding the executable name.
  out : file, optional
      Where log messages are written.
  """
  usage_string = """\
phenix.python -m mmtbx.ions.svm.dump_sites model.pdb data.mtz [options ...]

Utility to dump information about the properties of water and ion sites in a
model. This properties include local environment, electron density maps, and
atomic properties.
"""
  cmdline = load_model_and_data(
    args=args,
    master_phil=master_phil(),
    out=out,
    process_pdb_file=True,
    create_fmodel=True,
    prefer_anomalous=True,
    set_wavelength_from_model_header=True,
    set_inelastic_form_factors="sasaki",
    usage_string=usage_string)
  params = cmdline.params
  # force SVM mode regardless of what the user supplied
  params.use_svm = True
  make_header("Inspecting sites", out=out)
  manager_options = dict(
    pdb_hierarchy=cmdline.pdb_hierarchy,
    fmodel=cmdline.fmodel,
    geometry_restraints_manager=cmdline.geometry,
    wavelength=params.input.wavelength,
    params=params,
    verbose=params.debug,
    nproc=params.nproc,
    log=out)
  manager = ions.identify.create_manager(**manager_options)
  manager.show_current_scattering_statistics(out=out)
  sites = dump_sites(manager)
  pdb_root, _ = os.path.splitext(params.input.pdb.file_name[0])
  out_name = pdb_root + "_sites.pkl"
  print >> out, "Dumping to", out_name
  easy_pickle.dump(out_name, sites)
def __init__ (self,
    fmodel,
    pdb_hierarchy,
    params=None,
    processed_pdb_file=None,
    geometry_restraints_manager=None,
    cif_objects=(),
    cif_files=(), # XXX bug
    debug=None,
    verbose=True,
    out=sys.stdout) :
  """
  Build alternate conformers over a number of macro-cycles, refine the
  model, and print a validation and R-factor summary.

  All arguments are stored on ``self`` under their own names.  When
  ``params`` is None the defaults from ``master_phil.extract().alt_confs``
  are used.

  Raises Sorry when the first macro-cycle finds no alternate conformations.
  """
  # must stay the first statement: it copies locals() onto self
  adopt_init_args(self, locals())
  if (self.params is None) :
    self.params = master_phil.extract().alt_confs
  self.extract_selection()
  self.refine_cycle = 1
  self.map_file = None
  self.r_work_start = fmodel.r_work()
  self.r_free_start = fmodel.r_free()
  t_start = time.time()
  # Fix: read the cycle count from self.params.  The bare ``params``
  # argument is still None when the defaults above were substituted, which
  # made the original ``params.macro_cycles`` raise AttributeError.
  # NOTE(review): the nesting of this loop was reconstructed from a
  # flattened source - confirm against the original file.
  for i_cycle in range(self.params.macro_cycles) :
    n_alts = self.build_residue_conformers(stop_if_none=(i_cycle==0))
    if (n_alts == 0) :
      if (i_cycle == 0) :
        raise Sorry("No alternate conformations found.")
    else :
      self.refine(constrain_occupancies=False)
      # rejoin() is still called for its effect on the model; the former
      # ``refine_again`` flag derived from it was never read.
      self.rejoin()
      self.refine(title="Refining final model")
  make_header("Finished", out=out)
  from mmtbx.validation import molprobity
  validation = molprobity.molprobity(
    pdb_hierarchy=self.pdb_hierarchy,
    outliers_only=False)
  print >> self.out, ""
  validation.show_summary(out=self.out, prefix=" ")
  make_sub_header("Analyzing final model", out=out)
  analyze_model.process_pdb_hierarchy(
    pdb_hierarchy=self.pdb_hierarchy,
    validation=validation,
    log=self.out).show(out=out, verbose=self.verbose)
  print >> self.out, ""
  print >> self.out, "Start: r_work=%6.4f r_free=%6.4f" % \
    (self.r_work_start, self.r_free_start)
  print >> self.out, "Final: r_work=%6.4f r_free=%6.4f" % \
    (self.fmodel.r_work(), self.fmodel.r_free())
  t_end = time.time()
  print >> self.out, ""
  print >> self.out, "Total runtime: %d s" % int(t_end - t_start)
  print >> self.out, ""
def __init__(self, logger, folder, params):
  """
  Set up processing of one model/map folder.

  The folder's base name is stored as ``prefix``; its first four characters
  become ``pdb_code`` and everything from index 5 onwards ``map_code``.
  A header is written to ``logger`` before the working directory and the
  JSON record are prepared.
  """
  self.logger = logger
  self.folder = folder
  self.params = params
  # normpath first, so a trailing separator does not give an empty name
  normalized_folder = os.path.normpath(folder)
  self.prefix = os.path.basename(normalized_folder)
  self.pdb_code, self.map_code = self.prefix[0:4], self.prefix[5:]
  self.success = True
  title = 'Model: %s (emdb %s)' % (self.pdb_code, self.map_code)
  make_header(title, out=self.logger)
  make_sub_header('Initializing', out=self.logger)
  self.prepare_directory()
  self.initialize_json()
def process(self):
  """
  Check the working root folder, then process every folder returned by
  ``get_folders_sorted_by_size``.
  """
  make_header('Retrieving map and model files for CERES')
  print("Using EMDB mirror:", emdb)
  # Folder where this script is supposed to be executed
  self.check_work_root_folder()
  # Get list of folders like EMD-xxx
  folders_by_size = self.get_folders_sorted_by_size()
  self.process_folders(folders=folders_by_size)
def run (args, out=sys.stdout) :
  """
  Analyze static disorder (alternate conformations) in a model.

  Runs MolProbity validation, processes the hierarchy with
  ``analyze_model.process_pdb_hierarchy`` and prints both summaries to
  ``out``.  When ``params.pickle`` is set the result is pickled into the
  current directory, named after the first input PDB file.

  Returns the object produced by ``process_pdb_hierarchy``.
  """
  from mmtbx.disorder import analyze_model
  import mmtbx.validation.molprobity
  import mmtbx.command_line
  cmdline = mmtbx.command_line.load_model_and_data(
    args=args,
    master_phil=master_phil(),
    require_data=False,
    create_fmodel=True,
    process_pdb_file=True,
    usage_string="mmtbx.analyze_static_disorder model.pdb",
    out=out)
  hierarchy = cmdline.pdb_hierarchy
  params = cmdline.params
  validation = mmtbx.validation.molprobity.molprobity(
    pdb_hierarchy=hierarchy,
    xray_structure=cmdline.xray_structure,
    fmodel=cmdline.fmodel,
    crystal_symmetry=cmdline.crystal_symmetry,
    geometry_restraints_manager=cmdline.geometry,
    header_info=None,
    keep_hydrogens=False,
    outliers_only=False,
    nuclear=False)
  make_header("Analyzing model", out=out)
  ignore_occupancy = params.ignore_inconsistent_occupancy
  if ignore_occupancy :
    print >> out, "Discontinuous occupancies will be ignored."
  process = analyze_model.process_pdb_hierarchy(
    pdb_hierarchy=hierarchy,
    validation=validation,
    ignore_inconsistent_occupancy=params.ignore_inconsistent_occupancy,
    log=out)
  make_sub_header("MolProbity validation", out=out)
  validation.show_summary(out=out)
  make_sub_header("Disorder analysis", out=out)
  if (process.n_disordered != 0) :
    process.show(out=out, verbose=params.verbose)
  else :
    print >> out, "No alternate conformations found."
  if (params.pickle) :
    # drop the extension first, then the directory part
    pdb_root = os.path.splitext(params.input.pdb.file_name[0])[0]
    file_name = os.path.basename(pdb_root) + ".pkl"
    easy_pickle.dump(file_name, process)
  return process
def run(args, out=sys.stdout):
  """
  Report the static disorder found in a model.

  The model is validated with MolProbity and processed by
  ``analyze_model.process_pdb_hierarchy``; both reports go to ``out``.
  If ``params.pickle`` is true the processed result is pickled to
  "<input base name>.pkl" in the current directory.

  Returns the processed-hierarchy object.
  """
  from mmtbx.disorder import analyze_model
  import mmtbx.validation.molprobity
  import mmtbx.command_line
  cmdline = mmtbx.command_line.load_model_and_data(
    args=args,
    master_phil=master_phil(),
    require_data=False,
    create_fmodel=True,
    process_pdb_file=True,
    usage_string="mmtbx.analyze_static_disorder model.pdb",
    out=out)
  hierarchy = cmdline.pdb_hierarchy
  params = cmdline.params
  validation_options = dict(
    pdb_hierarchy=hierarchy,
    xray_structure=cmdline.xray_structure,
    fmodel=cmdline.fmodel,
    crystal_symmetry=cmdline.crystal_symmetry,
    geometry_restraints_manager=cmdline.geometry,
    header_info=None,
    keep_hydrogens=False,
    outliers_only=False,
    nuclear=False)
  validation = mmtbx.validation.molprobity.molprobity(**validation_options)
  make_header("Analyzing model", out=out)
  if params.ignore_inconsistent_occupancy:
    print("Discontinuous occupancies will be ignored.", file=out)
  process = analyze_model.process_pdb_hierarchy(
    pdb_hierarchy=hierarchy,
    validation=validation,
    ignore_inconsistent_occupancy=params.ignore_inconsistent_occupancy,
    log=out)
  make_sub_header("MolProbity validation", out=out)
  validation.show_summary(out=out)
  make_sub_header("Disorder analysis", out=out)
  if process.n_disordered != 0:
    process.show(out=out, verbose=params.verbose)
  else:
    print("No alternate conformations found.", file=out)
  if params.pickle:
    # strip the extension, then keep only the file's base name
    without_ext, _ = os.path.splitext(params.input.pdb.file_name[0])
    file_name = os.path.basename(without_ext) + ".pkl"
    easy_pickle.dump(file_name, process)
  return process
def run (args, out=None) :
  """
  Command-line driver for mmtbx.build_alt_confs_simple.

  Builds alternate conformations with ``single_residue.build_cycle`` and
  writes the resulting model to ``params.output.file_name``.  Log output is
  sent both to ``out`` and to a ".log" file named after the output file.

  args : list of command-line arguments
  out : file-like object; sys.stdout when None
  """
  if (out is None) : out = sys.stdout
  from mmtbx.building.alternate_conformations import single_residue
  import mmtbx.command_line
  cmdline = mmtbx.command_line.load_model_and_data(
    args=args,
    master_phil=master_phil(),
    process_pdb_file=True,
    create_fmodel=True,
    out=out,
    usage_string="""\
mmtbx.build_alt_confs_simple model.pdb data.mtz [options]

Simple tool for building alternate conformations by real-space refinement into
difference density. Not intended for production use - use the program
mmtbx.build_alternate_conformations if you want refinement and post-processing.
""")
  params = cmdline.params
  validate_params(params)
  log = multi_out()
  log.register("stdout", out)
  log_file_name = os.path.splitext(params.output.file_name)[0] + ".log"
  # NOTE(review): this handle is never closed in this function; it is left
  # as is because ``log`` keeps writing to it until the function returns.
  logfile = open(log_file_name, "w")
  log.register("logfile", logfile)
  pdb_hierarchy, n_alternates = single_residue.build_cycle(
    pdb_hierarchy=cmdline.pdb_hierarchy,
    fmodel=cmdline.fmodel,
    geometry_restraints_manager=cmdline.geometry,
    params=params,
    cif_objects=cmdline.cif_objects,
    selection=params.selection,
    nproc=params.nproc,
    verbose=params.output.verbose,
    debug=params.output.debug,
    out=log)
  # TODO real-space refinement of multi-conformer model
  # ``with`` closes the output file even if writing the model fails
  with open(params.output.file_name, "w") as f :
    f.write(pdb_hierarchy.as_pdb_string(
      crystal_symmetry=cmdline.fmodel.xray_structure))
  make_header("Building complete", out=out)
  print >> log, ""
  print >> log, "Wrote %s" % params.output.file_name
  print >> log, "You MUST refine this model before using it!"
def run(args, out=None):
  """
  Entry point of mmtbx.build_alt_confs_simple.

  Runs one ``single_residue.build_cycle`` and saves the multi-conformer
  model to ``params.output.file_name``.  Messages are duplicated to ``out``
  and to a log file whose name is the output file name with a ".log"
  extension.

  args : list of command-line arguments
  out : file-like object; defaults to sys.stdout when None
  """
  if (out is None):
    out = sys.stdout
  from mmtbx.building.alternate_conformations import single_residue
  import mmtbx.command_line
  cmdline = mmtbx.command_line.load_model_and_data(
    args=args,
    master_phil=master_phil(),
    process_pdb_file=True,
    create_fmodel=True,
    out=out,
    usage_string="""\
mmtbx.build_alt_confs_simple model.pdb data.mtz [options]

Simple tool for building alternate conformations by real-space refinement into
difference density. Not intended for production use - use the program
mmtbx.build_alternate_conformations if you want refinement and post-processing.
""")
  params = cmdline.params
  validate_params(params)
  log = multi_out()
  log.register("stdout", out)
  log_file_name = os.path.splitext(params.output.file_name)[0] + ".log"
  # NOTE(review): the log file handle is not closed anywhere in this
  # function; left unchanged because ``log`` still writes to it below.
  logfile = open(log_file_name, "w")
  log.register("logfile", logfile)
  pdb_hierarchy, n_alternates = single_residue.build_cycle(
    pdb_hierarchy=cmdline.pdb_hierarchy,
    fmodel=cmdline.fmodel,
    geometry_restraints_manager=cmdline.geometry,
    params=params,
    cif_objects=cmdline.cif_objects,
    selection=params.selection,
    nproc=params.nproc,
    verbose=params.output.verbose,
    debug=params.output.debug,
    out=log)
  # TODO real-space refinement of multi-conformer model
  # Context manager: the model file is closed even when the write raises.
  with open(params.output.file_name, "w") as f:
    f.write(
      pdb_hierarchy.as_pdb_string(
        crystal_symmetry=cmdline.fmodel.xray_structure))
  make_header("Building complete", out=out)
  print >> log, ""
  print >> log, "Wrote %s" % params.output.file_name
  print >> log, "You MUST refine this model before using it!"
def __init__ (self, params, out=sys.stdout) :
  """
  Run validation for each entry of ``params.structure`` and add the
  resulting column to the table.

  params : extracted parameters (output, multiprocessing, processing and
    structure scopes are read here)
  out : file-like object for progress messages
  """
  iotbx.table_one.table.__init__(
    self,
    text_field_separation=params.output.text_field_separation)
  self.output_dir = os.getcwd()
  self.params = params
  self.output_files = []
  make_header("Running data analysis and validation", out=out)
  # workers are handed the index of a structure, not the structure itself
  indices = range(len(self.params.structure))
  mp_params = params.multiprocessing
  results = easy_mp.parallel_map(
    iterable=indices,
    func=self.run_single_structure,
    processes=mp_params.nproc,
    method=mp_params.technology,
    preserve_exception_message=True)
  for structure, result in zip(params.structure, results) :
    print >> out, ""
    print >> out, "Collecting stats for structure %s" % structure.name
    column_options = dict(
      label=structure.name,
      wavelength=structure.wavelength,
      re_compute_r_factors=params.processing.re_compute_r_factors,
      log=out)
    self.add_column(result.validation.as_table1_column(**column_options))
def init_afitt(self, params, pdb_hierarchy, log):
  # Set up AFITT ligand restraints when the parameters carry an "afitt"
  # scope and params.afitt.use_afitt is true.  On success the configured
  # object is stored as self.afitt_object; nothing is returned.
  # Raises Sorry when the number of ligand names and ligand restraint
  # files still differ after the library lookup below.
  #
  # NOTE(review): the source layout was flattened; the nesting below is a
  # reconstruction - confirm against the original file.
  if hasattr(params, "afitt"):
    use_afitt = params.afitt.use_afitt
    if (use_afitt):
      from mmtbx.geometry_restraints import afitt
      # this only seems to work for a single ligand
      # multiple ligands are using the monomers input
      if params.afitt.ligand_file_name is None:
        ligand_paths = params.input.monomers.file_name
      else:
        ligand_paths = [params.afitt.ligand_file_name]
      afitt.validate_afitt_params(params.afitt)
      ligand_names = params.afitt.ligand_names.split(',')
      # Single ligand name whose path count does not match: try to find the
      # restraint file through the monomer library instead.
      if len(ligand_names) != len(ligand_paths) and len(
          ligand_names) == 1:
        # get restraints library instance of ligand
        from mmtbx.monomer_library import server
        for ligand_name in ligand_names:
          result = server.server().get_comp_comp_id_direct(
            ligand_name)
          if result is not None:
            so = result.source_info # not the smartest way
            # source_info beginning with "file:" is taken to carry the
            # restraint file path after the first colon
            if so.find("file:") == 0:
              ligand_paths = [so.split(":")[1].strip()]
      if len(ligand_names) != len(ligand_paths):
        raise Sorry("need restraint CIF files for each ligand")
      make_header("Initializing AFITT", out=log)
      #print >> log, " ligands: %s" % params.afitt.ligand_file_name
      afitt_object = afitt.afitt_object(ligand_paths,
                                        ligand_names,
                                        pdb_hierarchy,
                                        params.afitt.ff,
                                        params.afitt.scale)
      print >> log, afitt_object
      afitt_object.check_covalent(self.geometry)
      # afitt log output
      afitt_object.initial_energies = afitt.get_afitt_energy(
        ligand_paths,
        ligand_names,
        pdb_hierarchy,
        params.afitt.ff,
        pdb_hierarchy.atoms().extract_xyz(),
        self.geometry)
      self.afitt_object = afitt_object
def run (args, out=sys.stdout) :
  """
  Command-line driver for mmtbx.apply_ncs_to_ligand.

  Loads model and data, applies ``mmtbx.ncs.ligands.apply_ligand_ncs`` and
  writes the resulting PDB file and maps.  Missing output names default to
  "ncs_ligands.pdb" and "ncs_ligands.mtz".

  Returns the result object of ``apply_ligand_ncs``.
  """
  from mmtbx.command_line import load_model_and_data
  import mmtbx.ncs.ligands
  usage = """\
mmtbx.apply_ncs_to_ligand model.pdb data.mtz ligand_code=LIG ...

Given a multi-chain PDB file and a ligand residue name, find copies of the
ligand in the input file, identify NCS operators relating macromolecule chains,
and search for additional ligand sites by applying these operators. Used to
complete ligand placement in cases where LigandFit (etc.) is only partially
successful.
"""
  # the ligand NCS parameters are spliced into the master PHIL template
  full_phil = master_phil_str % mmtbx.ncs.ligands.ncs_ligand_phil
  cmdline = load_model_and_data(
    args=args,
    master_phil=full_phil,
    out=out,
    process_pdb_file=True,
    generate_input_phil=True,
    usage_string=usage)
  pdb_hierarchy = cmdline.pdb_hierarchy
  fmodel = cmdline.fmodel
  params = cmdline.params
  if (params.output_file is None) :
    params.output_file = "ncs_ligands.pdb"
  if (params.output_map is None) :
    params.output_map = "ncs_ligands.mtz"
  make_header("Finding ligands by NCS operators", out=out)
  ncs_result = mmtbx.ncs.ligands.apply_ligand_ncs(
    pdb_hierarchy=pdb_hierarchy,
    fmodel=fmodel,
    params=params,
    ligand_code=params.ligand_code,
    atom_selection=None,
    add_new_ligands_to_pdb=params.add_to_model,
    log=out)
  ncs_result.write_pdb(params.output_file)
  ncs_result.write_maps(params.output_map)
  return ncs_result
def run(args, out=sys.stdout):
  """
  Print the rigid-bond test for a model.

  With no arguments, or when ``--help`` is among them, Usage is raised.
  Otherwise the command line is processed, the model is interpreted
  together with any restraint files, and the model manager's rigid-bond
  test is written to ``out``.
  """
  no_arguments = (len(args) == 0)
  if no_arguments or ("--help" in args):
    raise Usage("mmtbx.rigid_bond_test model.pdb")
  from mmtbx.monomer_library import pdb_interpretation
  import mmtbx.restraints
  import mmtbx.model
  import iotbx.phil
  cmdline = iotbx.phil.process_command_line_with_files(
    args=args,
    master_phil=master_phil,
    pdb_file_def="model",
    cif_file_def="restraints",
  )
  params = cmdline.work.extract()
  validate_params(params)
  # the interpreter takes the model followed by the restraint files
  input_files = [params.model] + params.restraints
  processed_pdb_file = pdb_interpretation.run(args=input_files)
  geometry = processed_pdb_file.geometry_restraints_manager(show_energies=True)
  restraints_manager = mmtbx.restraints.manager(
    geometry=geometry,
    normalization=True,
  )
  manager_options = dict(
    xray_structure=processed_pdb_file.xray_structure(),
    pdb_hierarchy=processed_pdb_file.all_chain_proxies.pdb_hierarchy,
    restraints_manager=restraints_manager,
    log=out,
  )
  model = mmtbx.model.manager(**manager_options)
  make_header("Rigid-bond test", out=out)
  model.show_rigid_bond_test(out=out, use_id_str=True, prefix=" ")
def run (args, out=None) :
  # Command-line driver for mmtbx.simulate_low_res_data.
  #
  # Prints a banner and usage text to `out`, collects PHIL sources from the
  # arguments (files and free-form parameters), shows the working
  # parameters and hands them to prepare_data().
  #
  # args : list of command-line arguments
  # out  : file-like object; sys.stdout when None
  #
  # Returns None in every path visible here.
  #
  # NOTE(review): the source layout was flattened; the nesting below and
  # the line breaks inside the banner text are a reconstruction - confirm
  # against the original file.
  if (out is None) : out = sys.stdout
  make_header("mmtbx.simulate_low_res_data", out=out)
  print >> out, """
 For generation of realistic data (model-based, or using real
 high-resolution data) for methods development.

 *********************************** WARNING: ***********************************
 this is an experimental program - definitely NOT bug-free.
 Use at your own risk!

 Usage:
 mmtbx.simulate_low_res_data model.pdb [options...]
 (generate data from a PDB file)

 mmtbx.simulate_low_res_data highres.mtz [model.pdb] [options...]
 (truncate high-resolution data)

 mmtbx.simulate_low_res_data --help
 (print full parameters with additional info)
"""
  # No arguments or --help: show the parameters and stop.
  if (len(args) == 0) or ("--help" in args) :
    print >> out, "# full parameters:"
    # note: show() is called without out=, unlike the prints above
    if ("--help" in args) :
      master_phil.show(attributes_level=1)
    else :
      master_phil.show()
    return
  from iotbx import file_reader
  interpreter = master_phil.command_line_argument_interpreter(
    home_scope="simulate_data")
  pdb_in = None
  pdb_hierarchy = None # not used further in this function
  hkl_in = None
  user_phil = []
  for arg in args :
    if os.path.isfile(arg) :
      # existing files are dispatched on their detected type
      f = file_reader.any_file(arg)
      if (f.file_type == "pdb") :
        pdb_in = f.file_object
        user_phil.append(interpreter.process(arg="pdb_file=%s" % f.file_name))
      elif (f.file_type == "hkl") :
        hkl_in = f.file_object
        user_phil.append(interpreter.process(arg="hkl_file=%s" % f.file_name))
      elif (f.file_type == "phil") :
        user_phil.append(f.file_object)
    else :
      # anything else is tried as a PHIL parameter; failures are reported
      # and skipped rather than aborting the run
      try :
        arg_phil = interpreter.process(arg=arg)
      except RuntimeError :
        print >> out, "ignoring uninterpretable argument '%s'" % arg
      else :
        user_phil.append(arg_phil)
  working_phil = master_phil.fetch(sources=user_phil)
  make_header("Working parameters", out=out)
  working_phil.show(prefix=" ")
  params_ = working_phil.extract()
  params = params_.simulate_data
  prepare_data(
    params=params,
    hkl_in=hkl_in,
    pdb_in=pdb_in,
    out=out)
def run(args, out=sys.stdout):
  """
  Command-line entry point for mmtbx.generate_disorder.

  Loads model and data, computes partial omit maps for the selected atoms,
  runs `params.n_confs` annealing jobs through `easy_mp.pool_map`, and
  writes the resulting conformations as a multi-model file to
  `params.output.file_name`.

  :param args: list of command-line arguments
  :param out: writable stream for log output
  :returns: list with one RMSD (selected atoms vs. the starting
    coordinates) per model written
  """
  from mmtbx.building import alternate_conformations
  import mmtbx.command_line
  import mmtbx.building
  import iotbx.pdb.hierarchy
  # NOTE(review): the usage text is kept as found (its line breaks look
  # flattened); only the leading line continuation was restored so the
  # literal no longer starts with an invalid "\ " escape.
  cmdline = mmtbx.command_line.load_model_and_data(
    args=args,
    master_phil=get_master_phil(),
    process_pdb_file=True,
    create_fmodel=True,
    out=out,
    usage_string="""\
mmtbx.generate_disorder model.pdb data.mtz selection="resname ATP" [occ=0.6] Perform simulatead annealing against an mFo-DFc map to generate possible alternate conformations for a selection of atoms. For development purposes and experimentation only. """)
  params = cmdline.params
  fmodel = cmdline.fmodel
  validate_params(params)
  pdb_hierarchy = cmdline.pdb_hierarchy
  make_header("Generating disorder", out=out)
  a_c_p = cmdline.processed_pdb_file.all_chain_proxies
  selection = a_c_p.selection(params.selection)
  if params.whole_residues:
    selection = iotbx.pdb.atom_selection.expand_selection_to_entire_atom_groups(
      selection=selection,
      pdb_atoms=pdb_hierarchy.atoms())
  n_sel = selection.count(True)
  assert (n_sel > 0)
  print("%d atoms selected" % n_sel, file=out)
  selection_delete = None
  if params.selection_delete is not None:
    selection_delete = a_c_p.selection(params.selection_delete)
  two_fofc_map, fofc_map = alternate_conformations.get_partial_omit_map(
    fmodel=fmodel.deep_copy(),
    selection=selection,
    selection_delete=selection_delete,
    negate_surrounding=params.negate_surrounding_sites,
    map_file_name=params.output.map_file_name,
    partial_occupancy=params.occ,
    resolution_factor=params.resolution_factor)
  # The difference map is the target unless 2mFo-DFc is requested.
  target_map = fofc_map
  if params.target_map == "2mFo-DFc":
    target_map = two_fofc_map
  annealer = annealing_manager(
    xray_structure=fmodel.xray_structure,
    pdb_hierarchy=pdb_hierarchy,
    processed_pdb_file=cmdline.processed_pdb_file,
    target_map=target_map,
    two_fofc_map=two_fofc_map,
    d_min=fmodel.f_obs().d_min(),
    params=params,
    selection=selection,
    resolution_factor=params.resolution_factor,
    out=out,
    debug=params.output.debug)
  # Copy the starting coordinates; they are the RMSD reference below.
  sites_ref = pdb_hierarchy.atoms().extract_xyz().deep_copy()
  sites_all = easy_mp.pool_map(
    fixed_func=annealer,
    iterable=range(params.n_confs),
    processes=params.nproc)
  ensemble = iotbx.pdb.hierarchy.root()
  if params.output.include_starting_model:
    sites_all.insert(0, sites_ref)
  rmsds = []
  for i_conf, sites_new in enumerate(sites_all):
    assert (sites_new is not None)
    model = pdb_hierarchy.only_model().detached_copy()
    model.atoms().set_xyz(sites_new)
    model.id = str(i_conf+1)
    rmsd = sites_new.select(selection).rms_difference(
      sites_ref.select(selection))
    print("Model %d: rmsd=%.3f" % (i_conf+1, rmsd), file=out)
    rmsds.append(rmsd)
    ensemble.append_model(model)
  # The context manager closes the file even if serialization fails.
  with open(params.output.file_name, "w") as f:
    f.write(ensemble.as_pdb_string(
      crystal_symmetry=fmodel.xray_structure))
  print("Wrote ensemble model to %s" % params.output.file_name, file=out)
  return rmsds
def run(args, out=None, verbose=True):
  """
  Command-line entry point for mmtbx.ringer (model-manager based variant).

  Reads the model, then either picks map coefficients from a reflection
  file (using `params.map_label` / `params.difference_map_label`, or
  guessing from well-known label prefixes) or loads a CCP4 map through
  `map_model_manager`.  The residues are then scanned with
  `iterate_over_residues` and the results written to
  "<output_base>.pkl" and "<output_base>.csv".

  :param args: list of command-line arguments
  :param out: writable stream for log output; defaults to sys.stdout
  :param verbose: when true, print timing information
  :returns: the list of results, unless `params.gui` is set, in which case
    `run_app(results)` is called and None is returned
  :raises Sorry: when suitable map coefficients cannot be determined
  """
  t0 = time.time()
  if out is None:
    out = sys.stdout
  from iotbx import file_reader
  import iotbx.phil
  # NOTE(review): text of the literal kept as found (line breaks look
  # flattened); only the leading line continuation was restored so it no
  # longer starts with an invalid "\ " escape.
  cmdline = iotbx.phil.process_command_line_with_files(
    args=args,
    master_phil=master_phil,
    pdb_file_def="model",
    reflection_file_def="map_coeffs",
    map_file_def="map_file",
    cif_file_def="cif_file",
    usage_string="""\
mmtbx.ringer model.pdb map_coeffs.mtz [cif_file ...] [options] %s """ % __doc__)
  cmdline.work.show()
  params = cmdline.work.extract()
  validate_params(params)
  # Opened only to validate the file type; the model itself is read below.
  pdb_in = file_reader.any_file(params.model, force_type="pdb")
  pdb_in.check_file_type("pdb")
  pdb_inp = iotbx.pdb.input(file_name=params.model)
  model = mmtbx.model.manager(
    model_input = pdb_inp)
  crystal_symmetry_model = model.crystal_symmetry()
  if crystal_symmetry_model is not None:
    crystal_symmetry_model.show_summary()
  hierarchy = model.get_hierarchy()
  map_coeffs = map_inp = difference_map_coeffs = None
  map_data, unit_cell = None, None
  # get miller array if map coefficients are provided
  if params.map_coeffs is not None:
    mtz_in = file_reader.any_file(params.map_coeffs, force_type="hkl")
    mtz_in.check_file_type("hkl")
    best_guess = None
    best_labels = []
    all_labels = []
    for array in mtz_in.file_server.miller_arrays:
      if array.is_complex_array():
        labels = array.info().label_string()
        if labels == params.map_label:
          map_coeffs = array
        elif labels == params.difference_map_label:
          difference_map_coeffs = array
        else:
          # No exact match: collect candidates for automatic selection.
          if params.map_label is None:
            all_labels.append(labels)
            if labels.startswith(("2FOFCWT", "2mFoDFc", "FWT")):
              best_guess = array
              best_labels.append(labels)
          if params.difference_map_label is None:
            if labels.startswith(("FOFCWT", "DELFWT")):
              difference_map_coeffs = array
    if map_coeffs is None:
      if len(all_labels) == 0:
        raise Sorry("No valid (pre-weighted) map coefficients found in file.")
      elif best_guess is None:
        raise Sorry("Couldn't automatically determine appropriate map labels. "+
          "Choices:\n %s" % " \n".join(all_labels))
      elif len(best_labels) > 1:
        raise Sorry("Multiple appropriate map coefficients found in file. "+
          "Choices:\n %s" % "\n ".join(best_labels))
      map_coeffs = best_guess
      print(" Guessing %s for input map coefficients" % best_labels[0],
        file=out)
  # get map_inp object and do sanity checks if map is provided
  else:
    ccp4_map_in = file_reader.any_file(params.map_file, force_type="ccp4_map")
    ccp4_map_in.check_file_type("ccp4_map")
    map_inp = ccp4_map_in.file_object
    base = map_model_manager(
      map_manager = map_inp,
      model = model,
      ignore_symmetry_conflicts = params.ignore_symmetry_conflicts)
    cs_consensus = base.crystal_symmetry()
    hierarchy = base.model().get_hierarchy()
    map_data = base.map_data()
    unit_cell = map_inp.grid_unit_cell()
  hierarchy.atoms().reset_i_seq()
  make_header("Iterating over residues", out=out)
  t1 = time.time()
  results = iterate_over_residues(
    pdb_hierarchy=hierarchy,
    map_coeffs=map_coeffs,
    difference_map_coeffs=difference_map_coeffs,
    map_data = map_data,
    unit_cell = unit_cell,
    params=params,
    log=out).results
  t2 = time.time()
  if verbose:
    print("Time excluding I/O: %8.1fs" % (t2 - t1), file=out)
    print("Overall runtime: %8.1fs" % (t2 - t0), file=out)
  if params.output_base is None:
    pdb_base = os.path.basename(params.model)
    params.output_base = os.path.splitext(pdb_base)[0] + "_ringer"
  easy_pickle.dump("%s.pkl" % params.output_base, results)
  print("Wrote %s.pkl" % params.output_base, file=out)
  csv = "\n".join([ r.format_csv() for r in results ])
  # Close the CSV deterministically instead of relying on garbage
  # collection to flush it.
  with open("%s.csv" % params.output_base, "w") as csv_file:
    csv_file.write(csv)
  print("Wrote %s.csv" % params.output_base, file=out)
  print("\nReference:", file=out)
  print("""\
Lang PT, Ng HL, Fraser JS, Corn JE, Echols N, Sales M, Holton JM, Alber T. Automated electron-density sampling reveals widespread conformational polymorphism in proteins. Protein Sci. 2010 Jul;19(7):1420-31. PubMed PMID: 20499387""", file=out)
  if params.gui:
    run_app(results)
  else:
    return results
def make_header(line, out=None):
  """
  Write a section header for `line` to `out`.

  When the module-level flag `enable_show_process_info` is true, process
  information is written to the same stream first.  The header itself is
  produced by `str_utils.make_header` with a width of 80.

  :param line: header text
  :param out: writable stream; defaults to sys.stdout
  """
  stream = sys.stdout if out is None else out
  if enable_show_process_info:
    show_process_info(out=stream)
  str_utils.make_header(line, out=stream, header_len=80)
def start_coot_and_wait(pdb_file, map_file, data_file, work_dir=None,
    coot_cmd="coot", needs_rebuild=False, log=None):
  """
  Launch Coot on a model/map pair and block until the user is done editing.

  A driver script ("edit_in_coot.py") is written into `work_dir` from this
  module's own source plus a `manager(...)` call, Coot is started in the
  background, and the function polls the directory of `pdb_file` until
  "coot_out_tmp.pdb" appears.  While waiting, a ".NEW_MAPS" marker file
  triggers recalculation of "maps_for_coot.mtz" from "coot_tmp_edits.pdb".
  Finally maps are computed for the edited model.

  Side effects: changes the current working directory to `work_dir`
  (creating it if needed) and removes stale "coot_out*.pdb" files there.

  :param pdb_file: model handed to Coot; its directory is the one polled
  :param map_file: map coefficients handed to Coot
  :param data_file: reflection file used for map calculation
  :param work_dir: directory for the script and outputs (default: cwd)
  :param coot_cmd: command used to start Coot
  :param needs_rebuild: passed to the Coot-side manager; when true the
    result reports whether a "NO_BUILD" marker file was found
  :param log: writable stream for messages; defaults to sys.stdout
  :returns: group_args with pdb_file, map_file and skip_rebuild
    (None unless `needs_rebuild`)
  """
  if log is None:
    log = sys.stdout
  if work_dir is None:
    work_dir = os.getcwd()
  if not os.path.isdir(work_dir):
    os.makedirs(work_dir)
  import mmtbx.maps.utils
  from libtbx.str_utils import make_header
  from libtbx import easy_run
  from libtbx import group_args
  import cootbx
  base_script = __file__.replace(".pyc", ".py")
  os.chdir(work_dir)
  # Remove results from an earlier session so the polling loop cannot
  # mistake them for new output.
  for stale in ("coot_out_tmp.pdb", "coot_out.pdb"):
    if os.path.exists(stale):
      os.remove(stale)
  # Both handles are managed so neither leaks if an I/O step fails.
  with open(base_script) as template:
    script_source = template.read()
  with open("edit_in_coot.py", "w") as f:
    f.write(script_source)
    f.write("\n")
    f.write("import coot\n")
    cootbx.write_disable_nomenclature_errors(f)
    f.write("m = manager(\"%s\", \"%s\", needs_rebuild=%s)\n" %
      (pdb_file, map_file, needs_rebuild))
  make_header("Interactive editing in Coot", log)
  easy_run.call("\"%s\" --no-state-script --script edit_in_coot.py &" %
    coot_cmd)
  print(" Waiting for coot_out_tmp.pdb to appear at %s" %
    str(time.asctime()), file=log)
  base_dir = os.path.dirname(pdb_file)
  tmp_file = os.path.join(base_dir, "coot_out_tmp.pdb")
  edit_file = os.path.join(base_dir, "coot_tmp_edits.pdb")
  maps_file = os.path.join(base_dir, ".NEW_MAPS")
  while True:
    if os.path.isfile(tmp_file):
      print(" Coot editing complete at %s" % str(time.asctime()), file=log)
      break
    elif os.path.isfile(maps_file):
      t1 = time.time()
      assert os.path.isfile(edit_file)
      mmtbx.maps.utils.create_map_from_pdb_and_mtz(
        pdb_file=edit_file,
        mtz_file=data_file,
        output_file=os.path.join(base_dir, "maps_for_coot.mtz"),
        fill=True,
        out=log)
      t2 = time.time()
      print("Calculated new map coefficients in %.1fs" % (t2 - t1), file=log)
      # Consume the marker so the maps are not recomputed on the next poll.
      os.remove(maps_file)
    else:
      # t_wait is not defined in this function and is read from module
      # scope; the division suggests milliseconds - TODO confirm.
      time.sleep(t_wait / 1000.)
  shutil.move(tmp_file, "coot_out.pdb")
  mmtbx.maps.utils.create_map_from_pdb_and_mtz(
    pdb_file="coot_out.pdb",
    mtz_file=data_file,
    output_file="coot_out_maps.mtz",
    fill=True,
    out=log)
  new_model = os.path.join(work_dir, "coot_out.pdb")
  new_map = os.path.join(work_dir, "coot_out_maps.mtz")
  skip_rebuild = None
  if needs_rebuild:
    skip_rebuild = os.path.isfile(os.path.join(base_dir, "NO_BUILD"))
  return group_args(
    pdb_file=new_model,
    map_file=new_map,
    skip_rebuild=skip_rebuild)
def run(args, out=None, verbose=True):
  """
  Command-line entry point for mmtbx.ringer (hierarchy-based variant).

  Reads the model hierarchy, then either picks map coefficients from a
  reflection file (using `params.map_label` /
  `params.difference_map_label`, or guessing from well-known label
  prefixes) or loads a CCP4 map.  The residues are scanned with
  `iterate_over_residues` and the results written to "<output_base>.pkl"
  and "<output_base>.csv".

  :param args: list of command-line arguments
  :param out: writable stream for log output; defaults to sys.stdout
  :param verbose: when true, print timing information
  :returns: the list of results, unless `params.gui` is set, in which case
    `run_app(results)` is called and None is returned
  :raises Sorry: when suitable map coefficients cannot be determined
  """
  t0 = time.time()
  if out is None:
    out = sys.stdout
  from iotbx import file_reader
  import iotbx.phil
  # NOTE(review): text of the literal kept as found (line breaks look
  # flattened); only the leading line continuation was restored so it no
  # longer starts with an invalid "\ " escape.
  cmdline = iotbx.phil.process_command_line_with_files(
    args=args,
    master_phil=master_phil,
    pdb_file_def="model",
    reflection_file_def="map_coeffs",
    map_file_def="map_file",
    cif_file_def="cif_file",
    usage_string="""\
mmtbx.ringer model.pdb map_coeffs.mtz [cif_file ...] [options] %s """ % __doc__)
  cmdline.work.show()
  params = cmdline.work.extract()
  validate_params(params)
  pdb_in = file_reader.any_file(params.model, force_type="pdb")
  pdb_in.check_file_type("pdb")
  hierarchy = pdb_in.file_object.hierarchy
  hierarchy.atoms().reset_i_seq()
  map_coeffs = ccp4_map = difference_map_coeffs = None
  if params.map_coeffs is not None:
    mtz_in = file_reader.any_file(params.map_coeffs, force_type="hkl")
    mtz_in.check_file_type("hkl")
    best_guess = None
    best_labels = []
    all_labels = []
    for array in mtz_in.file_server.miller_arrays:
      if array.is_complex_array():
        labels = array.info().label_string()
        if labels == params.map_label:
          map_coeffs = array
        elif labels == params.difference_map_label:
          difference_map_coeffs = array
        else:
          # No exact match: collect candidates for automatic selection.
          if params.map_label is None:
            all_labels.append(labels)
            if labels.startswith(("2FOFCWT", "2mFoDFc", "FWT")):
              best_guess = array
              best_labels.append(labels)
          if params.difference_map_label is None:
            if labels.startswith(("FOFCWT", "DELFWT")):
              difference_map_coeffs = array
    if map_coeffs is None:
      if len(all_labels) == 0:
        raise Sorry("No valid (pre-weighted) map coefficients found in file.")
      elif best_guess is None:
        raise Sorry("Couldn't automatically determine appropriate map labels. "+
          "Choices:\n %s" % " \n".join(all_labels))
      elif len(best_labels) > 1:
        raise Sorry("Multiple appropriate map coefficients found in file. "+
          "Choices:\n %s" % "\n ".join(best_labels))
      map_coeffs = best_guess
      print(" Guessing %s for input map coefficients" % best_labels[0],
        file=out)
  else:
    ccp4_map_in = file_reader.any_file(params.map_file, force_type="ccp4_map")
    ccp4_map_in.check_file_type("ccp4_map")
    ccp4_map = ccp4_map_in.file_object
  make_header("Iterating over residues", out=out)
  t1 = time.time()
  results = iterate_over_residues(
    pdb_hierarchy=hierarchy,
    map_coeffs=map_coeffs,
    difference_map_coeffs=difference_map_coeffs,
    ccp4_map=ccp4_map,
    params=params,
    log=out).results
  t2 = time.time()
  if verbose:
    print("Time excluding I/O: %8.1fs" % (t2 - t1), file=out)
    print("Overall runtime: %8.1fs" % (t2 - t0), file=out)
  if params.output_base is None:
    pdb_base = os.path.basename(params.model)
    params.output_base = os.path.splitext(pdb_base)[0] + "_ringer"
  easy_pickle.dump("%s.pkl" % params.output_base, results)
  print("Wrote %s.pkl" % params.output_base, file=out)
  csv = "\n".join([ r.format_csv() for r in results ])
  # Close the CSV deterministically instead of relying on garbage
  # collection to flush it.
  with open("%s.csv" % params.output_base, "w") as csv_file:
    csv_file.write(csv)
  print("Wrote %s.csv" % params.output_base, file=out)
  print("\nReference:", file=out)
  print("""\
Lang PT, Ng HL, Fraser JS, Corn JE, Echols N, Sales M, Holton JM, Alber T. Automated electron-density sampling reveals widespread conformational polymorphism in proteins. Protein Sci. 2010 Jul;19(7):1420-31. PubMed PMID: 20499387""", file=out)
  if params.gui:
    run_app(results)
  else:
    return results
def __init__(self, params, hkl_in=None, pdb_in=None, out=sys.stdout):
  """
  Run the whole data-simulation pipeline at construction time.

  Obtains raw amplitudes either from experimental data (`from_hkl`) or
  from the model (`from_pdb`), optionally imports R-free flags, truncates
  the data (`truncate_data`), optionally adds noise, reports statistics
  and finally calls `write_output`.

  :param params: extracted parameter scope (pdb_file, hkl_file, d_min,
    generate_noise, r_free_flags, ...)
  :param hkl_in: already-loaded reflection file object, or None to read
    `params.hkl_file`
  :param pdb_in: already-loaded model file object, or None to read
    `params.pdb_file`
  :param out: writable stream for all log output
  :raises Sorry: when neither a model nor data is given, or when noise is
    requested without data and without a noise profile file
  """
  # Must run first so that locals() still holds only the constructor
  # arguments; self.hkl_in / self.pdb_in are read below without being
  # assigned here, so they presumably come from this call - confirm.
  adopt_init_args(self, locals())
  self.params = params
  self.out = out
  self.pdb_hierarchy = None
  if (params.pdb_file is None) and (params.hkl_file is None):
    raise Sorry("No PDB file specified.")
  if (params.generate_noise.add_noise) and (params.hkl_file is None):
    if params.generate_noise.noise_profile_file is None:
      raise Sorry(
        "noise_profile_file required when add_noise=True and "
        "hkl_file is undefined.")
  if (pdb_in is None) and (params.pdb_file is not None):
    f = file_reader.any_file(params.pdb_file, force_type="pdb")
    f.assert_file_type("pdb")
    self.pdb_in = f.file_object
  if (self.hkl_in is None) and (params.hkl_file is not None):
    # Fixed attribute name: this previously read params.hkl_File, which
    # does not match the hkl_file parameter tested on the line above.
    f = file_reader.any_file(params.hkl_file, force_type="hkl")
    f.assert_file_type("hkl")
    self.hkl_in = f.file_object
  if self.pdb_in is not None:
    self.pdb_hierarchy = self.pdb_in.hierarchy
  # Experimental data takes precedence over model-derived data.
  if self.hkl_in is not None:
    make_header("Extracting experimental data", out=out)
    f_raw, r_free = self.from_hkl()
  elif self.pdb_in is not None:
    make_header("Generating fake data with phenix.fmodel", out=out)
    f_raw, r_free = self.from_pdb()
  if params.r_free_flags.file_name is not None:
    f_raw, r_free = self.import_r_free_flags(f_raw)
  self.r_free = r_free
  make_header("Applying low-resolution filtering", out=out)
  print(" Target resolution: %.2f A" % params.d_min, file=out)
  self.n_residues, self.n_bases = None, None
  if self.pdb_in is not None:
    self.n_residues, self.n_bases = get_counts(self.pdb_hierarchy)
  #if (params.auto_adjust):
  #  if (pdb_in is None):
  #    raise Sorry("You must supply a PDB file when auto_adjust=True.")
  self.f_out = self.truncate_data(f_raw)
  if params.generate_noise.add_noise:
    make_header("Adding noise using sigma profile", out=out)
    # NOTE(review): nesting below was reconstructed from
    # whitespace-flattened source; i_obs only exists when sigmas had to be
    # generated, so its uses are kept in the same branch - confirm.
    if self.f_out.sigmas() is None:
      # NOTE(review): iso_scale is only bound when a model is available;
      # without one the create_sigmas call below cannot get a Wilson B.
      if self.pdb_in is not None:
        iso_scale, aniso_scale = wilson_scaling(
          self.f_out, self.n_residues, self.n_bases)
      i_obs = create_sigmas(
        f_obs=self.f_out,
        params=params.generate_noise,
        wilson_b=iso_scale.b_wilson,
        return_as_amplitudes=False)
      apply_sigma_noise(i_obs)
      self.f_out = i_obs.f_sq_as_f()
  make_header("Done processing", out=out)
  print(" Completeness after processing: %.2f%%" %
    (self.f_out.completeness() * 100.), file=out)
  print(" Final resolution: %.2f A" % self.f_out.d_min(), file=out)
  if self.pdb_in is not None:
    iso_scale, aniso_scale = wilson_scaling(self.f_out, self.n_residues,
      self.n_bases)
    print("", file=out)
    print(" Scaling statistics for output data:", file=out)
    show_b_factor_info(iso_scale, aniso_scale, out=out)
    print("", file=out)
  self.write_output()
def __init__(self, args, master_phil, out=sys.stdout, process_pdb_file=True,
    require_data=True, create_fmodel=True, prefer_anomalous=None,
    force_non_anomalous=False, set_wavelength_from_model_header=False,
    set_inelastic_form_factors=None, usage_string=None,
    create_log_buffer=False, remove_unknown_scatterers=False,
    generate_input_phil=False):
  """
  Load a model and (optionally) X-ray data from command-line arguments.

  Processes `args` against `master_phil`, reads the reflection file, works
  out a consistent crystal symmetry from model and data, builds the model
  manager and - when data are available and `create_fmodel` is true - an
  fmodel object.  Results are stored as attributes (model, pdb_hierarchy,
  xray_structure, geometry, f_obs, r_free_flags, fmodel, hl_coeffs,
  sequence, unmerged_i_obs, params, ...).

  All progress messages go to `self.log`, so "--quiet" and the optional
  log buffer apply to them as well.

  :param args: command-line arguments; "--quiet" or "quiet=True" silences
    the log
  :param master_phil: parsed parameter scope, or a string to be parsed
  :param out: stream the log is based on
  :param process_pdb_file: when true, process the model with restraints
    and populate `geometry` / `processed_pdb_file`
  :param require_data: raise Sorry if no reflection file was given
  :param create_fmodel: build `self.fmodel` when data are available
  :param prefer_anomalous: passed to data selection
  :param force_non_anomalous: passed to data selection; must not be
    combined with `prefer_anomalous`
  :param set_wavelength_from_model_header: take the wavelength from the
    model header when the parameter is unset
  :param set_inelastic_form_factors: None, "sasaki" or "henke"
  :param usage_string: if given, raise Usage when there are no arguments
    or "--help" is present
  :param create_log_buffer: log to both `out` and an in-memory buffer
  :param remove_unknown_scatterers: drop atoms of element X from the model
  :param generate_input_phil: `master_phil` is a string that must first be
    expanded by `generate_master_phil_with_inputs`
  :raises Usage: see `usage_string`
  :raises Sorry: on missing or inconsistent inputs
  """
  import mmtbx.monomer_library.pdb_interpretation
  import mmtbx.monomer_library.server
  import mmtbx.utils
  import mmtbx.model
  from iotbx import crystal_symmetry_from_any
  import iotbx.phil
  if generate_input_phil:
    from six import string_types
    assert isinstance(master_phil, string_types)
    master_phil = generate_master_phil_with_inputs(
      phil_string=master_phil)
  if isinstance(master_phil, str):
    master_phil = iotbx.phil.parse(master_phil)
  if usage_string is not None:
    if (len(args) == 0) or ("--help" in args):
      raise Usage("""%s\n\nFull parameters:\n%s""" % (usage_string,
        master_phil.as_str(prefix=" ")))
  if force_non_anomalous:
    assert (not prefer_anomalous)
  assert (set_inelastic_form_factors in [None, "sasaki", "henke"])
  self.args = args
  self.master_phil = master_phil
  self.processed_pdb_file = self.pdb_inp = None
  self.pdb_hierarchy = self.xray_structure = None
  self.geometry = None
  self.sequence = None
  self.fmodel = None
  self.f_obs = None
  self.r_free_flags = None
  self.intensity_flag = None
  self.raw_data = None
  self.raw_flags = None
  self.test_flag_value = None
  self.miller_arrays = None
  self.hl_coeffs = None
  self.cif_objects = []
  self.log = out
  if ("--quiet" in args) or ("quiet=True" in args):
    self.log = null_out()
  elif create_log_buffer:
    self.log = multi_out()
    self.log.register(label="stdout", file_object=out)
    self.log.register(label="log_buffer", file_object=StringIO())
  make_header("Collecting inputs", out=self.log)
  cmdline = iotbx.phil.process_command_line_with_files(
    args=args,
    master_phil=master_phil,
    pdb_file_def="input.pdb.file_name",
    reflection_file_def="input.xray_data.file_name",
    cif_file_def="input.monomers.file_name",
    seq_file_def="input.sequence")
  self.working_phil = cmdline.work
  params = self.working_phil.extract()
  if len(params.input.pdb.file_name) == 0:
    raise Sorry("At least one PDB file is required as input.")
  self.cif_file_names = params.input.monomers.file_name
  self.pdb_file_names = params.input.pdb.file_name
  # SYMMETRY HANDLING - PDB FILES
  # The first model file that carries symmetry information wins.
  self.crystal_symmetry = pdb_symm = None
  for pdb_file_name in params.input.pdb.file_name:
    pdb_symm = crystal_symmetry_from_any.extract_from(pdb_file_name)
    if pdb_symm is not None:
      break
  # DATA INPUT
  data_and_flags = hkl_symm = hkl_in = None
  if params.input.xray_data.file_name is None:
    if require_data:
      raise Sorry(
        "At least one reflections file is required as input.")
  else:
    # FIXME this may still require that the data file has full crystal
    # symmetry defined (although for MTZ input this will not be a problem)
    make_sub_header("Processing X-ray data", out=self.log)
    hkl_in = file_reader.any_file(params.input.xray_data.file_name)
    hkl_in.check_file_type("hkl")
    hkl_server = hkl_in.file_server
    symm = hkl_server.miller_arrays[0].crystal_symmetry()
    if ((symm is None) or
        (symm.space_group() is None) or
        (symm.unit_cell() is None)):
      if pdb_symm is not None:
        from iotbx.reflection_file_utils import reflection_file_server
        print(
          "No symmetry in X-ray data file - using PDB symmetry:",
          file=self.log)
        # Written to self.log (not `out`) so quiet mode and the log
        # buffer cover this message too.
        pdb_symm.show_summary(f=self.log, prefix=" ")
        hkl_server = reflection_file_server(
          crystal_symmetry=pdb_symm,
          reflection_files=[hkl_in.file_object])
      else:
        raise Sorry(
          "No crystal symmetry information found in input files.")
    data_and_flags = mmtbx.utils.determine_data_and_flags(
      reflection_file_server=hkl_server,
      parameters=params.input.xray_data,
      data_parameter_scope="input.xray_data",
      flags_parameter_scope="input.xray_data.r_free_flags",
      prefer_anomalous=prefer_anomalous,
      force_non_anomalous=force_non_anomalous,
      log=self.log)
    self.intensity_flag = data_and_flags.intensity_flag
    self.raw_data = data_and_flags.raw_data
    self.raw_flags = data_and_flags.raw_flags
    self.test_flag_value = data_and_flags.test_flag_value
    self.f_obs = data_and_flags.f_obs
    self.r_free_flags = data_and_flags.r_free_flags
    self.miller_arrays = hkl_in.file_server.miller_arrays
    hkl_symm = self.raw_data.crystal_symmetry()
  if len(self.cif_file_names) > 0:
    for file_name in self.cif_file_names:
      cif_obj = mmtbx.monomer_library.server.read_cif(
        file_name=file_name)
      self.cif_objects.append((file_name, cif_obj))
  # SYMMETRY HANDLING - COMBINED
  # check for weird crystal symmetry
  # modified from mmtbx.command_line.secondary_structure_restraints
  # plan to centralize functionality in another location
  # -------------------------------------------------------------------------
  cs = pdb_symm
  corrupted_cs = False
  if cs is not None:
    # Incomplete symmetry or an implausibly small cell is treated as
    # corrupted and discarded.
    if [cs.unit_cell(), cs.space_group()].count(None) > 0:
      corrupted_cs = True
      cs = None
    elif cs.unit_cell().volume() < 10:
      corrupted_cs = True
      cs = None
  if cs is None:
    if corrupted_cs:
      print("Symmetry information is corrupted,", end=' ', file=self.log)
    else:
      print("Symmetry information was not found,", end=' ', file=self.log)
    if hkl_symm is not None:
      print("using symmetry from data.", file=self.log)
      cs = hkl_symm
    else:
      print("putting molecule in P1 box.", file=self.log)
      pdb_combined = iotbx.pdb.combine_unique_pdb_files(
        file_names=self.pdb_file_names)
      pdb_structure = iotbx.pdb.input(source_info=None,
        lines=flex.std_string(pdb_combined.raw_records))
      atoms = pdb_structure.atoms()
      box = uctbx.non_crystallographic_unit_cell_with_the_sites_in_its_center(
        sites_cart=atoms.extract_xyz(),
        buffer_layer=3)
      atoms.set_xyz(new_xyz=box.sites_cart)
      cs = box.crystal_symmetry()
  pdb_symm = cs
  # -------------------------------------------------------------------------
  from iotbx.symmetry import combine_model_and_data_symmetry
  self.crystal_symmetry = combine_model_and_data_symmetry(
    model_symmetry=pdb_symm,
    data_symmetry=hkl_symm)
  if (self.crystal_symmetry is not None) and (self.f_obs is not None):
    self.f_obs = self.f_obs.customized_copy(
      crystal_symmetry=self.crystal_symmetry).eliminate_sys_absent(
      ).set_info(self.f_obs.info())
    self.r_free_flags = self.r_free_flags.customized_copy(
      crystal_symmetry=self.crystal_symmetry).eliminate_sys_absent(
      ).set_info(self.r_free_flags.info())
  # EXPERIMENTAL PHASES
  target_name = "ml"
  if hasattr(params.input, "experimental_phases"):
    flag = params.input.use_experimental_phases
    if flag in [True, Auto]:
      phases_file = params.input.experimental_phases.file_name
      if phases_file is None:
        # No separate phases file: look in the data file itself.
        phases_file = params.input.xray_data.file_name
        phases_in = hkl_in
      else:
        phases_in = file_reader.any_file(phases_file)
        phases_in.check_file_type("hkl")
      phases_in.file_server.err = self.log # redirect error output
      space_group = self.crystal_symmetry.space_group()
      point_group = space_group.build_derived_point_group()
      hl_coeffs = mmtbx.utils.determine_experimental_phases(
        reflection_file_server=phases_in.file_server,
        parameters=params.input.experimental_phases,
        log=self.log,
        parameter_scope="input.experimental_phases",
        working_point_group=point_group,
        symmetry_safety_check=True)
      if hl_coeffs is not None:
        hl_coeffs = hl_coeffs.map_to_asu()
        # Bring the anomalous flag of the phases in line with f_obs.
        if hl_coeffs.anomalous_flag():
          if not self.f_obs.anomalous_flag():
            hl_coeffs = hl_coeffs.average_bijvoet_mates()
        elif self.f_obs.anomalous_flag():
          hl_coeffs = hl_coeffs.generate_bijvoet_mates()
        self.hl_coeffs = hl_coeffs.matching_set(
          other=self.f_obs,
          data_substitute=(0, 0, 0, 0))
        target_name = "mlhl"
  # PDB INPUT
  self.unknown_residues_flag = False
  self.unknown_residues_error_message = False
  pdb_combined = mmtbx.utils.combine_unique_pdb_files(
    file_names=params.input.pdb.file_name,
  )
  pdb_combined.report_non_unique(out=self.log)
  pdb_raw_records = pdb_combined.raw_records
  try:
    self.pdb_inp = iotbx.pdb.input(
      source_info=None,
      lines=flex.std_string(pdb_raw_records))
  except ValueError as e:
    raise Sorry("Model format (PDB or mmCIF) error:\n%s" % str(e))
  if remove_unknown_scatterers:
    h = self.pdb_inp.construct_hierarchy()
    known_sel = h.atom_selection_cache().selection("not element X")
    if known_sel.count(False) > 0:
      self.pdb_inp = iotbx.pdb.input(
        source_info=None,
        lines=h.select(known_sel).as_pdb_string())
  model_params = mmtbx.model.manager.get_default_pdb_interpretation_params()
  pdb_interp_params = getattr(params, "pdb_interpretation", None)
  if pdb_interp_params is None:
    pdb_interp_params = iotbx.phil.parse(
      input_string=mmtbx.monomer_library.pdb_interpretation.
      grand_master_phil_str,
      process_includes=True).extract()
    pdb_interp_params = pdb_interp_params.pdb_interpretation
  model_params.pdb_interpretation = pdb_interp_params
  stop_for_unknowns = getattr(pdb_interp_params, "stop_for_unknowns",
    False) or remove_unknown_scatterers
  if not process_pdb_file:
    stop_for_unknowns = True and not remove_unknown_scatterers
  self.model = mmtbx.model.manager(
    model_input=self.pdb_inp,
    crystal_symmetry=self.crystal_symmetry,
    restraint_objects=self.cif_objects,
    pdb_interpretation_params=model_params,
    process_input=False,
    stop_for_unknowns=stop_for_unknowns,
    log=self.log)
  if process_pdb_file:
    make_sub_header("Processing PDB file(s)", out=self.log)
    self.model.process_input_model(make_restraints=True)
    full_grm = self.model.get_restraints_manager()
    self.geometry = full_grm.geometry
    self.processed_pdb_file = self.model._processed_pdb_file # to remove later XXX
  self.xray_structure = self.model.get_xray_structure()
  self.pdb_hierarchy = self.model.get_hierarchy()
  self.pdb_hierarchy.atoms().reset_i_seq()
  # wavelength
  if params.input.energy is not None:
    if params.input.wavelength is not None:
      raise Sorry("Both wavelength and energy have been specified!")
    # NOTE(review): constant looks like hc in eV*Angstrom - confirm units.
    params.input.wavelength = 12398.424468024265 / params.input.energy
  if (set_wavelength_from_model_header and
      params.input.wavelength is None):
    wavelength = self.pdb_inp.extract_wavelength()
    if wavelength is not None:
      print("", file=self.log)
      print("Using wavelength = %g from PDB header" % wavelength,
        file=self.log)
      params.input.wavelength = wavelength
  # set scattering table
  if data_and_flags is not None:
    self.model.setup_scattering_dictionaries(
      scattering_table=params.input.scattering_table,
      d_min=self.f_obs.d_min(),
      log=self.log,
      set_inelastic_form_factors=set_inelastic_form_factors,
      iff_wavelength=params.input.wavelength)
    self.xray_structure.show_summary(f=self.log)
  # FMODEL SETUP
  if (create_fmodel) and (data_and_flags is not None):
    make_sub_header("F(model) initialization", out=self.log)
    skip_twin_detection = getattr(params.input, "skip_twin_detection", True)
    twin_law = getattr(params.input, "twin_law", None)
    if twin_law is Auto:
      if self.hl_coeffs is not None:
        raise Sorry(
          "Automatic twin law determination not supported when " +
          "experimental phases are used.")
    elif not skip_twin_detection:
      twin_law = Auto
    if twin_law is Auto:
      print("Twinning will be detected automatically.", file=self.log)
      self.fmodel = mmtbx.utils.fmodel_simple(
        xray_structures=[self.xray_structure],
        scattering_table=params.input.scattering_table,
        f_obs=self.f_obs,
        r_free_flags=self.r_free_flags,
        skip_twin_detection=skip_twin_detection,
        target_name=target_name,
        log=self.log)
    else:
      if (twin_law is not None) and (self.hl_coeffs is not None):
        raise Sorry(
          "Automatic twin law determination not supported when " +
          "experimental phases are used.")
      self.fmodel = mmtbx.utils.fmodel_manager(
        f_obs=self.f_obs,
        xray_structure=self.xray_structure,
        r_free_flags=self.r_free_flags,
        twin_law=params.input.twin_law,
        hl_coeff=self.hl_coeffs,
        target_name=target_name)
      # NOTE(review): nesting reconstructed from whitespace-flattened
      # source; scaling is assumed to belong to this branch only - confirm.
      self.fmodel.update_all_scales(
        params=None,
        log=self.log,
        optimize_mask=True,
        show=True)
    self.fmodel.info().show_rfactors_targets_scales_overall(
      out=self.log)
  # SEQUENCE
  if params.input.sequence is not None:
    seq_file = file_reader.any_file(params.input.sequence,
      force_type="seq",
      raise_sorry_if_errors=True)
    self.sequence = seq_file.file_object
  # UNMERGED DATA
  self.unmerged_i_obs = None
  if hasattr(params.input, "unmerged_data"):
    if params.input.unmerged_data.file_name is not None:
      self.unmerged_i_obs = load_and_validate_unmerged_data(
        f_obs=self.f_obs,
        file_name=params.input.unmerged_data.file_name,
        data_labels=params.input.unmerged_data.labels,
        log=self.log)
  self.params = params
  print("", file=self.log)
  print("End of input processing", file=self.log)
def show(self, out=sys.stdout, outliers_only=True, suppress_summary=False,
    show_percentiles=False):
  """
  Print the full validation report: every available section with its own
  header, followed (unless suppressed) by the overall summary.

  :param out: writable stream that receives the report
  :param outliers_only: forwarded to the sections that accept it
  :param suppress_summary: skip the final "Summary" section
  :param show_percentiles: forwarded to `show_summary`
  :returns: self
  """
  indent = " "

  def _section(result, title, header=make_sub_header, **show_kwds):
    # Emit one titled section; sections without a result are skipped.
    if result is None:
      return
    header(title, out=out)
    result.show(out=out, prefix=indent, **show_kwds)

  if self.xtriage is not None:
    self.xtriage.summarize_issues().show(out=out)
  # NOTE(review): block nesting reconstructed from whitespace-flattened
  # source; the two data-dependent sub-sections are assumed to sit under
  # the "Experimental data" header - confirm.
  if self.data_stats is not None:
    _section(self.data_stats, "Experimental data", header=make_header)
    _section(self.real_space, "Residues with poor real-space CC")
    _section(self.waters, "Suspicious water molecules")
  _section(self.model_stats, "Model properties", header=make_header)
  _section(self.restraints, "Geometry restraints", header=make_header)
  make_header("Molprobity validation", out=out)
  _section(self.ramalyze, "Ramachandran angles",
    outliers_only=outliers_only)
  _section(self.omegalyze, "Omegalyze analysis",
    outliers_only=outliers_only)
  _section(self.rotalyze, "Sidechain rotamers",
    outliers_only=outliers_only)
  _section(self.cbetadev, "C-beta deviations",
    outliers_only=outliers_only)
  _section(self.clashes, "Bad clashes")
  _section(self.nqh_flips, "Asn/Gln/His flips")
  _section(self.rna, "RNA validation", header=make_header,
    outliers_only=outliers_only)
  if not suppress_summary:
    make_header("Summary", out=out)
    self.show_summary(out=out, prefix=indent,
      show_percentiles=show_percentiles)
  return self
def process_residues(self, out=None):
  """
  Prune residues and sidechains with poor map support from protein chains.

  For every atom group in every protein chain of the first model:

  1. backbone atoms are checked against the 2mFo-DFc and mFo-DFc
     thresholds (when `params.mainchain` is set);
  2. otherwise the whole-residue CC is checked;
  3. otherwise the sidechain alone is checked (CC, map values and, when
     `params.check_cgamma` is set, density at the C-gamma atom of long
     sidechains).

  Failing atom groups or sidechains are removed from the hierarchy in
  place.  A final pass removes short fragments left next to removed
  residues.  Each removal is recorded and printed to `out`.

  :param out: writable stream for the report; defaults to sys.stdout
  :returns: group_args with n_res_protein, n_res_removed, n_sc_removed and
    outliers (the list of residue_summary objects)
  """
  if out is None:
    out = sys.stdout
  n_res_removed = 0
  n_sc_removed = 0
  n_res_protein = 0
  pruned = []
  make_header("Pruning residues and sidechains", out=out)
  # NOTE(review): block nesting below was reconstructed from
  # whitespace-flattened source - confirm against the original layout.
  for chain in self.pdb_hierarchy.models()[0].chains():
    if not chain.is_protein():
      continue
    removed_resseqs = []
    if len(chain.conformers()) > 1:
      print("WARNING: chain '%s' has multiple conformers" % chain.id,
        file=out)
    for residue_group in chain.residue_groups():
      n_res_protein += 1
      for atom_group in residue_group.atom_groups():
        resname = atom_group.resname
        remove_atom_group = False
        sidechain_atoms = []
        backbone_atoms = []
        for atom in atom_group.atoms():
          if atom.name.strip() in ["N", "O", "C", "H", "CA", "CB"]:
            backbone_atoms.append(atom)
          elif not atom_group.resname in ["ALA", "GLY"]:
            sidechain_atoms.append(atom)
        if (len(backbone_atoms) > 0) and (self.params.mainchain):
          mc_stats = self.get_map_stats_for_atoms(backbone_atoms)
          if mc_stats.mean_2fofc < self.params.min_backbone_2fofc:
            pruned.append(residue_summary(
              chain_id=chain.id,
              residue_group=residue_group,
              atom_group=atom_group,
              score=mc_stats.mean_2fofc,
              score_type="sigma",
              atoms_type="C-alpha"))
            remove_atom_group = True
          elif mc_stats.mean_fofc < self.params.min_backbone_fofc:
            pruned.append(residue_summary(
              chain_id=chain.id,
              residue_group=residue_group,
              atom_group=atom_group,
              score=mc_stats.mean_fofc,
              score_type="sigma",
              atoms_type="C-alpha"))
            remove_atom_group = True
        # map values look okay - now check overall CC
        if not remove_atom_group:
          res_stats = self.get_map_stats_for_atoms(atom_group.atoms())
          if (res_stats.cc < self.params.min_cc) and (self.params.mainchain):
            pruned.append(residue_summary(
              chain_id=chain.id,
              residue_group=residue_group,
              atom_group=atom_group,
              score=res_stats.cc))
            remove_atom_group = True
          elif (len(sidechain_atoms) > 0) and (self.params.sidechains):
            # overall CC is acceptable - now look at sidechain alone
            remove_sidechain = False
            sc_stats = self.get_map_stats_for_atoms(sidechain_atoms)
            if sc_stats is None:
              continue
            if sc_stats.cc < self.params.min_cc_sidechain:
              pruned.append(residue_summary(
                chain_id=chain.id,
                residue_group=residue_group,
                atom_group=atom_group,
                score=sc_stats.cc,
                atoms_type="sidechain"))
              remove_sidechain = True
            else:
              if sc_stats.mean_2fofc < self.params.min_sidechain_2fofc:
                pruned.append(residue_summary(
                  chain_id=chain.id,
                  residue_group=residue_group,
                  atom_group=atom_group,
                  score=sc_stats.mean_2fofc,
                  score_type="sigma",
                  atoms_type="sidechain"))
                remove_sidechain = True
              elif sc_stats.mean_fofc < self.params.max_sidechain_fofc:
                pruned.append(residue_summary(
                  chain_id=chain.id,
                  residue_group=residue_group,
                  atom_group=atom_group,
                  score=sc_stats.mean_fofc,
                  score_type="sigma",
                  atoms_type="sidechain",
                  map_type="mFo-Dfc"))
                remove_sidechain = True
              if ((self.params.check_cgamma) and
                  (resname in ["ARG","LYS","TYR","TRP","PHE"])):
                c_gamma = None
                for atom in atom_group.atoms():
                  if atom.name.strip() == "CG":
                    c_gamma = atom
                if c_gamma is not None:
                  map_values = self.get_density_at_atom(c_gamma)
                  # FIXME this is horribly subjective, but so is the logic
                  # I use for manual pruning...
                  if ((map_values.two_fofc < 0.8) or
                      ((map_values.two_fofc < 1.0) and
                       (map_values.fofc < -3.0))):
                    pruned.append(residue_summary(
                      chain_id=chain.id,
                      residue_group=residue_group,
                      atom_group=atom_group,
                      score=map_values.two_fofc,
                      score_type="sigma",
                      atoms_type="sidechain",
                      map_type="2mFo-Dfc"))
                    remove_sidechain = True
            if remove_sidechain:
              assert (self.params.sidechains)
              for atom in sidechain_atoms:
                atom_group.remove_atom(atom)
              n_sc_removed += 1
        if remove_atom_group:
          assert (self.params.mainchain)
          residue_group.remove_atom_group(atom_group)
      # A residue group left without atom groups is dropped entirely.
      if len(residue_group.atom_groups()) == 0:
        chain.remove_residue_group(residue_group)
        n_res_removed += 1
        removed_resseqs.append(residue_group.resseq_as_int())
    # Final pass: remove lone single/pair residues
    if ((self.params.mainchain) and
        (self.params.min_fragment_size is not None)):
      for j_seq, residue_group in enumerate(chain.residue_groups()):
        if residue_group.icode.strip() != "":
          continue
        resseq = residue_group.resseq_as_int()
        remove = False
        # Only residues that start a fragment (chain start, or right after
        # a removed residue) are candidates.
        if (resseq - 1 in removed_resseqs) or (j_seq == 0):
          print("candidate:", resseq, file=out)
          for k in range(1, self.params.min_fragment_size+1):
            if resseq + k in removed_resseqs:
              remove = True
              break
            elif (j_seq + k) >= len(chain.residue_groups()):
              remove = True
              break
        if remove:
          # Report this residue's own first atom group; the loop variable
          # left over from the pruning pass above belongs to a different
          # residue.  Empty residue groups were already removed above.
          pruned.append(residue_summary(
            chain_id=chain.id,
            residue_group=residue_group,
            atom_group=residue_group.atom_groups()[0],
            score=None))
          chain.remove_residue_group(residue_group)
          removed_resseqs.append(resseq)
          n_res_removed += 1
  for outlier in pruned:
    outlier.show(out)
  print("Removed %d residues and %d sidechains" % (n_res_removed,
    n_sc_removed), file=out)
  return group_args(
    n_res_protein=n_res_protein,
    n_res_removed=n_res_removed,
    n_sc_removed=n_sc_removed,
    outliers=pruned)
def __init__(self,
             args,
             master_phil,
             out=sys.stdout,
             process_pdb_file=True,
             require_data=True,
             create_fmodel=True,
             prefer_anomalous=None,
             force_non_anomalous=False,
             set_wavelength_from_model_header=False,
             set_inelastic_form_factors=None,
             usage_string=None,
             create_log_buffer=False,
             remove_unknown_scatterers=False,
             generate_input_phil=False):
    """
    Process command-line arguments into a model, reflection data and
    (optionally) an F(model) object, stored as attributes of self.

    Parameters
    ----------
    args : list of command-line argument strings.
    master_phil : parsed phil object, or a phil string (parsed here).  Must
        be a string when generate_input_phil is true.
    out : stream for log output; replaced by a null stream when "--quiet" or
        "quiet=True" is among args.
    process_pdb_file : if true, run full PDB interpretation and build
        geometry restraints; otherwise only read the hierarchy.
    require_data : if true, a missing reflections file raises Sorry.
    create_fmodel : if true (and data were loaded), build self.fmodel.
    prefer_anomalous, force_non_anomalous : forwarded to
        mmtbx.utils.determine_data_and_flags; they may not both be set.
    set_wavelength_from_model_header : if true and no wavelength was given,
        take it from the PDB input object.
    set_inelastic_form_factors : None, "sasaki" or "henke".
    usage_string : if given, Usage is raised when args is empty or
        contains "--help".
    create_log_buffer : if true, log to a multi_out that writes to `out`
        and to an in-memory "log_buffer".
    remove_unknown_scatterers : if true (and process_pdb_file is false),
        drop atoms matching "element X".
    generate_input_phil : if true, expand master_phil with
        generate_master_phil_with_inputs first.

    Raises
    ------
    Usage : see usage_string.
    Sorry : no PDB file; no reflections file while require_data; no crystal
        symmetry in any input; both wavelength and energy given; twin law
        combined with experimental phases.
    """
    import mmtbx.monomer_library.pdb_interpretation
    import mmtbx.monomer_library.server
    import mmtbx.utils
    from iotbx import crystal_symmetry_from_any
    from iotbx import file_reader
    import iotbx.phil
    if generate_input_phil:
        assert isinstance(master_phil, basestring)
        master_phil = generate_master_phil_with_inputs(phil_string=master_phil)
    if isinstance(master_phil, str):
        master_phil = iotbx.phil.parse(master_phil)
    if (usage_string is not None):
        if (len(args) == 0) or ("--help" in args):
            raise Usage("""%s\n\nFull parameters:\n%s""" % (
                usage_string, master_phil.as_str(prefix=" ")))
    if (force_non_anomalous):
        assert (not prefer_anomalous)
    assert (set_inelastic_form_factors in [None, "sasaki", "henke"])
    self.args = args
    self.master_phil = master_phil
    self.processed_pdb_file = self.pdb_inp = None
    self.pdb_hierarchy = self.xray_structure = None
    self.geometry = None
    self.sequence = None
    self.fmodel = None
    self.f_obs = None
    self.r_free_flags = None
    self.intensity_flag = None
    self.raw_data = None
    self.raw_flags = None
    self.test_flag_value = None
    self.miller_arrays = None
    self.hl_coeffs = None
    self.cif_objects = []
    self.log = out
    if ("--quiet" in args) or ("quiet=True" in args):
        self.log = null_out()
    elif create_log_buffer:
        self.log = multi_out()
        self.log.register(label="stdout", file_object=out)
        self.log.register(label="log_buffer", file_object=StringIO())
    make_header("Collecting inputs", out=self.log)
    cmdline = iotbx.phil.process_command_line_with_files(
        args=args,
        master_phil=master_phil,
        pdb_file_def="input.pdb.file_name",
        reflection_file_def="input.xray_data.file_name",
        cif_file_def="input.monomers.file_name",
        seq_file_def="input.sequence")
    self.working_phil = cmdline.work
    params = self.working_phil.extract()
    if len(params.input.pdb.file_name) == 0:
        raise Sorry("At least one PDB file is required as input.")
    self.cif_file_names = params.input.monomers.file_name
    self.pdb_file_names = params.input.pdb.file_name
    # SYMMETRY HANDLING - PDB FILES
    # Use the symmetry of the first PDB file that defines one.
    self.crystal_symmetry = pdb_symm = None
    for pdb_file_name in params.input.pdb.file_name:
        pdb_symm = crystal_symmetry_from_any.extract_from(pdb_file_name)
        if (pdb_symm is not None):
            break
    # DATA INPUT
    data_and_flags = hkl_symm = hkl_in = None
    if (params.input.xray_data.file_name is None):
        if (require_data):
            raise Sorry("At least one reflections file is required as input.")
    else:
        # FIXME this may still require that the data file has full crystal
        # symmetry defined (although for MTZ input this will not be a problem)
        make_sub_header("Processing X-ray data", out=self.log)
        hkl_in = file_reader.any_file(params.input.xray_data.file_name)
        hkl_in.check_file_type("hkl")
        hkl_server = hkl_in.file_server
        symm = hkl_server.miller_arrays[0].crystal_symmetry()
        if ((symm is None) or
                (symm.space_group() is None) or
                (symm.unit_cell() is None)):
            if (pdb_symm is not None):
                from iotbx.reflection_file_utils import reflection_file_server
                print >> self.log, \
                    "No symmetry in X-ray data file - using PDB symmetry:"
                # Write to self.log (not the raw `out` stream) so that the
                # summary honours --quiet and reaches the log buffer, like
                # the message printed just above.
                pdb_symm.show_summary(f=self.log, prefix=" ")
                hkl_server = reflection_file_server(
                    crystal_symmetry=pdb_symm,
                    reflection_files=[hkl_in.file_object])
            else:
                raise Sorry(
                    "No crystal symmetry information found in input files.")
        if (hkl_server is None):
            hkl_server = hkl_in.file_server
        data_and_flags = mmtbx.utils.determine_data_and_flags(
            reflection_file_server=hkl_server,
            parameters=params.input.xray_data,
            data_parameter_scope="input.xray_data",
            flags_parameter_scope="input.xray_data.r_free_flags",
            prefer_anomalous=prefer_anomalous,
            force_non_anomalous=force_non_anomalous,
            log=self.log)
        self.intensity_flag = data_and_flags.intensity_flag
        self.raw_data = data_and_flags.raw_data
        self.raw_flags = data_and_flags.raw_flags
        self.test_flag_value = data_and_flags.test_flag_value
        self.f_obs = data_and_flags.f_obs
        self.r_free_flags = data_and_flags.r_free_flags
        self.miller_arrays = hkl_in.file_server.miller_arrays
        hkl_symm = self.raw_data.crystal_symmetry()
    if len(self.cif_file_names) > 0:
        for file_name in self.cif_file_names:
            cif_obj = mmtbx.monomer_library.server.read_cif(
                file_name=file_name)
            self.cif_objects.append((file_name, cif_obj))
    # SYMMETRY HANDLING - COMBINED
    from iotbx.symmetry import combine_model_and_data_symmetry
    self.crystal_symmetry = combine_model_and_data_symmetry(
        model_symmetry=pdb_symm,
        data_symmetry=hkl_symm)
    if (self.crystal_symmetry is not None) and (self.f_obs is not None):
        self.f_obs = self.f_obs.customized_copy(
            crystal_symmetry=self.crystal_symmetry
        ).eliminate_sys_absent().set_info(self.f_obs.info())
        self.r_free_flags = self.r_free_flags.customized_copy(
            crystal_symmetry=self.crystal_symmetry
        ).eliminate_sys_absent().set_info(self.r_free_flags.info())
    # EXPERIMENTAL PHASES
    target_name = "ml"
    if hasattr(params.input, "experimental_phases"):
        flag = params.input.use_experimental_phases
        if (flag in [True, Auto]):
            phases_file = params.input.experimental_phases.file_name
            if (phases_file is None):
                # Fall back to the main reflections file.
                phases_file = params.input.xray_data.file_name
                phases_in = hkl_in
            else:
                phases_in = file_reader.any_file(phases_file)
                phases_in.check_file_type("hkl")
            phases_in.file_server.err = self.log  # redirect error output
            space_group = self.crystal_symmetry.space_group()
            point_group = space_group.build_derived_point_group()
            hl_coeffs = mmtbx.utils.determine_experimental_phases(
                reflection_file_server=phases_in.file_server,
                parameters=params.input.experimental_phases,
                log=self.log,
                parameter_scope="input.experimental_phases",
                working_point_group=point_group,
                symmetry_safety_check=True)
            if (hl_coeffs is not None):
                hl_coeffs = hl_coeffs.map_to_asu()
                # Make the anomalous flag of the HL coefficients agree with
                # that of f_obs before matching the two sets.
                if hl_coeffs.anomalous_flag():
                    if (not self.f_obs.anomalous_flag()):
                        hl_coeffs = hl_coeffs.average_bijvoet_mates()
                elif self.f_obs.anomalous_flag():
                    hl_coeffs = hl_coeffs.generate_bijvoet_mates()
                self.hl_coeffs = hl_coeffs.matching_set(
                    other=self.f_obs,
                    data_substitute=(0, 0, 0, 0))
                target_name = "mlhl"
    # PDB INPUT
    self.unknown_residues_flag = False
    self.unknown_residues_error_message = False
    if process_pdb_file:
        pdb_interp_params = getattr(params, "pdb_interpretation", None)
        if (pdb_interp_params is None):
            pdb_interp_params = \
                mmtbx.monomer_library.pdb_interpretation.master_params.extract()
        make_sub_header("Processing PDB file(s)", out=self.log)
        pdb_combined = mmtbx.utils.combine_unique_pdb_files(
            file_names=params.input.pdb.file_name,)
        pdb_combined.report_non_unique(out=self.log)
        pdb_raw_records = pdb_combined.raw_records
        processed_pdb_files_srv = mmtbx.utils.process_pdb_file_srv(
            cif_objects=self.cif_objects,
            pdb_interpretation_params=pdb_interp_params,
            crystal_symmetry=self.crystal_symmetry,
            use_neutron_distances=params.input.scattering_table == "neutron",
            stop_for_unknowns=getattr(
                pdb_interp_params, "stop_for_unknowns", False),
            log=self.log)
        self.processed_pdb_file, self.pdb_inp = \
            processed_pdb_files_srv.process_pdb_files(
                raw_records=pdb_raw_records,
                stop_if_duplicate_labels=False,
                allow_missing_symmetry=(
                    (self.crystal_symmetry is None) and (not require_data)))
        error_msg = self.processed_pdb_file.all_chain_proxies.\
            fatal_problems_message(
                ignore_unknown_scattering_types=False,
                ignore_unknown_nonbonded_energy_types=False)
        if (error_msg is not None):
            # Recorded rather than raised; callers decide what to do.
            self.unknown_residues_flag = True
            self.unknown_residues_error_message = error_msg
        self.geometry = self.processed_pdb_file.geometry_restraints_manager(
            show_energies=False)
        assert (self.geometry is not None)
        self.xray_structure = self.processed_pdb_file.xray_structure()
        chain_proxies = self.processed_pdb_file.all_chain_proxies
        self.pdb_hierarchy = chain_proxies.pdb_hierarchy
    else:
        pdb_file_object = mmtbx.utils.pdb_file(
            pdb_file_names=params.input.pdb.file_name,
            cif_objects=self.cif_objects,
            crystal_symmetry=self.crystal_symmetry,
            log=self.log)
        self.pdb_inp = pdb_file_object.pdb_inp
        self.pdb_hierarchy = self.pdb_inp.construct_hierarchy()
        if (remove_unknown_scatterers):
            known_sel = self.pdb_hierarchy.atom_selection_cache().selection(
                "not element X")
            if (known_sel.count(True) != len(known_sel)):
                self.pdb_hierarchy = self.pdb_hierarchy.select(known_sel)
                self.xray_structure = \
                    self.pdb_hierarchy.extract_xray_structure(
                        crystal_symmetry=self.crystal_symmetry)
        self.pdb_hierarchy.atoms().reset_i_seq()
        if (self.xray_structure is None):
            self.xray_structure = self.pdb_inp.xray_structure_simple(
                crystal_symmetry=self.crystal_symmetry)
    # wavelength
    if (params.input.energy is not None):
        if (params.input.wavelength is not None):
            raise Sorry("Both wavelength and energy have been specified!")
        # NOTE(review): constant looks like hc in eV*Angstrom, i.e. energy
        # is expected in eV - confirm against the phil definition.
        params.input.wavelength = 12398.424468024265 / params.input.energy
    if (set_wavelength_from_model_header and params.input.wavelength is None):
        wavelength = self.pdb_inp.extract_wavelength()
        if (wavelength is not None):
            print >> self.log, ""
            print >> self.log, \
                "Using wavelength = %g from PDB header" % wavelength
            params.input.wavelength = wavelength
    # set scattering table
    if (data_and_flags is not None):
        self.xray_structure.scattering_type_registry(
            d_min=self.f_obs.d_min(),
            table=params.input.scattering_table)
        if ((params.input.wavelength is not None) and
                (set_inelastic_form_factors is not None)):
            self.xray_structure.set_inelastic_form_factors(
                photon=params.input.wavelength,
                table=set_inelastic_form_factors)
        make_sub_header("xray_structure summary", out=self.log)
        self.xray_structure.scattering_type_registry().show(out=self.log)
        self.xray_structure.show_summary(f=self.log)
    # FMODEL SETUP
    if (create_fmodel) and (data_and_flags is not None):
        make_sub_header("F(model) initialization", out=self.log)
        skip_twin_detection = getattr(
            params.input, "skip_twin_detection", None)
        twin_law = getattr(params.input, "twin_law", None)
        if (twin_law is Auto):
            if (self.hl_coeffs is not None):
                raise Sorry(
                    "Automatic twin law determination not supported when " +
                    "experimental phases are used.")
        elif (skip_twin_detection is not None):
            twin_law = Auto
        if (twin_law is Auto):
            print >> self.log, "Twinning will be detected automatically."
            self.fmodel = mmtbx.utils.fmodel_simple(
                xray_structures=[self.xray_structure],
                scattering_table=params.input.scattering_table,
                f_obs=self.f_obs,
                r_free_flags=self.r_free_flags,
                skip_twin_detection=skip_twin_detection,
                target_name=target_name,
                log=self.log)
        else:
            if ((twin_law is not None) and (self.hl_coeffs is not None)):
                raise Sorry(
                    "Automatic twin law determination not supported when " +
                    "experimental phases are used.")
            # Use the value fetched with getattr() above: in this branch it
            # equals params.input.twin_law when that attribute exists, and
            # is None (instead of an AttributeError) when the phil scope
            # does not define twin_law at all.
            self.fmodel = mmtbx.utils.fmodel_manager(
                f_obs=self.f_obs,
                xray_structure=self.xray_structure,
                r_free_flags=self.r_free_flags,
                twin_law=twin_law,
                hl_coeff=self.hl_coeffs,
                target_name=target_name)
            self.fmodel.update_all_scales(
                params=None,
                log=self.log,
                optimize_mask=True,
                show=True)
        self.fmodel.info().show_rfactors_targets_scales_overall(out=self.log)
    # SEQUENCE
    if (params.input.sequence is not None):
        seq_file = file_reader.any_file(
            params.input.sequence,
            force_type="seq",
            raise_sorry_if_errors=True)
        self.sequence = seq_file.file_object
    # UNMERGED DATA
    self.unmerged_i_obs = None
    if hasattr(params.input, "unmerged_data"):
        if (params.input.unmerged_data.file_name is not None):
            self.unmerged_i_obs = load_and_validate_unmerged_data(
                f_obs=self.f_obs,
                file_name=params.input.unmerged_data.file_name,
                data_labels=params.input.unmerged_data.labels,
                log=self.log)
    self.params = params
    print >> self.log, ""
    print >> self.log, "End of input processing"
# Regression checks for helpers imported from libtbx.str_utils:
# split_keeping_spaces, size_as_string_with_commas, show_string,
# prefix_each_line, prefix_each_line_suffix, show_sorted_by_counts,
# line_breaker, StringIO (including a cPickle round trip),
# reformat_terminal_text, strip_lines / rstrip_lines,
# expandtabs_track_columns, format_value, make_header,
# string_representation, framed_output and print_message_in_box.
# Uses the Python 2 only modules cPickle and cStringIO and the
# "print >> stream" statement form.
# NOTE(review): this definition is stored with its line breaks collapsed.
# As written, the first bare "#" on each physical line turns the rest of
# that line into a comment, and the multi-line string literals no longer
# contain their newlines or runs of spaces (e.g. the duplicated
# split_keeping_spaces(s=" a") assertions). Presumably an extraction
# artefact - restore the layout from the upstream libtbx source before
# relying on these assertions.
def exercise(): from libtbx.test_utils import show_diff, Exception_expected import cPickle # from libtbx.str_utils import split_keeping_spaces assert split_keeping_spaces(s="") == [] assert split_keeping_spaces(s=" ") == [" "] assert split_keeping_spaces(s="a") == ["a"] assert split_keeping_spaces(s="abc") == ["abc"] assert split_keeping_spaces(s=" a") == [" ", "a"] assert split_keeping_spaces(s=" a") == [" ", "a"] assert split_keeping_spaces(s=" abc") == [" ", "abc"] assert split_keeping_spaces(s=" abc ") == [" ", "abc", " "] assert split_keeping_spaces(s=" abc ") == [" ", "abc", " "] assert split_keeping_spaces(s="a ") == ["a", " "] assert split_keeping_spaces(s="a ") == ["a", " "] assert split_keeping_spaces(s="abc ") == ["abc", " "] assert split_keeping_spaces(s="a b") == ["a", " ", "b"] assert split_keeping_spaces(s="a b") == ["a", " ", "b"] assert split_keeping_spaces(s=" a b c d ") == [ " ", "a", " ", "b", " ", "c", " ", "d", " "] # from libtbx.str_utils import size_as_string_with_commas assert size_as_string_with_commas(0) == "0" assert size_as_string_with_commas(1) == "1" assert size_as_string_with_commas(-1) == "-1" assert size_as_string_with_commas(10) == "10" assert size_as_string_with_commas(100) == "100" assert size_as_string_with_commas(1000) == "1,000" assert size_as_string_with_commas(12345) == "12,345" assert size_as_string_with_commas(12345678) == "12,345,678" assert size_as_string_with_commas(-12345678) == "-12,345,678" # from libtbx.str_utils import show_string assert show_string("abc") == '"abc"' assert show_string("a'c") == '"a\'c"' assert show_string('a"c') == "'a\"c'" assert show_string('\'"c') == '"\'\\"c"' # from libtbx.str_utils import prefix_each_line assert prefix_each_line(prefix="^", lines_as_one_string="""\ hello world""") == """\ ^hello ^world""" # from libtbx.str_utils import prefix_each_line_suffix assert prefix_each_line_suffix(prefix="^", lines_as_one_string="""\ hello world""", suffix=" ") == """\ ^hello ^world""" assert 
prefix_each_line_suffix(prefix="^", lines_as_one_string="""\ hello world""", suffix=" ", rstrip=False) == """\ ^hello%s ^world """ % " " # from libtbx.str_utils import show_sorted_by_counts import cStringIO out = cStringIO.StringIO() assert show_sorted_by_counts( label_count_pairs=[("b", 3), ("a", 3), ("c", -2)], out=out, prefix="%") assert not show_diff(out.getvalue(), """\ %"a" 3 %"b" 3 %"c" -2 """) out = cStringIO.StringIO() assert show_sorted_by_counts( label_count_pairs=[("b", -3), ("a", -3), ("c", 2)], reverse=False, out=out, prefix="%", annotations=[None, "", "x"]) assert not show_diff(out.getvalue(), """\ %"c" 2 x %"a" -3 %"b" -3 """) # from libtbx.str_utils import line_breaker for string, expected_result in [ ("", [""]), ("this is", ["this is"]), ("this is a", ["this is", "a"]), ("this is a sentence", ["this is", "a", "sentence"]), ("this is a longer sentence", ["this is", "a", "longer", "sentence"]), ("this is a very long sentence indeed", ["this is", "a very", "long", "sentence", "indeed"])]: assert [block for block in line_breaker(string, width=7)]==expected_result # from libtbx.str_utils import StringIO out1 = cStringIO.StringIO() out2 = StringIO() out3 = StringIO("Hello world!\n") print >> out1, "Hello world!" print >> out2, "Hello world!" try : print >> out3, "Hello world!" except AttributeError : pass else : raise Exception_expected out4 = cPickle.loads(cPickle.dumps(out2)) out5 = cPickle.loads(cPickle.dumps(out3)) assert out4.getvalue()==out1.getvalue()==out2.getvalue()==out5.getvalue() # from libtbx.str_utils import reformat_terminal_text txt1 = """ This is some terminal-formatted text which needs to be reset. """ assert (reformat_terminal_text(txt1) == "This is some terminal-formatted text which needs to be reset.") txt2 = """ This is more terminal-formatted text which needs to be reset. 
""" # from libtbx.str_utils import strip_lines, rstrip_lines lines = [" This is more ", " terminal-formatted ", " text "] assert (strip_lines(txt2) == "\nThis is more\nterminal-formatted\ntext which needs\nto be reset.") assert (rstrip_lines(txt2) == "\n This is more\n terminal-formatted\n text which needs\n to be reset." ) # from libtbx.str_utils import expandtabs_track_columns def check(s): es,js = expandtabs_track_columns(s=s) assert len(js) == len(s) assert es == s.expandtabs() sr = "".join([es[j] for j in js]) assert sr == s.replace("\t", " ") check("") check("\t") check("\t\t") check("\ty") check("x\ty") check("x\ty\tz") check("\txy\t\tz") check("abcdefg\txy\t\tz") check("ab defgh\txyz\t\tu") # from libtbx.str_utils import format_value assert format_value("%.4f", 1.2345678) == "1.2346" assert format_value("%.4f", None) == " None" assert format_value("%.4f", None, replace_none_with="---") == " ---" # from libtbx.str_utils import make_header out = StringIO() make_header("Header 1", out=out) assert (out.getvalue() == """ =================================== Header 1 ================================== """) out = StringIO() make_header("Header 2", out=out) assert (out.getvalue() == """ =================================== Header 2 ================================== """) # from libtbx.str_utils import string_representation iset = range(130) + range(250,256) for i in iset: s = chr(i) for j in iset: ss = s + chr(j) assert string_representation( string=ss, preferred_quote="'", alternative_quote='"') == repr(ss) from libtbx.str_utils import framed_output out = StringIO() box = framed_output(out, frame='#') print >> box, "Hello, world!" box.close() assert (out.getvalue() == """ ################# # Hello, world! 
# ################# """) out = StringIO() box = framed_output(out, frame='-', width=80, center=True, title="Refinement stats") box.write("r_free = 0.1234") box.write(" ") box.write("r_work = 0.1567") box.close() assert (out.getvalue() == """ |--------------------------------Refinement stats------------------------------| | r_free = 0.1234 r_work = 0.1567 | |------------------------------------------------------------------------------| """) out = StringIO() box = framed_output(out, frame='-', width=72, prefix=" ", title="Validation summary") print >> box, "Overall MolProbity score: 2.56" box.add_separator() print >> box, """\ Ramachandran favored: 97.5 % outliers: 2.5 % Rotamer outliers: 5.9 % Clashscore: 10.9""" assert (out.getvalue() == "") del box assert (out.getvalue() == """ |-Validation summary---------------------------------------------------| | Overall MolProbity score: 2.56 | |----------------------------------------------------------------------| | Ramachandran favored: 97.5 % | | outliers: 2.5 % | | Rotamer outliers: 5.9 % | | Clashscore: 10.9 | |----------------------------------------------------------------------| """) from libtbx.str_utils import print_message_in_box out = StringIO() print_message_in_box( message="This is some terminal-formatted text which needs to be reset.", out=out, width=32, center=True, prefix=" ", frame='*') assert (out.getvalue() == """ ******************************** * This is some * * terminal-formatted text * * which needs to be reset. * ******************************** """)
def show(self, out=sys.stdout, outliers_only=True, suppress_summary=False,
         show_percentiles=False):
    """
    Write the full validation report to `out`.

    Each analysis stored on this object is printed under its own header,
    but only if it is not None.  The per-residue MolProbity sections that
    accept it are passed `outliers_only`.  Unless `suppress_summary` is
    true, the report ends with show_summary(), which receives
    `show_percentiles`.  Returns self.
    """
    indent = " "
    if self.xtriage is not None:
        self.xtriage.summarize_issues().show(out=out)
    if self.data_stats is not None:
        make_header("Experimental data", out=out)
        self.data_stats.show(out=out, prefix=indent)
        # NOTE(review): the real-space and water sub-sections are treated
        # as children of the experimental-data section - confirm this
        # nesting against the upstream layout.
        for attr_name, title in (
                ("real_space", "Residues with poor real-space CC"),
                ("waters", "Suspicious water molecules")):
            section = getattr(self, attr_name)
            if section is not None:
                make_sub_header(title, out=out)
                section.show(out=out, prefix=indent)
    if self.model_stats is not None:
        make_header("Model properties", out=out)
        self.model_stats.show(prefix=indent, out=out)
    if self.restraints is not None:
        make_header("Geometry restraints", out=out)
        self.restraints.show(out=out, prefix=indent)
    make_header("Molprobity validation", out=out)
    # (attribute, sub-header title, whether show() takes outliers_only)
    molprobity_sections = (
        ("ramalyze", "Ramachandran angles", True),
        ("omegalyze", "Omegalyze analysis", True),
        ("rotalyze", "Sidechain rotamers", True),
        ("cbetadev", "C-beta deviations", True),
        ("clashes", "Bad clashes", False),
        ("nqh_flips", "Asn/Gln/His flips", False),
    )
    for attr_name, title, takes_outliers_only in molprobity_sections:
        section = getattr(self, attr_name)
        if section is None:
            continue
        make_sub_header(title, out=out)
        if takes_outliers_only:
            section.show(out=out, prefix=indent, outliers_only=outliers_only)
        else:
            section.show(out=out, prefix=indent)
    if self.rna is not None:
        make_header("RNA validation", out=out)
        self.rna.show(out=out, prefix=indent, outliers_only=outliers_only)
    if not suppress_summary:
        make_header("Summary", out=out)
        self.show_summary(out=out, prefix=indent,
                          show_percentiles=show_percentiles)
    return self
def run(args, out=None):
    """
    Command-line entry point for mmtbx.simulate_low_res_data.

    Prints the banner, then either shows the full parameter list (when
    `args` is empty or contains "--help") and returns None, or interprets
    each argument and hands the result to prepare_data().

    Arguments that are existing files are classified with
    iotbx.file_reader: PDB and reflection files are recorded as
    pdb_file= / hkl_file= parameters, phil files are used directly.
    Anything else is parsed as a phil assignment; arguments that raise
    RuntimeError during parsing are reported and ignored.

    All output, including the parameter listings, goes to `out`
    (sys.stdout when None).
    """
    if (out is None):
        out = sys.stdout
    make_header("mmtbx.simulate_low_res_data", out=out)
    # NOTE(review): the line breaks of this message appear to have been
    # collapsed into single spaces - restore from upstream if available.
    print(""" For generation of realistic data (model-based, or using real high-resolution data) for methods development. *********************************** WARNING: *********************************** this is an experimental program - definitely NOT bug-free. Use at your own risk! Usage: mmtbx.simulate_low_res_data model.pdb [options...] (generate data from a PDB file) mmtbx.simulate_low_res_data highres.mtz [model.pdb] [options...] (truncate high-resolution data) mmtbx.simulate_low_res_data --help (print full parameters with additional info) """, file=out)
    if (len(args) == 0) or ("--help" in args):
        print("# full parameters:", file=out)
        # Pass `out` through so the listing lands in the same stream as
        # the rest of the output instead of always going to stdout.
        if ("--help" in args):
            master_phil.show(out=out, attributes_level=1)
        else:
            master_phil.show(out=out)
        return
    from iotbx import file_reader
    interpreter = master_phil.command_line_argument_interpreter(
        home_scope="simulate_data")
    pdb_in = None
    hkl_in = None
    user_phil = []
    for arg in args:
        if os.path.isfile(arg):
            f = file_reader.any_file(arg)
            if (f.file_type == "pdb"):
                pdb_in = f.file_object
                user_phil.append(
                    interpreter.process(arg="pdb_file=%s" % f.file_name))
            elif (f.file_type == "hkl"):
                hkl_in = f.file_object
                user_phil.append(
                    interpreter.process(arg="hkl_file=%s" % f.file_name))
            elif (f.file_type == "phil"):
                user_phil.append(f.file_object)
        else:
            try:
                arg_phil = interpreter.process(arg=arg)
            except RuntimeError:
                print("ignoring uninterpretable argument '%s'" % arg,
                      file=out)
            else:
                user_phil.append(arg_phil)
    working_phil = master_phil.fetch(sources=user_phil)
    make_header("Working parameters", out=out)
    working_phil.show(out=out, prefix=" ")
    params_ = working_phil.extract()
    params = params_.simulate_data
    prepare_data(params=params, hkl_in=hkl_in, pdb_in=pdb_in, out=out)
def start_coot_and_wait(
        pdb_file,
        map_file,
        data_file,
        work_dir=None,
        coot_cmd="coot",
        needs_rebuild=False,
        log=None):
    """
    Launch Coot for interactive editing and block until the user finishes.

    Steps, as implemented below:
      1. Change the working directory to `work_dir` (created if missing;
         defaults to the current directory) and delete stale
         coot_out_tmp.pdb / coot_out.pdb files there.
      2. Write edit_in_coot.py: a copy of this module's source followed by
         a few lines that import coot and instantiate `manager` with
         `pdb_file`, `map_file` and `needs_rebuild`.
      3. Start `coot_cmd` in the background with that script.
      4. Poll the directory containing `pdb_file`: when coot_out_tmp.pdb
         exists, stop; when .NEW_MAPS exists, recompute map coefficients
         from coot_tmp_edits.pdb and `data_file` into maps_for_coot.mtz and
         remove the marker; otherwise sleep.
      5. Move the result to coot_out.pdb and compute coot_out_maps.mtz.

    Returns a group_args with pdb_file and map_file (paths inside
    `work_dir`) and skip_rebuild, which is None unless `needs_rebuild` is
    true, in which case it tells whether a NO_BUILD file exists next to
    `pdb_file`.

    Side effect: the process working directory is left at `work_dir`.
    """
    if (log is None):
        log = sys.stdout
    if (work_dir is None):
        work_dir = os.getcwd()
    if (not os.path.isdir(work_dir)):
        os.makedirs(work_dir)
    import mmtbx.maps.utils
    from libtbx.str_utils import make_header
    from libtbx import easy_run
    from libtbx import group_args
    import cootbx
    base_script = __file__.replace(".pyc", ".py")
    os.chdir(work_dir)
    for stale_name in ("coot_out_tmp.pdb", "coot_out.pdb"):
        if (os.path.exists(stale_name)):
            os.remove(stale_name)
    # Context managers guarantee both handles are closed, including the
    # read handle on this module's source and when a write fails part-way.
    with open("edit_in_coot.py", "w") as f:
        with open(base_script) as template:
            f.write(template.read())
        f.write("\n")
        f.write("import coot\n")
        cootbx.write_disable_nomenclature_errors(f)
        f.write("m = manager(\"%s\", \"%s\", needs_rebuild=%s)\n" %
                (pdb_file, map_file, needs_rebuild))
    make_header("Interactive editing in Coot", log)
    easy_run.call("\"%s\" --no-state-script --script edit_in_coot.py &" %
                  coot_cmd)
    print >> log, " Waiting for coot_out_tmp.pdb to appear at %s" % \
        str(time.asctime())
    base_dir = os.path.dirname(pdb_file)
    tmp_file = os.path.join(base_dir, "coot_out_tmp.pdb")
    edit_file = os.path.join(base_dir, "coot_tmp_edits.pdb")
    maps_file = os.path.join(base_dir, ".NEW_MAPS")
    while (True):
        if (os.path.isfile(tmp_file)):
            print >> log, " Coot editing complete at %s" % \
                str(time.asctime())
            break
        elif (os.path.isfile(maps_file)):
            # Marker file found: recompute maps for the intermediate model.
            t1 = time.time()
            assert os.path.isfile(edit_file)
            mmtbx.maps.utils.create_map_from_pdb_and_mtz(
                pdb_file=edit_file,
                mtz_file=data_file,
                output_file=os.path.join(base_dir, "maps_for_coot.mtz"),
                fill=True,
                out=log)
            t2 = time.time()
            print >> log, \
                "Calculated new map coefficients in %.1fs" % (t2 - t1)
            os.remove(maps_file)
        else:
            # t_wait is defined outside this function; the division
            # suggests it is in milliseconds - TODO confirm.
            time.sleep(t_wait / 1000.)
    shutil.move(tmp_file, "coot_out.pdb")
    mmtbx.maps.utils.create_map_from_pdb_and_mtz(
        pdb_file="coot_out.pdb",
        mtz_file=data_file,
        output_file="coot_out_maps.mtz",
        fill=True,
        out=log)
    new_model = os.path.join(work_dir, "coot_out.pdb")
    new_map = os.path.join(work_dir, "coot_out_maps.mtz")
    skip_rebuild = None
    if (needs_rebuild):
        skip_rebuild = os.path.isfile(os.path.join(base_dir, "NO_BUILD"))
    return group_args(
        pdb_file=new_model,
        map_file=new_map,
        skip_rebuild=skip_rebuild)
def find_peaks_holes(
        fmodel,
        pdb_hierarchy,
        params=None,
        map_cutoff=3.0,
        anom_map_cutoff=3.0,
        filter_peaks_by_2fofc=None,
        use_phaser_if_available=True,
        return_llg_map=False,
        include_peaks_near_model=False,
        out=None):
    """
    Search the mFo-DFc map for positive peaks and negative holes, search an
    anomalous map for peaks when f_obs is anomalous, and flag atoms sitting
    on strong density.

    In order: mFo-DFc peaks above +map_cutoff; mFo-DFc holes below
    -map_cutoff; anomalous peaks above anom_map_cutoff (Phaser LLG map
    coefficients when allowed and available, otherwise "anom_residual");
    waters whose interpolated map value reaches the cutoff in the mFo-DFc
    and (if anomalous) anomalous maps; non-water, non-hydrogen atoms on
    anomalous density.  When filter_peaks_by_2fofc is given, positive and
    anomalous peaks are additionally filtered against a sigma-scaled
    2mFo-DFc map.  Peak sites are converted from fractional to Cartesian
    coordinates before being returned.

    Progress is written to `out` (sys.stdout when None).  Returns a
    peaks_holes_container, or a (container, anom_map_coeffs) tuple when
    return_llg_map is true.  Note that include_peaks_near_model modifies
    `params` in place.
    """
    if out is None:
        out = sys.stdout
    if params is None:
        params = master_phil.fetch().extract().find_peaks
    if include_peaks_near_model:
        params.map_next_to_model.min_model_peak_dist = 0
    pdb_atoms = pdb_hierarchy.atoms()
    unit_cell = fmodel.xray_structure.unit_cell()
    from mmtbx import find_peaks
    from cctbx import maptbx

    # Optional 2mFo-DFc map used to filter the peak lists.
    two_fofc_map = None
    if filter_peaks_by_2fofc is not None:
        two_fofc_fft = fmodel.electron_density_map().fft_map(
            resolution_factor=params.resolution_factor,
            symmetry_flags=maptbx.use_space_group_symmetry,
            map_type="2mFo-DFc",
            use_all_data=True)
        two_fofc_fft.apply_sigma_scaling()
        two_fofc_map = two_fofc_fft.real_map()

    # --- positive difference density ---
    make_header("Positive difference map peaks", out=out)
    peak_search = find_peaks.manager(
        fmodel=fmodel,
        map_type="mFo-DFc",
        map_cutoff=map_cutoff,
        params=params,
        log=out)
    peak_search.peaks_mapped()
    peak_search.show_mapped(pdb_atoms)
    peaks = peak_search.peaks()
    if filter_peaks_by_2fofc is not None:
        peaks.filter_by_secondary_map(
            map=two_fofc_map,
            min_value=filter_peaks_by_2fofc)
        print >> out, ""
        print >> out, "%d peaks remaining after 2mFo-DFc filtering" % \
            len(peaks.sites)
    # very important - sites are initially fractional coordinates!
    peaks.sites = unit_cell.orthogonalize(peaks.sites)
    print >> out, ""
    out.flush()

    # --- negative difference density ---
    make_header("Negative difference map holes", out=out)
    hole_search = find_peaks.manager(
        fmodel=fmodel,
        map_type="mFo-DFc",
        map_cutoff=-map_cutoff,
        params=params,
        log=out)
    hole_search.peaks_mapped()
    hole_search.show_mapped(pdb_atoms)
    holes = hole_search.peaks()
    # XXX is this useful?
    # (holes are deliberately not filtered against the 2mFo-DFc map)
    holes.sites = unit_cell.orthogonalize(holes.sites)
    print >> out, ""
    out.flush()

    # --- anomalous peaks ---
    anom = None
    anom_map_coeffs = None
    if fmodel.f_obs().anomalous_flag():
        make_header("Anomalous difference map peaks", out=out)
        anom_map_type = "anom_residual"
        phaser_usable = (use_phaser_if_available and
                         libtbx.env.has_module("phaser") and
                         (not fmodel.twin))
        if phaser_usable:
            import mmtbx.map_tools
            print >> out, "Will use Phaser LLG map"
            anom_map_type = None
            anom_map_coeffs = \
                mmtbx.map_tools.get_phaser_sad_llg_map_coefficients(
                    fmodel=fmodel,
                    pdb_hierarchy=pdb_hierarchy,
                    log=out)
        anom_search = find_peaks.manager(
            fmodel=fmodel,
            map_type=anom_map_type,
            map_coeffs=anom_map_coeffs,
            map_cutoff=anom_map_cutoff,
            params=params,
            log=out)
        anom_search.peaks_mapped()
        anom_search.show_mapped(pdb_atoms)
        anom = anom_search.peaks()
        if filter_peaks_by_2fofc is not None:
            anom.filter_by_secondary_map(
                map=two_fofc_map,
                min_value=filter_peaks_by_2fofc)
            print >> out, ""
            print >> out, "%d peaks remaining after 2mFo-DFc filtering" % \
                len(anom.sites)
        anom.sites = unit_cell.orthogonalize(anom.sites)
        print >> out, ""
        out.flush()

    # --- waters sitting on strong density ---
    anom_map = None
    cache = pdb_hierarchy.atom_selection_cache()
    sites_frac = fmodel.xray_structure.sites_frac()
    water_isel = cache.selection(
        "resname HOH and not (element H or element D)").iselection()
    # slot 0: mFo-DFc, slot 1: anomalous; stays None when nothing is found
    waters_out = [None, None]
    if len(water_isel) > 0:
        map_types = ["mFo-DFc"]
        map_cutoffs = [map_cutoff]
        if fmodel.f_obs().anomalous_flag():
            map_types.append("anomalous")
            map_cutoffs.append(anom_map_cutoff)
        for k, (map_type, cutoff) in enumerate(zip(map_types, map_cutoffs)):
            # re-use Phaser LLG map if it was previously calculated
            if (map_type == "anomalous") and (anom_map_coeffs is not None):
                fft_map = anom_map_coeffs.fft_map(
                    resolution_factor=params.resolution_factor,
                    symmetry_flags=maptbx.use_space_group_symmetry)
            else:
                fft_map = fmodel.electron_density_map().fft_map(
                    resolution_factor=params.resolution_factor,
                    symmetry_flags=maptbx.use_space_group_symmetry,
                    map_type=map_type,
                    use_all_data=True)
            real_map = fft_map.apply_sigma_scaling().real_map_unpadded()
            if map_type == "anomalous":
                # kept for the non-water scan below
                anom_map = real_map
            suspicious_waters = []
            for i_seq in water_isel:
                atom = pdb_atoms[i_seq]
                rho = real_map.tricubic_interpolation(sites_frac[i_seq])
                if rho >= cutoff:
                    suspicious_waters.append(water_peak(
                        id_str=atom.id_str(),
                        xyz=atom.xyz,
                        peak_height=rho,
                        map_type=map_type))
            if len(suspicious_waters) > 0:
                make_header("Water molecules with %s peaks" % map_type,
                            out=out)
                for flagged in suspicious_waters:
                    flagged.show(out=out)
                print >> out, ""
                waters_out[k] = suspicious_waters

    # --- non-water atoms on anomalous density ---
    non_water_anom_peaks = None
    if fmodel.f_obs().anomalous_flag():
        non_water_anom_peaks = []
        if anom_map is None:
            fft_map = fmodel.electron_density_map().fft_map(
                resolution_factor=params.resolution_factor,
                symmetry_flags=maptbx.use_space_group_symmetry,
                map_type="anom",
                use_all_data=True)
            anom_map = fft_map.apply_sigma_scaling().real_map_unpadded()
        heavy_non_water_isel = cache.selection(
            "not (resname HOH or element H or element D)").iselection()
        for i_seq in heavy_non_water_isel:
            rho = anom_map.tricubic_interpolation(sites_frac[i_seq])
            if rho >= anom_map_cutoff:
                atom = pdb_atoms[i_seq]
                non_water_anom_peaks.append(water_peak(
                    id_str=atom.id_str(),
                    xyz=atom.xyz,
                    peak_height=rho,
                    map_type="anomalous"))

    all_results = peaks_holes_container(
        peaks=peaks,
        holes=holes,
        anom_peaks=anom,
        map_cutoff=map_cutoff,
        anom_map_cutoff=anom_map_cutoff,
        water_peaks=waters_out[0],
        water_anom_peaks=waters_out[1],
        non_water_anom_peaks=non_water_anom_peaks)
    all_results.show_summary(out=out)
    if return_llg_map:
        return all_results, anom_map_coeffs
    return all_results
def run (args, out=None, verbose=True, plots_dir=None) :
  """
  Command-line driver: read a model plus either map coefficients or a CCP4
  map, run iterate_over_residues, write the results as .pkl and .csv, then
  run the EMRinger scoring and rolling-window analyses.

  :param args: command-line arguments, handed to
    iotbx.phil.process_command_line_with_files
  :param out: log stream; sys.stdout is used when None
  :param verbose: when true, print timing information
  :param plots_dir: directory for plot output; defaults to
    "<output_base>_plots" and is created when missing
  :returns: (results, scoring, rolling), or None when params.show_gui is set
    (run_app(results) is called instead)
  :raises Sorry: when no unambiguous set of map coefficients can be selected
  """
  t0 = time.time()
  if (out is None) : out = sys.stdout
  import iotbx.phil
  cmdline = iotbx.phil.process_command_line_with_files(
    args=args,
    master_phil=master_phil,
    pdb_file_def="model",
    reflection_file_def="map_coeffs",
    map_file_def="map_file",
    usage_string="""\
phenix.emringer model.pdb map.mrc [cif_file ...] [options]

%s
""" % __doc__)
  params = cmdline.work.extract()
  validate_params(params)
  pdb_in = cmdline.get_file(params.model)
  pdb_in.check_file_type("pdb")
  hierarchy = pdb_in.file_object.construct_hierarchy()
  hierarchy.atoms().reset_i_seq()
  map_coeffs = ccp4_map = None
  if (params.map_coeffs is not None) :
    mtz_in = cmdline.get_file(params.map_coeffs)
    mtz_in.check_file_type("hkl")
    best_guess = None
    best_labels = []
    all_labels = []
    for array in mtz_in.file_server.miller_arrays :
      labels = array.info().label_string()
      if (labels == params.map_label) :
        # explicit user choice wins; stop searching
        map_coeffs = array
        break
      elif (params.map_label is None) :
        if (array.is_complex_array()) :
          all_labels.append(labels)
          if (labels.startswith(("2FOFCWT", "2mFoDFc", "FWT"))) :
            best_guess = array
            best_labels.append(labels)
    if (map_coeffs is None) :
      if (len(all_labels) == 0) :
        raise Sorry("No valid (pre-weighted) map coefficients found in file.")
      elif (best_guess is None) :
        # separator matches the "Multiple appropriate ..." message below so
        # that every choice is listed on its own indented line
        raise Sorry("Couldn't automatically determine appropriate map labels. "+
          "Choices:\n %s" % "\n ".join(all_labels))
      elif (len(best_labels) > 1) :
        raise Sorry("Multiple appropriate map coefficients found in file. "+
          "Choices:\n %s" % "\n ".join(best_labels))
      map_coeffs = best_guess
      print >> out, " Guessing %s for input map coefficients" % best_labels[0]
  else :
    ccp4_map_in = cmdline.get_file(params.map_file)
    ccp4_map_in.check_file_type("ccp4_map")
    ccp4_map = ccp4_map_in.file_object
  make_header("Iterating over residues", out=out)
  t1 = time.time()
  from mmtbx.ringer import iterate_over_residues
  results = iterate_over_residues(
    pdb_hierarchy=hierarchy,
    map_coeffs=map_coeffs,
    ccp4_map=ccp4_map,
    params=params,
    log=out).results
  t2 = time.time()
  if (verbose) :
    print >> out, "Time excluding I/O: %8.1fs" % (t2 - t1)
    print >> out, "Overall runtime: %8.1fs" % (t2 - t0)
  if (params.output_base is None) :
    pdb_base = os.path.basename(params.model)
    params.output_base = os.path.splitext(pdb_base)[0] + "_emringer"
  easy_pickle.dump("%s.pkl" % params.output_base, results)
  print >> out, "Wrote %s.pkl" % params.output_base
  csv_text = "\n".join([ r.format_csv() for r in results ])
  # context manager guarantees the CSV is flushed and closed
  with open("%s.csv" % params.output_base, "w") as csv_file :
    csv_file.write(csv_text)
  print >> out, "Wrote %s.csv" % params.output_base
  if (plots_dir is None) :
    plots_dir = params.output_base + "_plots"
  if (not os.path.isdir(plots_dir)) :
    os.makedirs(plots_dir)
  from mmtbx.ringer import em_rolling
  from mmtbx.ringer import em_scoring
  import matplotlib
  matplotlib.use("Agg")
  make_header("Scoring results", out=out)
  scoring = em_scoring.main(
    file_name=params.output_base,
    ringer_result=results,
    out_dir=plots_dir,
    sampling_angle=params.sampling_angle,
    quiet=False,
    out=out)
  make_header("Inspecting chains", out=out)
  rolling_window_threshold = params.rolling_window_threshold
  rolling = em_rolling.main(
    ringer_results=results,
    dir_name=plots_dir,
    threshold=rolling_window_threshold, #scoring.optimal_threshold,
    graph=False,
    save=True,
    out=out)
  scoring.show_summary(out=out)
  print >> out, "\nReferences:"
  print >> out, """\
  Barad BA, Echols N, Wang RYR, Cheng YC, DiMaio F, Adams PD, Fraser JS. (2015)
  Side-chain-directed model and map validation for 3D Electron Cryomicroscopy.
  Nature Methods, in press.

  Lang PT, Ng HL, Fraser JS, Corn JE, Echols N, Sales M, Holton JM, Alber T.
  Automated electron-density sampling reveals widespread conformational
  polymorphism in proteins. Protein Sci. 2010 Jul;19(7):1420-31. PubMed PMID:
  20499387"""
  if (params.show_gui) :
    run_app(results)
  else :
    return (results, scoring, rolling)
def start_coot_and_wait (
    pdb_file,
    map_file,
    ligand_files,
    ligand_ccs,
    cif_files=(),
    work_dir=None,
    coot_cmd="coot",
    log=None) :
  """
  Write a Coot start-up script (edit_in_coot.py) into work_dir, launch Coot
  in the background, and poll until the file ".COOT_LIGANDS" appears.

  :param pdb_file: model file loaded in Coot via read_pdb
  :param map_file: file passed to auto_read_make_and_draw_maps
  :param ligand_files: candidate ligand PDB files (must be non-empty)
  :param ligand_ccs: one value per ligand file (same length as ligand_files)
  :param cif_files: restraint files passed to read_cif_dictionary
  :param work_dir: directory to run in (created if missing); current
    directory when None
  :param coot_cmd: command used to launch Coot
  :param log: log stream; sys.stdout when None
  :returns: (output_files, output_ccs) for the ligand indices listed in
    ".COOT_LIGANDS"
  :raises RuntimeError: when the Coot launch command returns non-zero
  """
  from iotbx import file_reader
  from libtbx.str_utils import make_header
  from libtbx import easy_run
  import cootbx
  assert (len(ligand_files) > 0) and (len(ligand_files) == len(ligand_ccs))
  if (log is None) : log = sys.stdout
  cwd = os.getcwd()
  if (work_dir is None) : work_dir = cwd
  if (not os.path.isdir(work_dir)) :
    os.makedirs(work_dir)
  os.chdir(work_dir)
  # always return to the caller's directory, even when launching fails
  try :
    base_script = __file__.replace(".pyc", ".py")
    ligand_xyzs = []
    # The loop variable must not be called pdb_file: that name holds the
    # model file, which is still needed for the read_pdb() line below.
    for ligand_file in ligand_files :
      ligand_in = file_reader.any_file(to_str(ligand_file), force_type="pdb")
      ligand_in.assert_file_type("pdb")
      coords = ligand_in.file_object.atoms().extract_xyz()
      ligand_xyzs.append(coords.mean())
    # list() keeps str(ligand_info) a list literal when zip() is lazy
    ligand_info = list(zip(ligand_files, ligand_ccs, ligand_xyzs))
    with open(base_script) as template :
      script_text = template.read()
    with open("edit_in_coot.py", "w") as f :
      f.write(script_text)
      f.write("\n")
      f.write("import coot\n")
      cootbx.write_disable_nomenclature_errors(f)
      f.write("read_pdb(\"%s\")\n" % to_str(pdb_file))
      f.write("auto_read_make_and_draw_maps(\"%s\")\n" % to_str(map_file))
      for cif_file in cif_files :
        f.write("read_cif_dictionary(\"%s\")\n" % to_str(cif_file))
      f.write("m = manager(%s)\n" % str(ligand_info))
    make_header("Ligand selection in Coot", log)
    rc = easy_run.call("\"%s\" --no-state-script --script edit_in_coot.py &" %
      coot_cmd)
    if (rc != 0) :
      raise RuntimeError("Launching Coot failed with status %d" % rc)
    print >> log, " Waiting for user input at %s" % str(time.asctime())
    out_file = ".COOT_LIGANDS"
    output_files = output_ccs = None
    while (True) :
      if (os.path.isfile(out_file)) :
        print >> log, " Coot editing complete at %s" % str(time.asctime())
        with open(out_file) as selected :
          ligand_indices = [ int(i) for i in selected.read().split() ]
        output_files = [
          os.path.join(work_dir, "coot_ligand_out_%d.pdb" % (i+1))
          for i in ligand_indices ]
        output_ccs = [ ligand_ccs[i] for i in ligand_indices ]
        break
      else :
        # t_wait is divided by 1000 before sleeping
        time.sleep(t_wait / 1000.)
    assert (output_files is not None)
  finally :
    os.chdir(cwd)
  return output_files, output_ccs
def build_cycle (pdb_hierarchy,
    fmodel,
    geometry_restraints_manager,
    params,
    selection=None,
    cif_objects=(),
    nproc=Auto,
    out=sys.stdout,
    verbose=False,
    debug=None,
    i_cycle=0) :
  """
  Run one cycle of alternate-conformation building: strip hydrogens, filter
  candidate residues, build and score alternates, spread them to adjacent
  residues, optionally run real-space refinement, and finalize the model.

  :param pdb_hierarchy: model hierarchy; hydrogens are removed in place
  :param fmodel: fmodel object, updated in place when hydrogens are removed
  :param geometry_restraints_manager: restraints matching the hierarchy
  :param params: parameter object (prefilter, residue_fitting, cleanup, nproc)
  :param selection: selection string or pre-computed selection (optional)
  :param cif_objects: passed through to real_space_refine
  :param nproc: accepted for interface compatibility; params.nproc is what
    the body actually uses
  :param out: log stream
  :param verbose: passed through to the building steps
  :param debug: when greater than zero, write starting map coefficients to
    "cycle_<n>_start.mtz"; None disables this
  :param i_cycle: zero-based cycle index, used for labelling only
  :returns: (pdb_hierarchy, n_alternates)
  """
  from mmtbx import restraints
  from scitbx.array_family import flex
  t_start = time.time()
  hd_sel = fmodel.xray_structure.hd_selection()
  n_hydrogen = hd_sel.count(True)
  # hydrogen removal is currently unconditional
  # (was to be controlled by params.building.delete_hydrogens)
  if (n_hydrogen > 0) :
    print >> out, "WARNING: %d hydrogen atoms will be removed!" % n_hydrogen
    non_hd_sel = ~hd_sel
    # XXX it's better to do this in-place for the hierarchy, because calling
    # pdb_hierarchy.select(non_hd_sel) will not remove parent-child
    # relationships involving hydrogens, which causes problems when running
    # the MolProbity validation.
    pdb_hierarchy.remove_hd(reset_i_seq=True)
    xray_structure = fmodel.xray_structure.select(non_hd_sel)
    assert (pdb_hierarchy.atoms_size() == xray_structure.scatterers().size())
    fmodel.update_xray_structure(xray_structure)
    geometry_restraints_manager = geometry_restraints_manager.select(non_hd_sel)
  pdb_atoms = pdb_hierarchy.atoms()
  segids = pdb_atoms.extract_segid().strip()
  if (not segids.all_eq("")) :
    print >> out, "WARNING: resetting segids to blank"
    for i_seq, atom in enumerate(pdb_atoms) :
      atom.segid = ""
      # keep the scatterer label in sync with the new atom id string
      sc = fmodel.xray_structure.scatterers()[i_seq]
      sc.label = atom.id_str()
  if isinstance(selection, str) :
    sele_cache = pdb_hierarchy.atom_selection_cache()
    selection = sele_cache.selection(selection)
  make_header("Build cycle %d" % (i_cycle+1), out=out)
  fmodel.info().show_rfactors_targets_scales_overall(out=out)
  # debug defaults to None; comparing None with an int is an error on
  # Python 3, so test for None explicitly
  if (debug is not None) and (debug > 0) :
    from mmtbx.maps.utils import get_maps_from_fmodel
    from iotbx.map_tools import write_map_coeffs
    two_fofc, fofc = get_maps_from_fmodel(fmodel,
      exclude_free_r_reflections=True)
    write_map_coeffs(
      fwt_coeffs=two_fofc,
      delfwt_coeffs=fofc,
      file_name="cycle_%d_start.mtz" % (i_cycle+1))
  candidate_residues = alt_confs.filter_before_build(
    pdb_hierarchy=pdb_hierarchy,
    fmodel=fmodel,
    geometry_restraints_manager=geometry_restraints_manager,
    selection=selection,
    params=params.prefilter,
    verbose=verbose,
    log=out)
  t1 = time.time()
  print >> out, "filtering: %.3fs" % (t1-t_start)
  restraints_manager = restraints.manager(
    geometry=geometry_restraints_manager,
    normalization=True)
  make_sub_header("Finding alternate conformations", out=out)
  building_trials = find_all_alternates(
    residues=candidate_residues,
    pdb_hierarchy=pdb_hierarchy,
    restraints_manager=restraints_manager,
    fmodel=fmodel,
    params=params.residue_fitting,
    nproc=params.nproc,
    verbose=verbose,
    debug=debug,
    log=out).results
  t2 = time.time()
  print >> out, " building: %.3fs" % (t2-t1)
  make_sub_header("Scoring and assembling alternates", out=out)
  n_alternates = process_results(
    pdb_hierarchy=pdb_hierarchy,
    fmodel=fmodel,
    residues_in=candidate_residues,
    building_trials=building_trials,
    params=params.residue_fitting,
    verbose=verbose,
    log=out)
  if (n_alternates > 0) :
    print >> out, ""
    print >> out, " %d disordered residues built" % n_alternates
    n_split = alt_confs.spread_alternates(pdb_hierarchy,
      new_occupancy=params.residue_fitting.expected_occupancy,
      split_all_adjacent=True,
      log=out)
    assert (n_split > 0)
    print >> out, " %d adjacent residues split" % n_split
  else :
    print >> out, "No alternates built this round."
  t3 = time.time()
  print >> out, " assembly: %.3fs" % (t3-t2)
  if (not params.cleanup.rsr_after_build) :
    if (n_alternates > 0) :
      print >> out, "Skipping final RSR step (rsr_after_build=False)."
    else :
      print >> out, "No refinement needs to be performed."
  else :
    make_sub_header("Real-space refinement", out=out)
    print >> out, ""
    pdb_hierarchy = real_space_refine(
      pdb_hierarchy=pdb_hierarchy,
      fmodel=fmodel,
      cif_objects=cif_objects,
      params=params,
      nproc=params.nproc,
      remediate=True,
      out=out)
    t4 = time.time()
    print >> out, ""
    print >> out, "RSR: %.3fs" % (t4-t3)
  # NOTE(review): statement nesting was reconstructed from flattened source;
  # confirm that the summary below is meant to run on both branches.
  fmodel.info().show_targets(out=out, text="Rebuilt model")
  alt_confs.finalize_model(
    pdb_hierarchy=pdb_hierarchy,
    xray_structure=pdb_hierarchy.extract_xray_structure(
      crystal_symmetry=fmodel.xray_structure),
    set_b_iso=params.cleanup.set_b_iso,
    convert_to_isotropic=params.cleanup.convert_to_isotropic,
    selection="altloc A or altloc B")
  t_end = time.time()
  print >> out, "Total runtime for cycle: %.3fs" % (t_end-t_start)
  return pdb_hierarchy, n_alternates
def show(self, out=sys.stdout, outliers_only=True, suppress_summary=False,
    show_percentiles=False):
  """
  Print the full report: each available section in a fixed order, then
  (unless suppress_summary is set) the summary.  Returns self.
  """
  pad = " "

  def _section(heading, title, component, **show_args):
    # Sections whose component is None are skipped silently.
    if component is None:
      return
    heading(title, out=out)
    component.show(out=out, prefix=pad, **show_args)

  xtriage = self.xtriage
  if xtriage is not None:
    issues = xtriage.summarize_issues()
    issues.show(out=out)
  _section(make_header, "Experimental data", self.data_stats)
  _section(make_sub_header, "Residues with poor real-space CC", self.real_space)
  _section(make_sub_header, "Suspicious water molecules", self.waters)
  _section(make_header, "Model properties", self.model_stats)
  _section(make_header, "Geometry restraints", self.restraints)
  # The geometry statistics are printed unconditionally and take the
  # stream through "log" rather than "out".
  make_header("Molprobity validation", out=out)
  self.model_statistics_geometry.show(log=out, prefix=pad, lowercase=True)
  _section(make_sub_header, "Asn/Gln/His flips", self.nqh_flips)
  _section(make_header, "RNA validation", self.rna,
    outliers_only=outliers_only)
  if suppress_summary:
    return self
  make_header("Summary", out=out)
  self.show_summary(out=out, prefix=pad, show_percentiles=show_percentiles)
  return self
def start_coot_and_wait(pdb_file,
                        map_file,
                        ligand_files,
                        ligand_ccs,
                        cif_files=(),
                        work_dir=None,
                        coot_cmd="coot",
                        log=None):
  """
  Write a Coot start-up script (edit_in_coot.py) into work_dir, launch Coot
  in the background, and poll until the file ".COOT_LIGANDS" appears.

  :param pdb_file: model file loaded in Coot via read_pdb
  :param map_file: file passed to auto_read_make_and_draw_maps
  :param ligand_files: candidate ligand PDB files (must be non-empty)
  :param ligand_ccs: one value per ligand file (same length as ligand_files)
  :param cif_files: restraint files passed to read_cif_dictionary
  :param work_dir: directory to run in (created if missing); current
    directory when None
  :param coot_cmd: command used to launch Coot
  :param log: log stream; sys.stdout when None
  :returns: (output_files, output_ccs) for the ligand indices listed in
    ".COOT_LIGANDS"
  :raises RuntimeError: when the Coot launch command returns non-zero
  """
  from iotbx import file_reader
  from libtbx.str_utils import make_header
  from libtbx import easy_run
  import cootbx
  assert (len(ligand_files) > 0) and (len(ligand_files) == len(ligand_ccs))
  if (log is None):
    log = sys.stdout
  cwd = os.getcwd()
  if (work_dir is None):
    work_dir = cwd
  if (not os.path.isdir(work_dir)):
    os.makedirs(work_dir)
  os.chdir(work_dir)
  # always return to the caller's directory, even when launching fails
  try:
    base_script = __file__.replace(".pyc", ".py")
    ligand_xyzs = []
    # The loop variable must not be called pdb_file: that name holds the
    # model file, which is still needed for the read_pdb() line below.
    for ligand_file in ligand_files:
      ligand_in = file_reader.any_file(to_str(ligand_file), force_type="pdb")
      ligand_in.assert_file_type("pdb")
      coords = ligand_in.file_object.atoms().extract_xyz()
      ligand_xyzs.append(coords.mean())
    # list() keeps str(ligand_info) a list literal when zip() is lazy
    ligand_info = list(zip(ligand_files, ligand_ccs, ligand_xyzs))
    with open(base_script) as template:
      script_text = template.read()
    with open("edit_in_coot.py", "w") as f:
      f.write(script_text)
      f.write("\n")
      f.write("import coot\n")
      cootbx.write_disable_nomenclature_errors(f)
      f.write("read_pdb(\"%s\")\n" % to_str(pdb_file))
      f.write("auto_read_make_and_draw_maps(\"%s\")\n" % to_str(map_file))
      for cif_file in cif_files:
        f.write("read_cif_dictionary(\"%s\")\n" % to_str(cif_file))
      f.write("m = manager(%s)\n" % str(ligand_info))
    make_header("Ligand selection in Coot", log)
    rc = easy_run.call("\"%s\" --no-state-script --script edit_in_coot.py &" %
                       coot_cmd)
    if (rc != 0):
      raise RuntimeError("Launching Coot failed with status %d" % rc)
    print >> log, " Waiting for user input at %s" % str(time.asctime())
    out_file = ".COOT_LIGANDS"
    output_files = output_ccs = None
    while (True):
      if (os.path.isfile(out_file)):
        print >> log, " Coot editing complete at %s" % str(time.asctime())
        with open(out_file) as selected:
          ligand_indices = [int(i) for i in selected.read().split()]
        output_files = [
          os.path.join(work_dir, "coot_ligand_out_%d.pdb" % (i + 1))
          for i in ligand_indices
        ]
        output_ccs = [ligand_ccs[i] for i in ligand_indices]
        break
      else:
        # t_wait is divided by 1000 before sleeping
        time.sleep(t_wait / 1000.)
    assert (output_files is not None)
  finally:
    os.chdir(cwd)
  return output_files, output_ccs
def show_header (self, text) :
  """
  Write `text` as a header to this object's output stream (self.out).
  """
  stream = self.out
  make_header(text, out=stream)
def run(args, out=sys.stdout):
  """
  Command-line driver: generate an ensemble of conformations for a selected
  group of atoms by running the annealing manager params.n_confs times, and
  write the ensemble as a multi-model PDB file.

  :param args: command-line arguments, handed to
    mmtbx.command_line.load_model_and_data
  :param out: log stream
  :returns: list with the RMSD of each written model (over the selected
    atoms) relative to the starting coordinates
  """
  from mmtbx.building import alternate_conformations
  import mmtbx.command_line
  import mmtbx.building
  import iotbx.pdb.hierarchy
  cmdline = mmtbx.command_line.load_model_and_data(
    args=args,
    master_phil=get_master_phil(),
    process_pdb_file=True,
    create_fmodel=True,
    out=out,
    usage_string="""\
mmtbx.generate_disorder model.pdb data.mtz selection="resname ATP" [occ=0.6]

Perform simulatead annealing against an mFo-DFc map to generate possible
alternate conformations for a selection of atoms. For development purposes
and experimentation only.
""")
  params = cmdline.params
  fmodel = cmdline.fmodel
  validate_params(params)
  pdb_hierarchy = cmdline.pdb_hierarchy
  make_header("Generating disorder", out=out)
  a_c_p = cmdline.processed_pdb_file.all_chain_proxies
  selection = a_c_p.selection(params.selection)
  if (params.whole_residues):
    selection = iotbx.pdb.atom_selection.expand_selection_to_entire_atom_groups(
      selection=selection,
      pdb_atoms=pdb_hierarchy.atoms())
  n_sel = selection.count(True)
  assert (n_sel > 0)
  print >> out, "%d atoms selected" % n_sel
  selection_delete = None
  if (params.selection_delete is not None):
    selection_delete = a_c_p.selection(params.selection_delete)
  # the map calculation gets its own copy of fmodel
  two_fofc_map, fofc_map = alternate_conformations.get_partial_omit_map(
    fmodel=fmodel.deep_copy(),
    selection=selection,
    selection_delete=selection_delete,
    negate_surrounding=params.negate_surrounding_sites,
    map_file_name=params.output.map_file_name,
    partial_occupancy=params.occ,
    resolution_factor=params.resolution_factor)
  target_map = fofc_map
  if (params.target_map == "2mFo-DFc"):
    target_map = two_fofc_map
  annealer = annealing_manager(
    xray_structure=fmodel.xray_structure,
    pdb_hierarchy=pdb_hierarchy,
    processed_pdb_file=cmdline.processed_pdb_file,
    target_map=target_map,
    two_fofc_map=two_fofc_map,
    d_min=fmodel.f_obs().d_min(),
    params=params,
    selection=selection,
    resolution_factor=params.resolution_factor,
    out=out,
    debug=params.output.debug)
  sites_ref = pdb_hierarchy.atoms().extract_xyz().deep_copy()
  sites_all = easy_mp.pool_map(
    fixed_func=annealer,
    iterable=range(params.n_confs),
    processes=params.nproc)
  ensemble = iotbx.pdb.hierarchy.root()
  if (params.output.include_starting_model):
    sites_all.insert(0, sites_ref)
  rmsds = []
  for i_conf, sites_new in enumerate(sites_all):
    assert (sites_new is not None)
    model = pdb_hierarchy.only_model().detached_copy()
    model.atoms().set_xyz(sites_new)
    model.id = str(i_conf + 1)
    rmsd = sites_new.select(selection).rms_difference(
      sites_ref.select(selection))
    print >> out, "Model %d: rmsd=%.3f" % (i_conf + 1, rmsd)
    rmsds.append(rmsd)
    ensemble.append_model(model)
  # context manager closes the file even if serialization fails
  with open(params.output.file_name, "w") as f:
    f.write(ensemble.as_pdb_string(crystal_symmetry=fmodel.xray_structure))
  print >> out, "Wrote ensemble model to %s" % params.output.file_name
  return rmsds
def find_peaks_holes(fmodel,
                     pdb_hierarchy,
                     params=None,
                     map_cutoff=3.0,
                     anom_map_cutoff=3.0,
                     filter_peaks_by_2fofc=None,
                     use_phaser_if_available=True,
                     return_llg_map=False,
                     include_peaks_near_model=False,
                     out=None):
  """
  Locate mFo-DFc peaks and holes, anomalous peaks (when the data are
  anomalous), water atoms sitting on difference or anomalous density, and
  non-water heavy atoms sitting on anomalous density.

  Returns a peaks_holes_container; when return_llg_map is true the return
  value is the tuple (container, anom_map_coeffs) instead.
  """
  from mmtbx import find_peaks
  from cctbx import maptbx
  if out is None:
    out = sys.stdout
  if params is None:
    params = master_phil.fetch().extract().find_peaks
  if include_peaks_near_model:
    params.map_next_to_model.min_model_peak_dist = 0
  pdb_atoms = pdb_hierarchy.atoms()
  unit_cell = fmodel.xray_structure.unit_cell()
  use_filter = filter_peaks_by_2fofc is not None

  def _model_map(map_type):
    # FFT of a model-based map with the shared grid settings.
    return fmodel.electron_density_map().fft_map(
      resolution_factor=params.resolution_factor,
      symmetry_flags=maptbx.use_space_group_symmetry,
      map_type=map_type,
      use_all_data=True)

  f_map = None
  if use_filter:
    two_fofc_fft = _model_map("2mFo-DFc")
    two_fofc_fft.apply_sigma_scaling()
    f_map = two_fofc_fft.real_map()

  # --- positive peaks
  make_header("Positive difference map peaks", out=out)
  peaks_result = find_peaks.manager(
    fmodel=fmodel,
    map_type="mFo-DFc",
    map_cutoff=map_cutoff,
    params=params,
    log=out)
  peaks_result.peaks_mapped()
  peaks_result.show_mapped(pdb_atoms)
  peaks = peaks_result.peaks()
  if use_filter:
    peaks.filter_by_secondary_map(map=f_map, min_value=filter_peaks_by_2fofc)
    print >> out, ""
    print >> out, "%d peaks remaining after 2mFo-DFc filtering" % \
      len(peaks.sites)
  # sites come back fractional; convert to Cartesian
  peaks.sites = unit_cell.orthogonalize(peaks.sites)
  print >> out, ""
  out.flush()

  # --- negative holes (no 2mFo-DFc filtering is applied to these)
  make_header("Negative difference map holes", out=out)
  holes_result = find_peaks.manager(
    fmodel=fmodel,
    map_type="mFo-DFc",
    map_cutoff=-map_cutoff,
    params=params,
    log=out)
  holes_result.peaks_mapped()
  holes_result.show_mapped(pdb_atoms)
  holes = holes_result.peaks()
  holes.sites = unit_cell.orthogonalize(holes.sites)
  print >> out, ""
  out.flush()

  # --- anomalous peaks
  anom = None
  anom_map_coeffs = None
  has_anom = fmodel.f_obs().anomalous_flag()
  if has_anom:
    make_header("Anomalous difference map peaks", out=out)
    anom_map_type = "anom_residual"
    phaser_ok = (use_phaser_if_available and
                 libtbx.env.has_module("phaser") and
                 (not fmodel.twin))
    if phaser_ok:
      import mmtbx.map_tools
      print >> out, "Will use Phaser LLG map"
      anom_map_type = None
      anom_map_coeffs = mmtbx.map_tools.get_phaser_sad_llg_map_coefficients(
        fmodel=fmodel,
        pdb_hierarchy=pdb_hierarchy,
        log=out)
    anom_result = find_peaks.manager(
      fmodel=fmodel,
      map_type=anom_map_type,
      map_coeffs=anom_map_coeffs,
      map_cutoff=anom_map_cutoff,
      params=params,
      log=out)
    anom_result.peaks_mapped()
    anom_result.show_mapped(pdb_atoms)
    anom = anom_result.peaks()
    if use_filter:
      anom.filter_by_secondary_map(map=f_map, min_value=filter_peaks_by_2fofc)
      print >> out, ""
      print >> out, "%d peaks remaining after 2mFo-DFc filtering" % \
        len(anom.sites)
    anom.sites = unit_cell.orthogonalize(anom.sites)
    print >> out, ""
    out.flush()

  # --- waters on difference / anomalous density
  anom_map = None
  cache = pdb_hierarchy.atom_selection_cache()
  sites_frac = fmodel.xray_structure.sites_frac()
  water_isel = cache.selection(
    "resname HOH and not (element H or element D)").iselection()
  waters_out = [None, None]
  if len(water_isel) > 0:
    scans = [("mFo-DFc", map_cutoff)]
    if has_anom:
      scans.append(("anomalous", anom_map_cutoff))
    for k, (map_type, cutoff) in enumerate(scans):
      # re-use Phaser LLG map if it was previously calculated
      if (map_type == "anomalous") and (anom_map_coeffs is not None):
        fft_map = anom_map_coeffs.fft_map(
          resolution_factor=params.resolution_factor,
          symmetry_flags=maptbx.use_space_group_symmetry)
      else:
        fft_map = _model_map(map_type)
      real_map = fft_map.apply_sigma_scaling().real_map_unpadded()
      if map_type == "anomalous":
        anom_map = real_map
      suspicious_waters = []
      for i_seq in water_isel:
        atom = pdb_atoms[i_seq]
        rho = real_map.tricubic_interpolation(sites_frac[i_seq])
        if rho < cutoff:
          continue
        suspicious_waters.append(water_peak(
          id_str=atom.id_str(),
          xyz=atom.xyz,
          peak_height=rho,
          map_type=map_type))
      if suspicious_waters:
        make_header("Water molecules with %s peaks" % map_type, out=out)
        for peak in suspicious_waters:
          peak.show(out=out)
        print >> out, ""
        waters_out[k] = suspicious_waters

  # --- non-water heavy atoms on anomalous density
  non_water_anom_peaks = None
  if has_anom:
    non_water_anom_peaks = []
    if anom_map is None:
      anom_map = _model_map("anom").apply_sigma_scaling().real_map_unpadded()
    non_water_non_H_i_sel = cache.selection(
      "not (resname HOH or element H or element D)").iselection()
    for i_seq in non_water_non_H_i_sel:
      rho = anom_map.tricubic_interpolation(sites_frac[i_seq])
      if rho < anom_map_cutoff:
        continue
      atom = pdb_atoms[i_seq]
      non_water_anom_peaks.append(water_peak(
        id_str=atom.id_str(),
        xyz=atom.xyz,
        peak_height=rho,
        map_type="anomalous"))

  all_results = peaks_holes_container(
    peaks=peaks,
    holes=holes,
    anom_peaks=anom,
    map_cutoff=map_cutoff,
    anom_map_cutoff=anom_map_cutoff,
    water_peaks=waters_out[0],
    water_anom_peaks=waters_out[1],
    non_water_anom_peaks=non_water_anom_peaks)
  all_results.show_summary(out=out)
  if return_llg_map:
    return all_results, anom_map_coeffs
  return all_results
def __init__ (self, params, hkl_in=None, pdb_in=None, out=sys.stdout) :
  """
  Load (or simulate) the input data, truncate it, optionally add noise,
  print statistics, and write the output.

  :param params: extracted parameter object (pdb_file, hkl_file, d_min,
    generate_noise, r_free_flags, ...)
  :param hkl_in: pre-loaded reflection file object; read from
    params.hkl_file when None
  :param pdb_in: pre-loaded PDB input object; read from params.pdb_file
    when None
  :param out: stream that receives all log output, including headers
  :raises Sorry: when neither a PDB nor a reflection file is given, or when
    noise is requested without hkl_file and without a noise_profile_file
  """
  adopt_init_args(self, locals())
  self.params = params
  self.out = out
  self.pdb_hierarchy = None
  if (params.pdb_file is None) and (params.hkl_file is None) :
    raise Sorry("No PDB file specified.")
  if (params.generate_noise.add_noise) and (params.hkl_file is None) :
    if (params.generate_noise.noise_profile_file is None) :
      raise Sorry("noise_profile_file required when add_noise=True and "
        "hkl_file is undefined.")
  if (pdb_in is None) and (params.pdb_file is not None) :
    f = file_reader.any_file(params.pdb_file, force_type="pdb")
    f.assert_file_type("pdb")
    self.pdb_in = f.file_object
  if (self.hkl_in is None) and (params.hkl_file is not None) :
    # the parameter is spelled hkl_file (it was mistyped as hkl_File)
    f = file_reader.any_file(params.hkl_file, force_type="hkl")
    f.assert_file_type("hkl")
    self.hkl_in = f.file_object
  if (self.pdb_in is not None) :
    self.pdb_hierarchy = self.pdb_in.hierarchy
  # Experimental data take precedence over data simulated from the model.
  if (self.hkl_in is not None) :
    make_header("Extracting experimental data", out=out)
    f_raw, r_free = self.from_hkl()
  elif (self.pdb_in is not None) :
    make_header("Generating fake data with phenix.fmodel", out=out)
    f_raw, r_free = self.from_pdb()
  if (params.r_free_flags.file_name is not None) :
    f_raw, r_free = self.import_r_free_flags(f_raw)
  self.r_free = r_free
  make_header("Applying low-resolution filtering", out=out)
  print >> out, " Target resolution: %.2f A" % params.d_min
  self.n_residues, self.n_bases = None, None
  if (self.pdb_in is not None) :
    self.n_residues, self.n_bases = get_counts(self.pdb_hierarchy)
  #if (params.auto_adjust) :
  #  if (pdb_in is None) :
  #    raise Sorry("You must supply a PDB file when auto_adjust=True.")
  self.f_out = self.truncate_data(f_raw)
  if (params.generate_noise.add_noise) :
    make_header("Adding noise using sigma profile", out=out)
    # NOTE(review): statement nesting was reconstructed from flattened
    # source; confirm that noise is applied only when the data carry no
    # sigmas, and that pdb_in is always present on this path (iso_scale is
    # otherwise undefined).
    if (self.f_out.sigmas() is None) :
      if (self.pdb_in is not None) :
        iso_scale, aniso_scale = wilson_scaling(self.f_out, self.n_residues,
          self.n_bases)
      i_obs = create_sigmas(
        f_obs=self.f_out,
        params=params.generate_noise,
        wilson_b=iso_scale.b_wilson,
        return_as_amplitudes=False)
      apply_sigma_noise(i_obs)
      self.f_out = i_obs.f_sq_as_f()
  make_header("Done processing", out=out)
  print >> out, " Completeness after processing: %.2f%%" % (
    self.f_out.completeness() * 100.)
  print >> out, " Final resolution: %.2f A" % self.f_out.d_min()
  if (self.pdb_in is not None) :
    iso_scale, aniso_scale = wilson_scaling(self.f_out, self.n_residues,
      self.n_bases)
    print >> out, ""
    print >> out, " Scaling statistics for output data:"
    show_b_factor_info(iso_scale, aniso_scale, out=out)
    print >> out, ""
  self.write_output()
# Regression checks for libtbx.str_utils: split_keeping_spaces,
# size_as_string_with_commas, show_string, prefix_each_line(_suffix),
# show_sorted_by_counts, line_breaker, StringIO (including pickling),
# reformat_terminal_text, strip_lines/rstrip_lines, expandtabs_track_columns,
# format_value, make_header, string_representation, framed_output,
# print_message_in_box and make_big_header.
# NOTE(review): the expected-value literals below are whitespace-sensitive;
# the code is left untouched here - confirm its layout against the upstream
# file before editing.
def exercise(): from libtbx.test_utils import show_diff, Exception_expected from six.moves import cPickle as pickle # from libtbx.str_utils import split_keeping_spaces assert split_keeping_spaces(s="") == [] assert split_keeping_spaces(s=" ") == [" "] assert split_keeping_spaces(s="a") == ["a"] assert split_keeping_spaces(s="abc") == ["abc"] assert split_keeping_spaces(s=" a") == [" ", "a"] assert split_keeping_spaces(s=" a") == [" ", "a"] assert split_keeping_spaces(s=" abc") == [" ", "abc"] assert split_keeping_spaces(s=" abc ") == [" ", "abc", " "] assert split_keeping_spaces(s=" abc ") == [" ", "abc", " "] assert split_keeping_spaces(s="a ") == ["a", " "] assert split_keeping_spaces(s="a ") == ["a", " "] assert split_keeping_spaces(s="abc ") == ["abc", " "] assert split_keeping_spaces(s="a b") == ["a", " ", "b"] assert split_keeping_spaces(s="a b") == ["a", " ", "b"] assert split_keeping_spaces(s=" a b c d ") == [ " ", "a", " ", "b", " ", "c", " ", "d", " " ] # from libtbx.str_utils import size_as_string_with_commas assert size_as_string_with_commas(0) == "0" assert size_as_string_with_commas(1) == "1" assert size_as_string_with_commas(-1) == "-1" assert size_as_string_with_commas(10) == "10" assert size_as_string_with_commas(100) == "100" assert size_as_string_with_commas(1000) == "1,000" assert size_as_string_with_commas(12345) == "12,345" assert size_as_string_with_commas(12345678) == "12,345,678" assert size_as_string_with_commas(-12345678) == "-12,345,678" # from libtbx.str_utils import show_string assert show_string("abc") == '"abc"' assert show_string("a'c") == '"a\'c"' assert show_string('a"c') == "'a\"c'" assert show_string('\'"c') == '"\'\\"c"' # from libtbx.str_utils import prefix_each_line assert prefix_each_line(prefix="^", lines_as_one_string="""\ hello world""") == """\ ^hello ^world""" # from libtbx.str_utils import prefix_each_line_suffix assert prefix_each_line_suffix(prefix="^", lines_as_one_string="""\ hello world""", suffix=" ") == """\ 
^hello ^world""" assert prefix_each_line_suffix(prefix="^", lines_as_one_string="""\ hello world""", suffix=" ", rstrip=False) == """\ ^hello%s ^world """ % " " # from libtbx.str_utils import show_sorted_by_counts from six.moves import cStringIO out = cStringIO() assert show_sorted_by_counts(label_count_pairs=[("b", 3), ("a", 3), ("c", -2)], out=out, prefix="%") assert not show_diff(out.getvalue(), """\ %"a" 3 %"b" 3 %"c" -2 """) out = cStringIO() assert show_sorted_by_counts(label_count_pairs=[("b", -3), ("a", -3), ("c", 2)], reverse=False, out=out, prefix="%", annotations=[None, "", "x"]) assert not show_diff(out.getvalue(), """\ %"a" -3 %"b" -3 %"c" 2 x """) # from libtbx.str_utils import line_breaker for string, expected_result in [ ("", [""]), ("this is", ["this is"]), ("this is a", ["this is", "a"]), ("this is a sentence", ["this is", "a", "sentence"]), ("this is a longer sentence", ["this is", "a", "longer", "sentence"]), ("this is a very long sentence indeed", ["this is", "a very", "long", "sentence", "indeed"]) ]: assert [block for block in line_breaker(string, width=7)] == expected_result # from libtbx.str_utils import StringIO out1 = cStringIO() out2 = StringIO() out3 = StringIO("Hello world!\n") print("Hello world!", file=out1) print("Hello world!", file=out2) try: print("Hello world!", file=out3) except AttributeError: pass else: raise Exception_expected out4 = pickle.loads(pickle.dumps(out2)) out5 = pickle.loads(pickle.dumps(out3)) assert out4.getvalue() == out1.getvalue() == out2.getvalue( ) == out5.getvalue() # from libtbx.str_utils import reformat_terminal_text txt1 = """ This is some terminal-formatted text which needs to be reset. """ assert (reformat_terminal_text(txt1) == "This is some terminal-formatted text which needs to be reset.") txt2 = """ This is more terminal-formatted text which needs to be reset. 
""" # from libtbx.str_utils import strip_lines, rstrip_lines lines = [" This is more ", " terminal-formatted ", " text "] assert ( strip_lines(txt2) == "\nThis is more\nterminal-formatted\ntext which needs\nto be reset.") assert ( rstrip_lines(txt2) == "\n This is more\n terminal-formatted\n text which needs\n to be reset." ) # from libtbx.str_utils import expandtabs_track_columns def check(s): es, js = expandtabs_track_columns(s=s) assert len(js) == len(s) assert es == s.expandtabs() sr = "".join([es[j] for j in js]) assert sr == s.replace("\t", " ") check("") check("\t") check("\t\t") check("\ty") check("x\ty") check("x\ty\tz") check("\txy\t\tz") check("abcdefg\txy\t\tz") check("ab defgh\txyz\t\tu") # from libtbx.str_utils import format_value assert format_value("%.4f", 1.2345678) == "1.2346" assert format_value("%.4f", None) == " None" assert format_value("%.4f", None, replace_none_with="---") == " ---" # from libtbx.str_utils import make_header out = StringIO() make_header("Header 1", out=out) assert (out.getvalue() == """ =================================== Header 1 ================================== """) out = StringIO() make_header("Header 2", out=out) assert (out.getvalue() == """ =================================== Header 2 ================================== """) # import sys from libtbx.str_utils import string_representation iset = list(range(130)) + list(range(250, 256)) for i in iset: s = chr(i) for j in iset: ss = s + chr(j) sr = string_representation(string=ss, preferred_quote="'", alternative_quote='"') if sys.hexversion < 0x03000000: assert sr == repr(ss) else: assert eval(sr) == ss from libtbx.str_utils import framed_output out = StringIO() box = framed_output(out, frame='#') print("Hello, world!", file=box) box.close() assert (out.getvalue() == """ ################# # Hello, world! 
# ################# """) out = StringIO() box = framed_output(out, frame='-', width=80, center=True, title="Refinement stats") box.write("r_free = 0.1234") box.write(" ") box.write("r_work = 0.1567") box.close() assert (out.getvalue() == """ |--------------------------------Refinement stats------------------------------| | r_free = 0.1234 r_work = 0.1567 | |------------------------------------------------------------------------------| """) out = StringIO() box = framed_output(out, frame='-', width=72, prefix=" ", title="Validation summary") print("Overall MolProbity score: 2.56", file=box) box.add_separator() print("""\ Ramachandran favored: 97.5 % outliers: 2.5 % Rotamer outliers: 5.9 % Clashscore: 10.9""", file=box) assert (out.getvalue() == "") del box assert (out.getvalue() == """ |-Validation summary---------------------------------------------------| | Overall MolProbity score: 2.56 | |----------------------------------------------------------------------| | Ramachandran favored: 97.5 % | | outliers: 2.5 % | | Rotamer outliers: 5.9 % | | Clashscore: 10.9 | |----------------------------------------------------------------------| """) from libtbx.str_utils import print_message_in_box out = StringIO() print_message_in_box( message="This is some terminal-formatted text which needs to be reset.", out=out, width=32, center=True, prefix=" ", frame='*') assert (out.getvalue() == """ ******************************** * This is some * * terminal-formatted text * * which needs to be reset. * ******************************** """) from libtbx.str_utils import make_big_header out = StringIO() make_big_header("Section title", out=out) assert (out.getvalue() == """ ################################################################################ # Section title # ################################################################################ """)
def run(args, out=None, verbose=True):
    """
    Command-line driver: read a model and map coefficients, run the
    per-residue iteration, and write the results to disk.

    Parameters
    ----------
    args : list of command-line arguments (file names and phil options);
        an empty list raises Usage with the full parameter listing.
    out : file-like object that receives log output; defaults to sys.stdout.
    verbose : when true, timing information is printed to `out`.

    Returns
    -------
    The results collected by iterate_over_residues, unless
    params.show_gui is set, in which case run_app(results) is called and
    nothing is returned.

    Raises
    ------
    Usage : if `args` is empty.
    Sorry : if no suitable map coefficients can be selected from the
        reflection file.

    Side effects: writes "<output_base>.pkl" and "<output_base>.csv"; when
    params.output_base is None it is set in place to the PDB file's base
    name (extension removed) plus "_ringer".
    """
    t0 = time.time()
    if out is None:
        out = sys.stdout
    if len(args) == 0:
        phil_out = StringIO()
        master_phil.show(out=phil_out, prefix=" ")
        raise Usage(
            "ringer.py [model.pdb] [map.mtz] [cif_file ...] [options]\n" +
            " Full parameters:\n%s" % phil_out.getvalue())
    from iotbx import file_reader
    import iotbx.phil
    cmdline = iotbx.phil.process_command_line_with_files(
        args=args,
        master_phil=master_phil,
        pdb_file_def="pdb_file",
        reflection_file_def="map_coeffs",
        cif_file_def="cif_file")
    params = cmdline.work.extract()
    validate_params(params)

    # Model input.
    pdb_in = file_reader.any_file(params.pdb_file, force_type="pdb")
    pdb_in.check_file_type("pdb")
    hierarchy = pdb_in.file_object.construct_hierarchy()
    hierarchy.atoms().reset_i_seq()

    # Reflection input.
    mtz_in = file_reader.any_file(params.map_coeffs, force_type="hkl")
    mtz_in.check_file_type("hkl")

    # Select the map coefficients: an exact label match wins immediately;
    # if no label was requested, remember every complex array and flag those
    # whose labels start with one of the recognised prefixes.
    map_coeffs = None
    best_guess = None
    best_labels = []
    all_labels = []
    for array in mtz_in.file_server.miller_arrays:
        if array.info().label_string() == params.map_label:
            map_coeffs = array
            break
        elif params.map_label is None:
            if array.is_complex_array():
                labels = array.info().label_string()
                all_labels.append(labels)
                if labels.startswith(("2FOFCWT", "2mFoDFc", "FWT")):
                    best_guess = array
                    best_labels.append(labels)
    if map_coeffs is None:
        # NOTE: when an explicit map_label was given but never matched,
        # all_labels is still empty, so the first message below is raised.
        if len(all_labels) == 0:
            raise Sorry(
                "No valid (pre-weighted) map coefficients found in file.")
        elif best_guess is None:
            # One choice per line, formatted like the branch below.
            raise Sorry(
                "Couldn't automatically determine appropriate map labels. " +
                "Choices:\n %s" % "\n ".join(all_labels))
        elif len(best_labels) > 1:
            raise Sorry(
                "Multiple appropriate map coefficients found in file. " +
                "Choices:\n %s" % "\n ".join(best_labels))
        map_coeffs = best_guess
        print >> out, " Guessing %s for input map coefficients" % best_labels[0]

    make_header("Iterating over residues", out=out)
    t1 = time.time()
    results = iterate_over_residues(
        pdb_hierarchy=hierarchy,
        map_coeffs=map_coeffs,
        params=params,
        log=out).results
    t2 = time.time()
    if verbose:
        print >> out, "Time excluding I/O: %8.1fs" % (t2 - t1)
        print >> out, "Overall runtime: %8.1fs" % (t2 - t0)

    # Derive the output prefix from the model file name when none was given.
    if params.output_base is None:
        pdb_base = os.path.basename(params.pdb_file)
        params.output_base = os.path.splitext(pdb_base)[0] + "_ringer"
    easy_pickle.dump("%s.pkl" % params.output_base, results)
    print >> out, "Wrote %s.pkl" % params.output_base

    csv_text = "\n".join([r.format_csv() for r in results])
    # Context manager guarantees the CSV is flushed and closed before the
    # "Wrote" message is reported, even if the write raises.
    with open("%s.csv" % params.output_base, "w") as csv_file:
        csv_file.write(csv_text)
    print >> out, "Wrote %s.csv" % params.output_base

    if params.show_gui:
        run_app(results)
    else:
        return results