def run(args, command_name="distl.sweep_strength"): help_str = """\ Similar to distl.signal_strength, but acting on a sweep of images, with tabulation of the results and optional output of results as CSV file and plots of number of spots and resolution with image number. """ if (len(args) == 0 or args[0] in ["H", "h", "-H", "-h", "help", "--help", "-help"]): print "usage: %s image_prefix_*.img [parameter=value ...]" % command_name print "example: %s lysozyme_*.img distl.minimum_spot_area=8 plot.file_name=lysozyme.pdf" % command_name master_params.show(attributes_level=1, expert_level=1) print help_str return print "%s: characterization of candidate Bragg spots" % command_name phil_objects = [] argument_interpreter = master_params.command_line_argument_interpreter( home_scope="distl") image_file_names = [] moving_pdb_file_name = None for arg in args: if (os.path.isfile(arg)): image_file_names.append(arg) else: try: command_line_params = argument_interpreter.process(arg=arg) except KeyboardInterrupt: raise except Exception: raise Sorry("Unknown file or keyword: %s" % arg) else: phil_objects.append(command_line_params) if len(image_file_names) < 2: raise RuntimeError( "Please provide more than one file. 
Alternatively use " "distl.signal_strength to process a single image file.") working_params = master_params.fetch(sources=phil_objects) params = working_params.extract() if params.distl.verbosity > 0: print "#Parameters used:" print "#phil __ON__" print working_params = master_params.format(python_object=params) working_params.show(expert_level=1) print print "#phil __OFF__" print from spotfinder.applications import signal_strength spotfinder_results = run_sweep_strength(image_file_names, params) print_table(spotfinder_results.S, keys=["N_spots_inlier", "resolution"]) csv_file_name = params.distl.csv if csv_file_name is not None: with open(csv_file_name, 'wb') as f: as_csv(spotfinder_results.S, out=f) plot_file_name = params.distl.plot.file_name if plot_file_name is not None: plot(spotfinder_results.S, file_name=plot_file_name)
def run(args):
    """Average FEE spectrometer data over the selected events of a run.

    The event times of each psana run are sliced by
    ``dispatch.events_begin``/``events_end``, capped at
    ``dispatch.max_events`` and dealt out to the MPI ranks.  Every rank
    sums the 1-D (and, for camera frames, 2-D) data of its events; the sums
    are combined with ``comm.Reduce`` and rank 0 divides by the number of
    contributing events, pickles the averages and writes PDF plots into
    ``output.output_dir``.

    Args:
        args: command-line arguments, parsed against ``master_phil``.

    Raises:
        Sorry: the output directory does not exist, or none of the events
            assigned to this rank yielded data.
        Usage: experiment, run number or detector address is missing.
    """
    phil = iotbx.phil.process_command_line(
        args=args, master_string=master_phil).show()
    usage = \
        """ %s input.experiment=experimentname input.run_num=N input.address=address
    """ % libtbx.env.dispatcher_name

    params = phil.work.extract()
    if not os.path.exists(params.output.output_dir):
        raise Sorry("Output path not found:" + params.output.output_dir)

    if (params.input.experiment is None
            or params.input.run_num is None
            or params.input.address is None):
        raise Usage(usage)

    dispatch = params.dispatch
    px_to_eV = params.input.pixel_to_eV

    # set up psana
    if dispatch.events_accepted or dispatch.events_rejected:
        assert params.input.cfg is not None
        setConfigFile(params.input.cfg)

    dataset_name = "exp=%s:run=%s:idx" % (params.input.experiment,
                                          params.input.run_num)
    ds = DataSource(dataset_name)
    src = Source('DetInfo(%s)' % params.input.address)

    # set up multiprocessing with MPI
    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()  # each process in MPI has a unique id, 0-indexed
    size = comm.Get_size()  # size: number of processes running in this job

    if dispatch.max_events is None:
        max_events = sys.maxsize
    else:
        max_events = dispatch.max_events

    dark = None
    if params.input.dark is not None:
        dark = easy_pickle.load('%s' % params.input.dark)

    # Running sums; None means "nothing accumulated yet".
    fee_one_D = None
    fee_two_D = None
    two_D_data = None

    for psana_run in ds.runs():
        # A bound of None leaves that end of the slice open.
        times = psana_run.times()[dispatch.events_begin:dispatch.events_end]
        nevents = min(len(times), max_events)
        # chop the list into pieces, depending on rank.  This assigns each
        # process events such that they get every Nth event where N is the
        # number of processes
        mytimes = [times[i] for i in range(nevents) if (i + rank) % size == 0]
        print(len(mytimes))
        totals = np.array([0.0])
        print("initial totals", totals)

        for i, t in enumerate(mytimes):
            print("Event", i, "of", len(mytimes), end=' ')
            evt = psana_run.event(t)
            # Explicit == comparisons are kept: a missing "skip_event"
            # entry (None) must match neither True nor False.
            if dispatch.events_accepted or dispatch.events_all:
                if evt.get("skip_event") == True:
                    continue
            elif dispatch.events_rejected:
                if evt.get("skip_event") == False:
                    continue

            try:
                data = evt.get(Camera.FrameV1, src)
            except ValueError:
                # Fall back to the beamline-data spectrometer; the new
                # source stays in effect for the following events.
                src = Source('BldInfo(%s)' % params.input.address)
                data = evt.get(Bld.BldDataSpectrometerV1, src)
            if data is None:
                print("No data")
                continue

            # Camera frames expose data16(); anything else is treated as an
            # already projected 1-D spectrum.
            try:
                data = np.array(data.data16().astype(np.int32))
                two_D = True
            except AttributeError:
                data = np.array(data.hproj().astype(np.float64))
                two_D = False

            if two_D:
                if dark is not None:
                    data = data - dark
                one_D_data = np.sum(data, 0) / data.shape[0]
                two_D_data = np.double(data)
            else:
                # used to fix underflow problem that was present in earlier
                # release of psana and present for LH80
                data[data > 1000000000] -= 2**32
                if dark is not None:
                    data = data - dark
                one_D_data = data

            totals[0] += 1
            print("total good:", totals[0])

            if fee_one_D is None:
                fee_one_D = one_D_data
            else:
                fee_one_D += one_D_data

            # NOTE(review): two_D_data keeps the value of the most recent
            # 2-D event, so a later 1-D event would add it again - confirm
            # that 1-D and 2-D events never mix within one job.
            if fee_two_D is not None:
                fee_two_D += two_D_data
            elif two_D_data is not None:
                fee_two_D = two_D_data

        if fee_one_D is None:
            # Previously this surfaced as a NameError further down.
            raise Sorry(
                "No FEE data found in the events assigned to rank %d" % rank)

        acceptedtotals = np.zeros(totals.shape)
        acceptedfee1 = np.zeros(fee_one_D.shape)
        acceptedfee2 = None
        if fee_two_D is not None:
            acceptedfee2 = np.zeros(fee_two_D.shape)

        print("Synchronizing rank", rank)
        comm.Reduce(fee_one_D, acceptedfee1)
        comm.Reduce(totals, acceptedtotals)
        if acceptedfee2 is not None:
            comm.Reduce(fee_two_D, acceptedfee2)
        print("number averaged", acceptedtotals[0])

        if rank == 0:
            if acceptedtotals[0] > 0:
                acceptedfee1 /= acceptedtotals[0]
                if acceptedfee2 is not None:
                    acceptedfee2 /= acceptedtotals[0]

            # matplotlib needs a different backend when run on the cluster
            # nodes at SLAC; selecting it is not needed when working
            # interactively at SLAC, or on mac or on viper
            import matplotlib
            matplotlib.use('Agg')
            import matplotlib.pyplot as plt
            from matplotlib.backends.backend_pdf import PdfPages
            from matplotlib import cm

            run_tag = 'r%s' % params.input.run_num
            selections = (
                (dispatch.events_accepted, "accepted"),
                (dispatch.events_all, "all"),
                (dispatch.events_rejected, "rejected"),
            )
            # Each enabled selection writes its own files; the PDF handles
            # of the last enabled selection are the ones plotted into.
            for enabled, tag in selections:
                if not enabled:
                    continue
                stem1 = os.path.join(
                    params.output.output_dir,
                    "fee_avg_1_D_" + run_tag + "_" + tag)
                easy_pickle.dump(stem1 + ".pickle", acceptedfee1)
                pp1 = PdfPages(stem1 + ".pdf")
                if acceptedfee2 is not None:
                    stem2 = os.path.join(
                        params.output.output_dir,
                        "fee_avg_2_D_" + run_tag + "_" + tag)
                    easy_pickle.dump(stem2 + ".pickle", acceptedfee2)
                    pp2 = PdfPages(stem2 + ".pdf")
            print("Done")

            # plotting result
            pixels = np.arange(acceptedfee1.shape[0])
            if px_to_eV.energy_per_px is not None:
                xvals = ((pixels - px_to_eV.x_coord_one)
                         * px_to_eV.energy_per_px + px_to_eV.y_coord_one)
                xvals = xvals[::-1]
            if px_to_eV.x_coord_two is not None:
                # A second calibration point overrides energy_per_px.
                eV_per_px = ((px_to_eV.y_coord_two - px_to_eV.y_coord_one)
                             / (px_to_eV.x_coord_two - px_to_eV.x_coord_one))
                xvals = ((pixels - px_to_eV.x_coord_one)
                         * eV_per_px + px_to_eV.y_coord_one)
                xvals = xvals[::-1]
            if (px_to_eV.x_coord_two is None
                    and px_to_eV.energy_per_px is None):
                xvals = np.arange(0, len(acceptedfee1), 1)
            yvals = acceptedfee1

            def selection_name():
                # First enabled selection in accepted/all/rejected order.
                if dispatch.events_accepted:
                    return 'Accepted'
                if dispatch.events_all:
                    return 'All'
                if dispatch.events_rejected:
                    return 'Rejected'
                return None

            def OneD_plot(X, Y):
                plt.figure()
                plt.clf()
                plt.plot(X, Y)
                name = selection_name()
                if name is not None:
                    plt.title('%s Shots FEE Spectrum Run %s'
                              % (name, params.input.run_num))
                if px_to_eV.x_coord_one is not None:
                    plt.xlabel('eV', fontsize=13)
                else:
                    plt.xlabel('pixels', fontsize=13)
                plt.ylabel('pixels', fontsize=13)
                pp1.savefig()

            def TwoD_plot(data):
                plt.figure()
                ax = plt.gca()
                # use specified range 0, 50 to plot runs 117 - 201
                # min=0, vmax=50
                cax = ax.imshow(data, interpolation='nearest',
                                origin='lower', cmap=cm.coolwarm)
                plt.colorbar(cax, fraction=0.014, pad=0.04)
                name = selection_name()
                if name is not None:
                    ax.set_title('%s 2-D FEE Spectrum Run %s'
                                 % (name, params.input.run_num))
                pp2.savefig()

            OneD_plot(xvals, yvals)
            pp1.close()
            if acceptedfee2 is not None:
                TwoD_plot(acceptedfee2)
                pp2.close()
def _id_from_labels(labels, key):
    """Return the integer after '=' in the first label containing key, else 0."""
    for text in labels:
        if key in text:
            return int(text.split('=')[-1])
    return 0


def _select_test_flag_value(flags, log):
    """Choose the value that marks the free set in the flag data, or None."""
    fvals = list(set(flags))
    print("fvals", fvals, file=log)
    if len(fvals) == 0:
        return None
    if len(fvals) == 1:
        return fvals[0]
    if len(fvals) == 2:
        # The rarer value is the test flag.  Counts are compared directly
        # (same denominator), which does not depend on division semantics.
        n_first = (flags == fvals[0]).count(True)
        n_second = (flags == fvals[1]).count(True)
        if n_first < n_second:
            return fvals[0]
        return fvals[1]
    # More than two distinct values: only 0 is accepted as the test flag.
    if 0 not in fvals:
        raise Sorry("Cannot determine free-R flag value.")
    return 0


def extract(file_name,
            crystal_symmetry,
            wavelength_id,
            crystal_id,
            show_details_if_error,
            output_r_free_label,
            merge_non_unique_under_symmetry,
            map_to_asu,
            remove_systematic_absences,
            all_miller_arrays=None,
            incompatible_flags_to_work_set=False,
            ignore_bad_sigmas=False,
            extend_flags=False,
            return_as_miller_arrays=False,
            log=sys.stdout):
    """Convert the reflection arrays of a CIF file into an MTZ object.

    Args:
        file_name: CIF file to read when ``all_miller_arrays`` is None;
            also used in error messages.
        crystal_symmetry: starting symmetry; joined (forced) with the
            symmetry of every array found.
        wavelength_id: if not None, arrays labelled with a different
            positive wavelength_id are skipped.
        crystal_id: if not None, arrays labelled with a different positive
            crystal_id are skipped.
        show_details_if_error: print array details before raising on
            non-unique indices.
        output_r_free_label: label prefix identifying R-free flag arrays.
        merge_non_unique_under_symmetry: merge symmetry-equivalent
            observations instead of raising.
        map_to_asu: map indices to the asymmetric unit (forced on when
            ``extend_flags`` is set).
        remove_systematic_absences: drop systematically absent reflections.
        all_miller_arrays: pre-built mapping of data-block name to a
            mapping of arrays; read from ``file_name`` when None.
        incompatible_flags_to_work_set: when merging R-free flags, replace
            incompatible flags by 0.
        ignore_bad_sigmas: drop sigmas that are all zero instead of raising.
        extend_flags: extend R-free flags to the joined set of reflections.
        return_as_miller_arrays: return the processed arrays as a list
            instead of building MTZ columns.
        log: stream receiving warnings and diagnostics.

    Returns:
        The ``iotbx.mtz.object``, or the list of processed arrays when
        ``return_as_miller_arrays`` is true (an entry is None when flags
        could not be extended for lack of a flag value).

    Raises:
        Sorry: no arrays found, undefined or incompatible symmetry,
            all-zero sigmas, non-unique indices without merging, or
            unmergeable flags.
    """
    import iotbx.cif
    from cctbx import miller
    if all_miller_arrays is None:
        base_array_info = miller.array_info(
            crystal_symmetry_from_file=crystal_symmetry)
        all_miller_arrays = iotbx.cif.reader(
            file_path=file_name).build_miller_arrays(
                base_array_info=base_array_info)
    if len(all_miller_arrays) == 0:
        raise Sorry(
            "No data arrays were found in this CIF file. Please make " +
            "sure that the file contains reflection data, rather than the refined " +
            "model.")
    column_labels = set()
    if extend_flags:
        map_to_asu = True

    # TODO: is all_mille_arrays a dict ? If not change back
    all_arrays = [ma
                  for miller_arrays in six.itervalues(all_miller_arrays)
                  for ma in miller_arrays.values()]

    for ma in all_arrays:
        other_symmetry = crystal_symmetry
        try:
            crystal_symmetry = other_symmetry.join_symmetry(
                other_symmetry=ma.crystal_symmetry(), force=True)
        except AssertionError as e:
            str_e = str(e)
            if "Space group is incompatible with unit cell parameters." not in str_e:
                raise
            # Append both symmetries so the user can see what clashed.
            from six.moves import cStringIO as StringIO
            s = StringIO()
            other_symmetry.show_summary(f=s)
            ma.crystal_symmetry().show_summary(f=s)
            str_e += "\n%s" % (s.getvalue())
            raise Sorry(str_e)
    if (crystal_symmetry.unit_cell() is None
            or crystal_symmetry.space_group_info() is None):
        raise Sorry(
            "Crystal symmetry is not defined. Please use the --symmetry option."
        )

    mtz_object = iotbx.mtz.object() \
        .set_title(title="phenix.cif_as_mtz") \
        .set_space_group_info(
            space_group_info=crystal_symmetry.space_group_info())
    unit_cell = crystal_symmetry.unit_cell()
    mtz_crystals = {}
    mtz_object.set_hkl_base(unit_cell=unit_cell)
    from iotbx.reflection_file_utils import cif_status_flags_as_int_r_free_flags
    # list of all reflections (for checking R-free flags)
    from iotbx.reflection_file_utils import make_joined_set
    complete_set = make_joined_set(all_arrays)
    if return_as_miller_arrays:
        miller_array_list = []

    for ma in all_arrays:
        ma = ma.customized_copy(
            crystal_symmetry=crystal_symmetry).set_info(ma.info())
        labels = ma.info().labels
        label = get_label(miller_array=ma,
                          output_r_free_label=output_r_free_label)
        if label is None:
            print("Can't determine output label for %s - skipping." %
                  ma.info().label_string(), file=log)
            continue
        elif label.startswith(output_r_free_label):
            ma, _ = cif_status_flags_as_int_r_free_flags(
                ma, test_flag_value="f")
            if isinstance(ma.data(), flex.double):
                data_int = ma.data().iround()
                assert data_int.as_double().all_eq(ma.data())
                ma = ma.customized_copy(data=data_int).set_info(ma.info())
        elif ((ma.is_xray_amplitude_array() or ma.is_xray_intensity_array())
              and isinstance(ma.data(), flex.int)):
            ma = ma.customized_copy(
                data=ma.data().as_double()).set_info(ma.info())

        crys_id = _id_from_labels(labels, 'crystal_id')
        if crys_id > 0 and crystal_id is None:
            label += "%i" % crys_id
        if crystal_id is not None and crys_id > 0 and crys_id != crystal_id:
            continue
        if crys_id not in mtz_crystals:
            mtz_crystals[crys_id] = (mtz_object.add_crystal(
                name="crystal_%i" % crys_id,
                project_name="project",
                unit_cell=unit_cell), {})
        crystal, datasets = mtz_crystals[crys_id]

        w_id = _id_from_labels(labels, 'wavelength_id')
        if wavelength_id is not None and w_id > 0 and w_id != wavelength_id:
            continue
        if w_id > 1 and wavelength_id is None:
            if label in column_labels:
                label += "%i" % w_id
        if w_id not in datasets:
            wavelength = ma.info().wavelength
            if wavelength is None:
                wavelength = 0
            datasets[w_id] = crystal.add_dataset(name="dataset",
                                                 wavelength=wavelength)
        dataset = datasets[w_id]

        # if all sigmas for an array are set to zero either raise an error,
        # or set sigmas to None
        if ma.sigmas() is not None and (ma.sigmas() == 0).count(False) == 0:
            if ignore_bad_sigmas:
                print("Warning: bad sigmas, setting sigmas to None.",
                      file=log)
                ma.set_sigmas(None)
            else:
                raise Sorry("""Bad sigmas: all sigmas are equal to zero.
Add --ignore_bad_sigmas to command arguments to leave out sigmas from mtz file."""
                            )

        if not ma.is_unique_set_under_symmetry():
            if merge_non_unique_under_symmetry:
                print("Warning: merging non-unique data", file=log)
                if (label.startswith(output_r_free_label)
                        and incompatible_flags_to_work_set):
                    merging = ma.merge_equivalents(
                        incompatible_flags_replacement=0)
                    if merging.n_incompatible_flags > 0:
                        print("Warning: %i reflections were placed in the working set "
                              "because of incompatible flags between equivalents." % (
                                  merging.n_incompatible_flags), file=log)
                else:
                    try:
                        merging = ma.merge_equivalents()
                    except Sorry as e:
                        if "merge_equivalents_exact: incompatible" in str(e):
                            raise Sorry(
                                str(e) + " for %s" % ma.info().labels[-1] + "\n" +
                                "Add --incompatible_flags_to_work_set to command line "
                                "arguments to place incompatible flags to working set."
                            )
                        raise
                ma = merging.array().customized_copy(
                    crystal_symmetry=ma).set_info(ma.info())
            elif return_as_miller_arrays:
                # allow non-unique set
                pass
            else:
                n_all = ma.indices().size()
                n_uus = ma.unique_under_symmetry_selection().size()
                msg = ("Miller indices not unique under symmetry: %s "
                       "(%d redundant indices out of %d)\n"
                       "Add --merge to command arguments to force merging data."
                       % (file_name, n_all - n_uus, n_all))
                if show_details_if_error:
                    print(msg, file=log)
                    ma.show_comprehensive_summary(f=log, prefix=" ")
                    ma.map_to_asu().sort().show_array(f=log, prefix=" ")
                raise Sorry(msg)

        if map_to_asu:
            ma = ma.map_to_asu().set_info(ma.info())
        if remove_systematic_absences:
            ma = ma.remove_systematic_absences()

        if label.startswith(output_r_free_label) and complete_set is not None:
            n_missing = len(complete_set.lone_set(other=ma).indices())
            if n_missing > 0:
                if extend_flags:
                    from cctbx import r_free_utils
                    fval = _select_test_flag_value(ma.data(), log)
                    if fval is not None:
                        ma = r_free_utils.extend_flags(
                            r_free_flags=ma,
                            test_flag_value=fval,
                            array_label=label,
                            complete_set=complete_set,
                            preserve_input_values=True,
                            allow_uniform_flags=True,
                            log=log)
                    else:
                        ma = None
                else:
                    libtbx.warn((
                        "%d reflections do not have R-free flags in the " +
                        "array '%s' - this may " +
                        "cause problems if you try to use the MTZ file for refinement " +
                        "or map calculation. We recommend that you extend the flags " +
                        "to cover all reflections (--extend_flags on the command line)."
                    ) % (n_missing, label))

        # Get rid of fake (0,0,0) reflection in some CIFs
        if ma is not None:
            ma = ma.select_indices(
                indices=flex.miller_index(((0, 0, 0), )),
                negate=True).set_info(ma.info())

        if return_as_miller_arrays:
            miller_array_list.append(ma)
            continue  # don't make a dataset

        dec = None
        if "FWT" in label:
            dec = iotbx.mtz.ccp4_label_decorator()
        column_types = None
        if ("PHI" in label or "PHWT" in label) and ma.is_real_array():
            column_types = "P"
        elif label.startswith("DANO") and ma.is_real_array():
            if ma.sigmas() is not None:
                column_types = "DQ"
            else:
                column_types = "D"

        # Make the column label unique by appending -1, -2, ...
        label_base = label
        suffix = 1
        while label in column_labels:
            label = label_base + "-%i" % (suffix)
            suffix += 1
        if ma is not None:
            column_labels.add(label)
            dataset.add_miller_array(ma,
                                     column_root_label=label,
                                     label_decorator=dec,
                                     column_types=column_types)

    if return_as_miller_arrays:
        return miller_array_list
    return mtz_object
def run(args):
    """Command-line driver for model idealization.

    Reads the model (and at most one map source: a map file or a
    reflection file), runs ``model_idealization`` and reports the result.
    All output is sent both to stdout and to ``<output_prefix>.log``;
    ``sys.stderr`` is redirected into the same log.

    Args:
        args: command-line arguments; with none, the usage message is
            printed and the function returns.

    Raises:
        Sorry: both a map file and a reflection file were given, or no
            model file was given.
    """
    # processing command-line stuff, out of the object
    log = multi_out()
    log.register("stdout", sys.stdout)
    if len(args) == 0:
        format_usage_message(log)
        return

    input_objects = process_command_line_with_files(
        args=args,
        master_phil=master_params(),
        pdb_file_def="model_file_name",
        map_file_def="map_file_name",
        reflection_file_def="hkl_file_name",
        cif_file_def="ligands_file_name")
    work_params = input_objects.work.extract()

    map_sources = [work_params.map_file_name, work_params.hkl_file_name]
    if map_sources.count(None) < 1:
        raise Sorry("Only one source of map could be supplied.")
    input_objects.work.show(prefix=" ", out=log)
    if len(work_params.model_file_name) == 0:
        raise Sorry("No PDB file specified")

    if work_params.output_prefix is None:
        work_params.output_prefix = os.path.basename(
            work_params.model_file_name[0])
    logfile = open("%s.log" % work_params.output_prefix, "w")
    log.register("logfile", logfile)

    # Route stderr into the combined log.
    err_log = multi_out()
    err_log.register(label="log", file_object=log)
    # err_log.register(label="stderr", file_object=sys.stderr)
    sys.stderr = err_log

    if work_params.loop_idealization.output_prefix is None:
        work_params.loop_idealization.output_prefix = (
            "%s_rama_fixed" % work_params.output_prefix)

    # Here we start opening files provided,
    # collect crystal symmetries
    pdb_combined = iotbx.pdb.combine_unique_pdb_files(
        file_names=work_params.model_file_name)
    pdb_input = iotbx.pdb.input(
        source_info=None,
        lines=flex.std_string(pdb_combined.raw_records))
    pdb_cs = pdb_input.crystal_symmetry()

    map_cs = None
    map_content = input_objects.get_file(work_params.map_file_name)
    if map_content is not None:
        try:
            map_cs = map_content.crystal_symmetry()
        except NotImplementedError as e:
            # The map object offers no symmetry; continue without it.
            pass

    crystal_symmetry = None
    try:
        crystal_symmetry = crystal.select_crystal_symmetry(
            from_command_line=None,
            from_parameter_file=None,
            from_coordinate_files=[pdb_cs],
            from_reflection_files=[map_cs],
            enforce_similarity=True)
    except AssertionError as e:
        # Missing symmetry everywhere is tolerated; anything else is not.
        no_symmetry = (
            len(e.args) > 0 and e.args[0].startswith(
                "No unit cell and symmetry information supplied"))
        if not no_symmetry:
            raise e

    model = mmtbx.model.manager(
        model_input=pdb_input,
        restraint_objects=input_objects.cif_objects,
        crystal_symmetry=crystal_symmetry,
        process_input=False,
        log=log)

    map_data = None
    shift_manager = None
    if map_content is not None:
        map_data, map_cs, shift_manager = get_map_from_map(
            map_content,
            work_params,
            xrs=model.get_xray_structure(),
            log=log)
        model.set_shift_manager(shift_manager)
        # model.get_hierarchy().write_pdb_file("junk_shift.pdb")

    hkl_content = input_objects.get_file(work_params.hkl_file_name)
    if hkl_content is not None:
        map_data, map_cs = get_map_from_hkl(
            hkl_content,
            work_params,
            xrs=model.get_xray_structure(),  # here we don't care about atom order
            log=log)

    idealizer = model_idealization(
        model=model,
        map_data=map_data,
        params=work_params,
        log=log,
        verbose=False)
    idealizer.run()
    idealizer.print_stat_comparison()
    print >> log, "RMSD from starting model (backbone, all): %.4f, %.4f" % (
        idealizer.get_rmsd_from_start(), idealizer.get_rmsd_from_start2())
    idealizer.print_runtime()
    # add hydrogens if needed ?
    print >> log, "All done."
    log.close()
def __init__(self, map_manager, mask_as_map_manager, model=None,
             box_cushion=3, wrapping=None,
             model_can_be_outside_bounds=False, log=sys.stdout):
    """Set up boxing of a map around a connected region of a mask.

    The mask is rescaled to the range 0..1 and segmented at a threshold of
    0.5.  The bounds of the region stored at index 1 of the volume-sorted
    list (the "biggest connected region" per the original comment) are
    padded by ``box_cushion`` and clipped to the map grid; the resulting
    box is then applied to the model/ncs/map and to the mask itself.

    Args:
        map_manager: ``iotbx.map_manager.map_manager`` with origin at
            (0, 0, 0).
        mask_as_map_manager: mask as a map_manager similar to
            ``map_manager`` and with the same ``shift_cart()``.
        model: optional model stored with the box.
        box_cushion: padding around the region; it is fractionalized with
            the unit cell, so presumably in the cell's length units.
        wrapping: stored as the forced wrapping; when None the map
            manager's own wrapping is reported in the basis string.
        model_can_be_outside_bounds: stored on the instance.
        log: accepted for interface compatibility; not used here.

    Raises:
        Sorry: segmentation yields fewer than two entries.
    """
    self._map_manager = map_manager
    self._model = model
    self.model_can_be_outside_bounds = model_can_be_outside_bounds

    assert map_manager.shift_cart() == mask_as_map_manager.shift_cart()
    # safeguards
    assert isinstance(map_manager, iotbx.map_manager.map_manager)
    assert isinstance(mask_as_map_manager, iotbx.map_manager.map_manager)
    assert self._map_manager.map_data().accessor().origin() == (0, 0, 0)
    assert map_manager.is_similar(mask_as_map_manager)
    if self.map_manager().wrapping():
        assert map_manager.unit_cell_grid == map_manager.map_data().all()

    self._force_wrapping = wrapping
    if wrapping is None:
        wrapping = self.map_manager().wrapping()
    self.basis_for_boxing_string = 'around_mask bounds, wrapping = %s' % (
        wrapping)

    # Make sure the map goes from 0 to 1
    map_data = mask_as_map_manager.map_data()
    mmm = map_data.as_1d().min_max_mean()
    minimum = mmm.min
    range_of_values = mmm.max - mmm.min
    # The floor on the denominator avoids dividing by zero for a flat mask.
    map_data = (map_data - minimum) / max(1.e-10, range_of_values)

    # Get a connectivity object that marks all the connected regions in map
    from cctbx.maptbx.segment_and_split_map import get_co
    co, sorted_by_volume, min_b, max_b = get_co(map_data=map_data,
                                                threshold=0.5,
                                                wrapping=False)
    if len(sorted_by_volume) < 2:  # didn't work
        raise Sorry("No mask obtained...")

    # Get the biggest connected region in the map: entry 1 of the sorted
    # list holds (volume, original region id).
    _volume, orig_id = sorted_by_volume[1]

    # Get lower and upper bounds of this region in grid units
    self.gridding_first = min_b[orig_id]
    self.gridding_last = max_b[orig_id]

    # Increase range of bounds by box_cushion
    cs = map_manager.crystal_symmetry()
    cushion = flex.double(cs.unit_cell().fractionalize(
        (box_cushion, ) * 3))
    all_orig = map_manager.map_data().all()
    self.gridding_first = [
        max(0, ifloor(gf - c * n))
        for c, gf, n in zip(cushion, self.gridding_first, all_orig)
    ]
    self.gridding_last = [
        min(n - 1, iceil(gl + c * n))
        for c, gl, n in zip(cushion, self.gridding_last, all_orig)
    ]

    # Ready with gridding...set up shifts and box crystal_symmetry
    self.set_shifts_and_crystal_symmetry()
    self.apply_to_model_ncs_and_map()

    # Also apply to mask_as_map_manager so that mask_as_map_manager is boxed
    self.mask_as_map_manager = self.apply_to_map(mask_as_map_manager)
phil_scope = parse(phil_str) user_phil = [] root_dirs = [] indexing_phil = None for arg in sys.argv[1:]: if os.path.isdir(arg): root_dirs.append(arg) elif os.path.isfile(arg): assert indexing_phil is None indexing_phil = arg else: try: user_phil.append(parse(arg)) except Exception: raise Sorry("Couldn't parse argument %s" % arg) params = phil_scope.fetch(sources=user_phil).extract() print("Finding files") images = [] strongs = [] for root in root_dirs: for filename in os.listdir(root): if os.path.splitext(filename)[1] != params.image_extension: continue filepath = os.path.join(root, filename) strong_filepath = os.path.join( root,
def run(args, return_list_of_tests=None):
    """Collect test commands for modules/directories and run them in parallel.

    Args:
        args: command-line arguments; an existing directory is treated as
            ``directory=<path>``, everything else is parsed as PHIL.
        return_list_of_tests: when true, return the collected test list
            instead of running it (and skip the empty-directory check).

    Returns:
        The list of tests when ``return_list_of_tests`` is true, otherwise
        the number of failures reported by the parallel runner.

    Raises:
        Usage: no arguments were given.
        Sorry: an argument cannot be parsed, the working directory is not
            empty, nothing was selected for testing, or no tests were found.
    """
    if len(args) == 0:
        raise Usage(
            """libtbx.run_tests_parallel [module=NAME] [directory=path]""")

    phil_sources = []
    for arg in args:
        if os.path.isdir(arg):
            phil_sources.append(libtbx.phil.parse("directory=%s" % arg))
            continue
        try:
            parsed = libtbx.phil.parse(arg)
        except RuntimeError:
            raise Sorry("Unrecognized argument '%s'" % arg)
        phil_sources.append(parsed)
    params = master_phil.fetch(sources=phil_sources).extract()

    if params.run_in_tmp_dir:
        from libtbx.test_utils import open_tmp_directory
        run_dir = open_tmp_directory()
        print('Running tests in %s' % run_dir)
        os.chdir(run_dir)
    elif not return_list_of_tests:
        # Listing tests needs no scratch space; running them does.
        if len(os.listdir(os.getcwd())) > 0:
            raise Sorry("Please run this program in an empty directory.")

    if len(params.directory) == 0 and len(params.module) == 0:
        raise Sorry("Please specify modules and/or directories to test.")

    parallel = libtbx.test_utils.parallel
    all_tests = list(parallel.make_commands(params.script))
    for dir_name in params.directory:
        if "cctbx_project" in os.path.split(dir_name)[-1]:
            banner = 'DANGER ' * 10
            print(banner)
            print(
                'Using the directory option in cctbx_project can be very time consuming'
            )
            print(banner)
        all_tests.extend(parallel.make_commands(parallel.find_tests(dir_name)))
    for module_name in params.module:
        all_tests.extend(parallel.get_module_tests(module_name))

    if return_list_of_tests:
        return all_tests
    if len(all_tests) == 0:
        raise Sorry("No test scripts found in %s." % params.directory)

    if params.shuffle:
        random.shuffle(all_tests)
    if params.quiet:
        params.verbosity = 0

    with open("run_tests_parallel_zlog", "w") as log:
        result = parallel.run_command_list(
            cmd_list=all_tests,
            nprocs=params.nproc,
            log=log,
            verbosity=params.verbosity,
            max_time=params.max_time)
    print("\nSee run_tests_parallel_zlog for full output.\n")

    if result.failure > 0:
        rule = "*" * 80
        print("")
        print(rule)
        print("ERROR: %d TEST FAILURES. PLEASE FIX BEFORE COMMITTING CODE." %
              result.failure)
        print(rule)
        print("")
    return result.failure
def run(args, out=None, master_params=None,
        assume_shelx_observation_type_is="intensities"):
    """Compute and report merging statistics for unmerged intensities.

    Args:
        args: command-line arguments for ``cmdline_processor``.
        out: output stream; ``sys.stdout`` when None.
        master_params: master PHIL scope; parsed from ``master_phil``
            (with includes) when None.
        assume_shelx_observation_type_is: forwarded to the data selection.

    Returns:
        The ``dataset_statistics`` result object.

    Raises:
        Sorry: symmetry cannot be obtained from the symmetry file, space
            group or unit cell information is missing, or the data carry
            no sigmas.
    """
    if out is None:
        out = sys.stdout
    # This import makes ``iotbx`` a local name, so it has to come before
    # any other use of ``iotbx`` in this function.
    import iotbx.phil
    if master_params is None:
        master_params = iotbx.phil.parse(master_phil, process_includes=True)

    cmdline = cmdline_processor(
        args=args,
        master_phil=master_params,
        reflection_file_def="file_name",
        pdb_file_def="symmetry_file",
        space_group_def="space_group",
        unit_cell_def="unit_cell",
        usage_string="""\
phenix.merging_statistics [data_file] [options...]

Calculate merging statistics for non-unique data, including R-merge, R-meas,
R-pim, and redundancy. Any format supported by Phenix is allowed, including
MTZ, unmerged Scalepack, or XDS/XSCALE (and possibly others). Data should
already be on a common scale, but with individual observations unmerged.
%s
""" % citations_str)
    params = cmdline.work.extract()

    i_obs = iotbx.merging_statistics.select_data(
        file_name=params.file_name,
        data_labels=params.labels,
        log=out,
        assume_shelx_observation_type_is=assume_shelx_observation_type_is)
    params.labels = i_obs.info().label_string()
    validate_params(params)

    if params.symmetry_file is not None:
        # Symmetry comes entirely from the separate symmetry file.
        from iotbx import crystal_symmetry_from_any
        symm = crystal_symmetry_from_any.extract_from(
            file_name=params.symmetry_file)
        if symm is None:
            raise Sorry("No symmetry records found in %s." %
                        params.symmetry_file)
    else:
        # Explicit parameters take precedence over what the data carry.
        if params.space_group is not None:
            sg = params.space_group.group()
        else:
            sg = i_obs.space_group()
            if sg is None:
                raise Sorry("Missing space group information.")
        if params.unit_cell is not None:
            uc = params.unit_cell
        else:
            uc = i_obs.unit_cell()
            if uc is None:
                raise Sorry("Missing unit cell information.")
        from cctbx import crystal
        symm = crystal.symmetry(space_group=sg, unit_cell=uc)

    if i_obs.sigmas() is None:
        raise Sorry("Sigma(I) values required for this application.")

    result = iotbx.merging_statistics.dataset_statistics(
        i_obs=i_obs,
        crystal_symmetry=symm,
        d_min=params.high_resolution,
        d_max=params.low_resolution,
        n_bins=params.n_bins,
        binning_method=params.binning_method,
        anomalous=params.anomalous,
        debug=params.debug,
        file_name=params.file_name,
        sigma_filtering=params.sigma_filtering,
        use_internal_variance=params.use_internal_variance,
        eliminate_sys_absent=params.eliminate_sys_absent,
        extend_d_max_min=params.extend_d_max_min,
        cc_one_half_significance_level=params.cc_one_half_significance_level,
        cc_one_half_method=params.cc_one_half_method,
        log=out)
    result.show(out=out)
    if getattr(params, "loggraph", False):
        result.show_loggraph(out=out)
    if params.estimate_cutoffs:
        result.show_estimated_cutoffs(out=out)

    if params.json.file_name is not None:
        result.as_json(file_name=params.json.file_name,
                       indent=params.json.indent)
    if params.mmcif.file_name is not None:
        import iotbx.cif.model
        cif = iotbx.cif.model.cif()
        cif[params.mmcif.data_name] = result.as_cif_block()
        with open(params.mmcif.file_name, 'wb') as cif_out:
            print >> cif_out, cif

    print >> out, ""
    print >> out, "References:"
    print >> out, citations_str
    print >> out, ""
    return result
def run(args, log=sys.stdout):
    """Report model statistics and map-model correlation for one model/map.

    Prints, in order: the map resolution, a summary of the input model and
    map, a histogram of map values, overall geometry and ADP statistics,
    overall map-model CCs, the model-map FSC, CC per residue and per chain,
    and a histogram of per-residue CCs against a model-calculated map.

    Args:
        args: command-line arguments for
            ``mmtbx.utils.process_command_line_args``.
        log: stream receiving most of the report.

    Raises:
        Sorry: resolution, model file, crystal symmetry or map is missing,
            or the model file holds more than one model.
    """
    rule = "-" * 79
    print(rule, file=log)
    print(legend, file=log)
    print(rule, file=log)

    inputs = mmtbx.utils.process_command_line_args(
        args=args, master_params=master_params())
    params = inputs.params.extract()

    # estimate resolution
    d_min = params.resolution
    broadcast(m="Map resolution:", log=log)
    if d_min is None:
        raise Sorry("Resolution is required.")
    print(" d_min: %6.4f" % d_min, file=log)

    # model
    broadcast(m="Input PDB:", log=log)
    file_names = inputs.pdb_file_names
    if len(file_names) != 1:
        raise Sorry("PDB file has to given.")
    if inputs.crystal_symmetry is None:
        raise Sorry("No crystal symmetry defined.")
    pdb_inp = iotbx.pdb.input(file_name=file_names[0])
    model = mmtbx.model.manager(
        model_input=pdb_inp,
        crystal_symmetry=inputs.crystal_symmetry)
    model.process(make_restraints=True)
    if model.get_number_of_models() > 1:
        raise Sorry("Only one model allowed.")
    model.setup_scattering_dictionaries(
        scattering_table=params.scattering_table)
    model.get_xray_structure().show_summary(f=log, prefix=" ")

    # map
    broadcast(m="Input map:", log=log)
    if inputs.ccp4_map is None:
        raise Sorry("Map file has to given.")
    inputs.ccp4_map.show_summary(prefix=" ")
    map_data = inputs.ccp4_map.map_data()
    print(" Actual map (min,max,mean):",
          map_data.as_1d().min_max_mean().as_tuple(), file=log)
    make_sub_header("Histogram of map values", out=log)
    md = map_data.as_1d()
    show_histogram(data=md, n_slots=10, data_min=flex.min(md),
                   data_max=flex.max(md), log=log)

    # shift origin if needed
    soin = maptbx.shift_origin_if_needed(
        map_data=map_data,
        sites_cart=model.get_sites_cart(),
        crystal_symmetry=model.crystal_symmetry())
    map_data = soin.map_data
    model.set_sites_cart(soin.sites_cart)

    ####
    # Compute and show all stats
    ####
    broadcast(m="Model statistics:", log=log)
    make_sub_header("Overall", out=log)
    info = mmtbx.model.statistics.info(model=model)
    info.geometry.show()
    # XXX - histograms of deviations from ideal bonds and angles and of
    # non-bonded distances are not available anymore due to refactoring
    make_sub_header("Histogram of ADPs", out=log)
    info.adp.show(log=log)

    # Compute CC
    broadcast(m="Map-model CC (overall):", log=log)
    five_cc_result = mmtbx.maps.correlation.five_cc(
        map=map_data,
        xray_structure=model.get_xray_structure(),
        d_min=d_min)
    atom_radius = five_cc_result.atom_radius
    if atom_radius is None:
        atom_radius = five_cc_result._atom_radius()
    overall = five_cc_result.result
    print(" CC_mask : %6.4f" % overall.cc_mask, file=log)
    print(" CC_volume: %6.4f" % overall.cc_volume, file=log)
    print(" CC_peaks : %6.4f" % overall.cc_peaks, file=log)

    # Compute FSC(map, model)
    broadcast(m="Model-map FSC:", log=log)
    fsc = mmtbx.maps.correlation.fsc_model_vs_map(
        xray_structure=model.get_xray_structure(),
        map=map_data,
        atom_radius=atom_radius,
        d_min=d_min)
    fsc.show(prefix=" ")

    # Local CC
    cc_calculator = \
        mmtbx.maps.correlation.from_map_and_xray_structure_or_fmodel(
            xray_structure=model.get_xray_structure(),
            map_data=map_data,
            d_min=d_min)
    broadcast(m="Map-model CC (local):", log=log)

    # per residue
    print("Per residue:", file=log)
    ph = model.get_hierarchy()
    xrs = model.get_xray_structure()
    for rg in ph.residue_groups():
        residue_cc = cc_calculator.cc(selection=rg.atoms().extract_i_seq())
        print(" chain id: %s resid %s: %6.4f" % (
            rg.parent().id, rg.resid(), residue_cc), file=log)

    # per chain
    print("Per chain:", file=log)
    for chain in ph.chains():
        chain_cc = cc_calculator.cc(selection=chain.atoms().extract_i_seq())
        print(" chain %s: %6.4f" % (chain.id, chain_cc), file=log)

    # per residue detailed counts
    print("Per residue (histogram):", file=log)
    # Model map on the same grid as the input map, sigma-scaled.
    crystal_gridding = maptbx.crystal_gridding(
        unit_cell=xrs.unit_cell(),
        space_group_info=xrs.space_group_info(),
        pre_determined_n_real=map_data.accessor().all())
    f_calc = xrs.structure_factors(d_min=d_min).f_calc()
    fft_map = miller.fft_map(
        crystal_gridding=crystal_gridding,
        fourier_coefficients=f_calc)
    fft_map.apply_sigma_scaling()
    map_model = fft_map.real_map_unpadded()
    sites_cart = xrs.sites_cart()
    cc_per_residue = flex.double()
    for rg in ph.residue_groups():
        cc_per_residue.append(
            mmtbx.maps.correlation.from_map_map_atoms(
                map_1=map_data,
                map_2=map_model,
                sites_cart=sites_cart.select(rg.atoms().extract_i_seq()),
                unit_cell=xrs.unit_cell(),
                radius=2.))
    show_histogram(data=cc_per_residue, n_slots=10, data_min=-1.,
                   data_max=1.0, log=log)
def selection_string_from_selection(pdb_h,
                                    selection,
                                    chains_info=None,
                                    atom_selection_cache=None):
  """
  Convert a selection array into an atom selection string.

  The string is kept as short as possible: a chain whose atoms are all
  selected is written as ``chain 'X'`` only; otherwise consecutive residues
  are merged into ranges and atom names are listed where a residue is only
  partially selected.

  Limitations:
  Residues flagged in ``chains_info[ch_id].no_altloc`` as False are skipped
  and break the current residue range (presumably these are residues with
  alternate locations - confirm against get_chains_info). The consistency
  check that used to compare the resulting string with the input selection
  is disabled (see the note near the end of the function), so the returned
  string may select a different set of atoms in such cases.

  Args:
    pdb_h : iotbx.pdb.hierarchy; only used to build chains_info when it is
      not supplied.
    selection (flex.bool or flex.size_t): atoms to describe; a flex.bool is
      converted to an index selection first.
    chains_info : mapping chain ID -> object with the attributes read here:
      resid (list): residue ids, one per residue
      atom_names (list of list of str): atom names in each residue
      atom_selection (list of list of int): atom indices in each residue
      chains_atom_number (int): number of atoms in the chain
      no_altloc (list of bool): per-residue flag, see Limitations
      gap_residue (list of bool): per-residue flag; a True value prevents
        the residue from extending the current range
    atom_selection_cache : not used by the active code (it was only needed
      by the disabled consistency check).

  Returns:
    sel_str (str): atom selection string; "not all" when nothing was
      produced.

  Raises:
    Sorry: if the selection is empty.
  """
  if isinstance(selection, flex.bool):
    selection = selection.iselection(True)
  if selection.size() == 0:
    raise Sorry('Empty atom selection')
  selection_set = set(selection)
  sel_list = []
  # using chains_info to improve performance
  if not chains_info:
    chains_info = get_chains_info(pdb_h)
  chain_ids = sorted(chains_info)
  for ch_id in chain_ids:
    # cheap pre-check before the per-chain set is built
    if not chain_is_needed(selection, chains_info[ch_id].atom_selection):
      continue
    # this "unfolds" the atom_selection array, which is [[],[],[],[]...],
    # into a flat set of atom indices
    a_sel = {x for xi in chains_info[ch_id].atom_selection for x in xi}
    test_set = a_sel.intersection(selection_set)
    if not test_set:
      continue
    ch_sel = "chain '%s'" % convert_wildcards_in_chain_id(ch_id)
    # Chain should be present, so do all the work.
    # if there is water in chain, specify residues numbers
    water_present = (len(a_sel) != chains_info[ch_id].chains_atom_number)
    complete_ch_not_present = (test_set != a_sel) or water_present
    if bool(chains_info[ch_id].no_altloc):
      no_altloc = chains_info[ch_id].no_altloc
      no_altloc_present = no_altloc.count(False) > 0
    else:
      no_altloc_present = False
    # exclude residues with alternative locations
    complete_ch_not_present |= no_altloc_present
    res_sel = []
    if complete_ch_not_present:
      # collect continuous ranges of residues when possible
      res_len = len(chains_info[ch_id].resid)
      # "all atoms present" state of the residues already in the current
      # range (prev) and of the residue being examined (cur)
      prev_all_atoms_present = None
      cur_all_atoms_present = None
      # atom names of a partially selected residue that the pending range
      # has to be written with
      atoms_for_dumping = []
      previous_res_selected_atom_names = []
      # seed the state from the first residue of the chain
      a_sel = set(chains_info[ch_id].atom_selection[0])
      cur_res_selected_atom_names = get_atom_names_from_test_set(
          a_sel.intersection(selection_set),
          a_sel,
          chains_info[ch_id].atom_names[0])
      atoms_in_current_range = cur_res_selected_atom_names
      sequence_was_broken = False
      first_resid = chains_info[ch_id].resid[0]
      last_resid = None
      for i in xrange(res_len):
        cur_resid = chains_info[ch_id].resid[i]
        # test that all atoms in residue are included in selection
        a_sel = set(chains_info[ch_id].atom_selection[i])
        test_set = a_sel.intersection(selection_set)
        if len(test_set) == 0:
          # None of residue's atoms are selected: the range is interrupted
          sequence_was_broken = True
          continue
        if no_altloc_present and not no_altloc[i]:
          # residue flagged in no_altloc is skipped and interrupts the range
          sequence_was_broken = True
          continue
        all_atoms_present = (test_set == a_sel)
        # shift cur -> prev; prev accumulates over the whole current range
        if prev_all_atoms_present is None:
          prev_all_atoms_present = cur_all_atoms_present
        else:
          prev_all_atoms_present = cur_all_atoms_present and prev_all_atoms_present
        cur_all_atoms_present = all_atoms_present
        previous_res_selected_atom_names = cur_res_selected_atom_names
        cur_res_selected_atom_names = get_atom_names_from_test_set(
            test_set, a_sel, chains_info[ch_id].atom_names[i])
        cur_resid = chains_info[ch_id].resid[i]
        # new range is needed when previous selection doesn't match current
        # selection.
        continue_range = False
        continue_range = ((cur_all_atoms_present and prev_all_atoms_present)
            or (len(
                set(cur_res_selected_atom_names) ^
                set(atoms_in_current_range)) == 0))
        continue_range &= not chains_info[ch_id].gap_residue[i]
        # residues are consecutive
        continue_range = continue_range and not sequence_was_broken
        if len(atoms_for_dumping) > 0:
          # a partially selected residue is pending: only residues with
          # exactly the same atom names may join the range
          continue_range = continue_range and (len(
              set(atoms_for_dumping) ^
              set(cur_res_selected_atom_names)) == 0)
        sequence_was_broken = False
        if continue_range:
          # continue range
          last_resid = cur_resid
          atoms_in_current_range = list(
              set(atoms_in_current_range) | set(cur_res_selected_atom_names))
          if not cur_all_atoms_present:
            atoms_for_dumping = cur_res_selected_atom_names
        else:
          # dump previous range, start new one
          if len(atoms_for_dumping) > 0:
            atoms_sel = get_atom_str(
                previous_res_selected_atom_names)
          else:
            atoms_sel = "" if prev_all_atoms_present else get_atom_str(
                previous_res_selected_atom_names)
          if prev_all_atoms_present is None:
            atoms_sel = "" if cur_all_atoms_present else get_atom_str(
                cur_res_selected_atom_names)
          res_sel = update_res_sel(res_sel=res_sel,
                                   first_resid=first_resid,
                                   last_resid=last_resid,
                                   atoms_selection=atoms_sel)
          first_resid = cur_resid
          last_resid = cur_resid
          atoms_in_current_range = cur_res_selected_atom_names
          if not cur_all_atoms_present:
            atoms_for_dumping = cur_res_selected_atom_names
          else:
            atoms_for_dumping = []
          prev_all_atoms_present = None
      # dump the last range, which the loop leaves pending
      atoms_sel = "" if prev_all_atoms_present else get_atom_str(
          previous_res_selected_atom_names)
      if prev_all_atoms_present or prev_all_atoms_present is None:
        atoms_sel = "" if cur_all_atoms_present else get_atom_str(
            cur_res_selected_atom_names)
      # the residue ids can be left out when the last range spans the chain
      omit_resids = (first_resid == chains_info[ch_id].resid[0]
          and last_resid == chains_info[ch_id].resid[-1])
      res_sel = update_res_sel(res_sel, first_resid, last_resid,
                               atoms_sel, omit_resids)
    s = get_clean_selection_string(ch_sel, res_sel)
    sel_list.append(s)
  # add parentheses when the selection is more than just a chain
  # (the test used is the string length, > 10 characters)
  s_l = []
  sel_list.sort()
  for s in sel_list:
    if len(s) > 10:
      s = '(' + s + ')'
    s_l.append(s)
  sel_str = ' or '.join(s_l)
  # Historical note: a consistency check used to live here. It re-evaluated
  # sel_str with atom_selection_cache and asserted that it selects the same
  # number of atoms as the input selection. It could take up to ~90% of the
  # runtime of this function and was removed on Feb 7, 2018.
  # Removing it is a hack that allows a chain to be completely in two
  # alternative conformations (the check would fail). Selections outputted
  # in refinement are incorrect, but underlying iselections are actually
  # correct and refinement should be fine. General solution would be a
  # universal procedure which can handle alternative conformations
  # correctly, but this is a time-demanding project.
  # http://phenix-online.org/pipermail/phenixbb/2018-November/024006.html
  if sel_str == '':
    sel_str = "not all"
  return sel_str
def validate_params(params):
  """
  Check that at least one PDB ID was supplied.

  Args:
    params: object exposing ``fetch_pdb.pdb_ids``.

  Returns:
    True when ``pdb_ids`` is neither None nor empty.

  Raises:
    Sorry: when ``pdb_ids`` is None or has length zero.
  """
  pdb_ids = params.fetch_pdb.pdb_ids
  nothing_requested = pdb_ids is None or len(pdb_ids) == 0
  if nothing_requested:
    raise Sorry("No PDB IDs specified!")
  return True
def selection_parser(self,
                     word_iterator,
                     optional=True,
                     callback=None,
                     stop_word=None,
                     expect_nonmatching_closing_parenthesis=False):
  """
  Evaluate an atom selection expression into a boolean selection.

  The words delivered by ``simple_parser.infix_as_postfix`` are evaluated
  on a stack: ``not``, ``and`` and ``or`` combine earlier results, every
  other keyword pushes the result of the matching ``self.sel_*`` method.
  Whatever is left on the stack at the end is and-ed together.

  Args:
    word_iterator: source of the selection words.
    optional: when true, an ``optional`` keyword found in the expression
      is disregarded, i.e. None is never returned.
    callback: called as ``callback(word=..., word_iterator=...,
      result_stack=...)`` for keywords not handled here; a false return
      value is a syntax error. When a callback is given, ``protein``,
      ``peptide`` and ``nucleotide`` are left to it.
    stop_word: forwarded to ``simple_parser.infix_as_postfix``.
    expect_nonmatching_closing_parenthesis: forwarded to
      ``simple_parser.infix_as_postfix``; set for the nested expression
      inside ``within(...)`` / ``residues_within(...)``.

  Returns:
    flex.bool of size ``self.n_seq``, or None when the expression carried
    the ``optional`` keyword (with ``optional`` false) and selects
    nothing.

  Raises:
    RuntimeError: on a syntax error.
    Sorry: when ``optional`` is misplaced or repeated.
    AssertionError: when an operator lacks operands or the
      ``within(radius, ...)`` punctuation is wrong.
  """
  have_optional = False
  result_stack = []
  for word, word_iterator in simple_parser.infix_as_postfix(
        word_iterator=word_iterator,
        stop_word=stop_word,
        expect_nonmatching_closing_parenthesis=
          expect_nonmatching_closing_parenthesis):
    lword = word.value.lower()
    def raise_syntax_error():
      raise RuntimeError(
        'Atom selection syntax error at word "%s".' % lword)
    if (lword == "optional"):
      if (len(result_stack) != 0):
        raise Sorry('"optional" can appear only at the beginning.')
      if (have_optional):
        raise Sorry('"optional" can appear only once.')
      have_optional = True
    elif (lword == "not"):
      assert len(result_stack) >= 1
      arg = result_stack.pop()
      result_stack.append(~arg)
    elif (lword in ["and", "or"]):
      assert len(result_stack) >= 2
      rhs = result_stack.pop()
      lhs = result_stack.pop()
      if (lword == "and"):
        result_stack.append(lhs & rhs)
      else:
        result_stack.append(lhs | rhs)
    else:
      if (lword == "all"):
        result_stack.append(flex.bool(self.n_seq, True))
      elif (lword == "none"):
        result_stack.append(flex.bool(self.n_seq, False))
      elif (lword == "name"):
        result_stack.append(
          self.sel_name(
            pattern=word_iterator.pop_argument(word.value)))
      elif (lword in ["altloc", "altid"]):
        result_stack.append(
          self.sel_altloc(
            pattern=word_iterator.pop_argument(word.value)))
      elif (lword == "resname"):
        result_stack.append(
          self.sel_resname(
            pattern=word_iterator.pop_argument(word.value)))
      elif (lword == "chain"):
        result_stack.append(
          self.sel_chain_id(
            pattern=word_iterator.pop_argument(word.value)))
      elif (lword in ["resseq", "resid", "resi", "model"]):
        arg = word_iterator.pop_argument(word.value)
        def try_compose_range():
          # Re-assemble "start:stop" when the colon or the stop value
          # arrived as separate words. Returns (text, index of the colon),
          # with index -1 when there is no range.
          def is_cont():
            if (len(arg_cont.value) == 0): return False
            return ("0123456789".find(arg_cont.value[0]) >= 0)
          i_colon = arg.value.find(":")
          if (i_colon < 0):
            arg_cont = word_iterator.try_pop()
            if (arg_cont is None):
              return arg.value, -1
            if (not arg_cont.value.startswith(":")):
              word_iterator.backup()
              return arg.value, -1
            if (len(arg_cont.value) == 1):
              arg_cont = word_iterator.try_pop()
              if (arg_cont is None):
                return arg.value + ":", len(arg.value)
              if (not is_cont()):
                word_iterator.backup()
                return arg.value + ":", len(arg.value)
              return arg.value + ":" + arg_cont.value, len(arg.value)
            return arg.value + arg_cont.value, len(arg.value)
          elif (i_colon + 1 == len(arg.value)):
            arg_cont = word_iterator.try_pop()
            if (arg_cont is not None):
              if (is_cont()):
                return arg.value + arg_cont.value, i_colon
              word_iterator.backup()
          return arg.value, i_colon
        def try_compose_sequence():
          # Recognize "<start> through <stop>"; (None, None) otherwise.
          arg_next = word_iterator.try_pop()
          if (arg_next is None):
            word_iterator.backup()
            return None, None
          lnext = arg_next.value.lower()
          if (lnext == "through"):
            arg_final = word_iterator.pop_argument(arg_next.value)
            return arg.value, arg_final.value
          word_iterator.backup()
          return (None, None)
        val, i_colon = try_compose_range()
        if (i_colon < 0):
          if (lword == "resseq"):
            result_stack.append(self.sel_resseq(pattern=arg))
          elif (lword in ["resid", "resi"]):
            start, stop = try_compose_sequence()
            if (start is None):
              result_stack.append(self.sel_resid(pattern=arg))
            else:
              result_stack.append(
                self.sel_resid_sequence(start=start, stop=stop))
          else:
            result_stack.append(self.sel_model_id(pattern=arg))
        else:
          start = val[:i_colon]
          stop = val[i_colon + 1:]
          if (lword == "resseq"):
            result_stack.append(
              self.sel_resseq_range(start=start, stop=stop))
          elif (lword in ["resid", "resi"]):
            result_stack.append(
              self.sel_resid_range(start=start, stop=stop))
          else:
            result_stack.append(
              self.sel_model_id_range(start=start, stop=stop))
      elif (lword == "icode"):
        result_stack.append(
          self.sel_icode(
            pattern=word_iterator.pop_argument(word.value)))
      elif (lword == "segid"):
        result_stack.append(
          self.sel_segid(
            pattern=word_iterator.pop_argument(word.value)))
      elif (lword == "element"):
        result_stack.append(
          self.sel_element(
            pattern=word_iterator.pop_argument(word.value)))
      elif (lword == "charge"):
        result_stack.append(
          self.sel_charge(
            pattern=word_iterator.pop_argument(word.value)))
      elif (lword == "anisou"):
        result_stack.append(self.sel_anisou())
      elif (lword == "pepnames"):
        result_stack.append(self.sel_pepnames())
      elif ((lword == "protein" or lword == "peptide")
            and callback is None):
        # if there is a callback, these keywords should be processed there,
        # most likely it is pdb_interpretation
        result_stack.append(self.sel_protein())
      elif lword == "nucleotide" and callback is None:
        result_stack.append(self.sel_nucleotide())
      elif (lword == "single_atom_residue"):
        result_stack.append(self.sel_single_atom_residue())
      elif (lword == "water"):
        result_stack.append(self.sel_water())
      elif (lword == "hetero") or (lword == "hetatm"):
        result_stack.append(self.sel_hetero())
      elif (lword == "bfactor") or (lword == "occupancy"):
        op = word_iterator.pop_argument(word.value).value
        if (not op in [">", "<", "="]):
          raise_syntax_error()
        else:
          arg_next = word_iterator.try_pop()
          if (arg_next is None):
            # expression ends right after the operator: report it as a
            # syntax error instead of failing on None.value
            raise_syntax_error()
          lnext = arg_next.value
          try:
            val = float(lnext)
          except ValueError:
            raise_syntax_error()
          else:
            if (lword == "bfactor"):
              result_stack.append(self.sel_bfactor(op, val))
            else:
              result_stack.append(self.sel_occupancy(op, val))
      elif ((lword == "within" or lword == 'residues_within')
            and (self.special_position_settings is not None)):
        # The words are consumed outside of the assert statements so that
        # parsing still advances when asserts are stripped (python -O).
        opening = word_iterator.pop()
        assert opening.value == "("
        radius = float(word_iterator.pop().value)
        separator = word_iterator.pop()
        assert separator.value == ","
        sel = self.selection_parser(
          word_iterator=word_iterator,
          callback=callback,
          expect_nonmatching_closing_parenthesis=True)
        if lword == 'within':
          result_stack.append(
            self.sel_within(radius=radius, primary_selection=sel))
        elif lword == 'residues_within':
          result_stack.append(
            self.sel_residues_within(radius=radius, primary_selection=sel))
      elif (callback is not None):
        if (not callback(
                  word=word,
                  word_iterator=word_iterator,
                  result_stack=result_stack)):
          raise_syntax_error()
      else:
        raise_syntax_error()
  # with optional=True the "optional" keyword has no effect on the result
  if (optional): have_optional = False
  if (len(result_stack) == 0):
    if (have_optional): return None
    return flex.bool(self.n_seq, False)
  selection = result_stack[0]
  for result in result_stack[1:]:
    selection &= result
  if (have_optional and selection.all_eq(False)):
    return None
  return selection
def process_input_array(self, arr):
  """
  Prepare a reflection array for display according to ``self.settings``.

  Works on a deep copy of ``arr``: optionally merges equivalents, maps to
  the asymmetric unit, applies the resolution cutoff, generates Bijvoet
  mates, expands to P1 and divides the data by sigmas.

  Side effects: sets ``self.missing_set``, ``self.filtered_array``,
  ``self.r_free_mode``, ``self.phases`` (degrees), ``self.radians``,
  ``self.ampl``, ``self.sigmas`` and ``self.data``.

  Args:
    arr: the input array; it is not modified, only its ``info()`` is
      copied onto the result.

  Returns:
    (work_array, multiplicities): the processed array and the matching
    multiplicities array, or None for the latter when equivalents were
    not merged.

  Raises:
    Sorry: for requests that cannot be satisfied (expanding unmerged
      reflections, data over sigma without sigmas).
    RuntimeError: for an unsupported data type.
    Any exception is printed together with the current stack and then
    re-raised.
  """
  array = arr.deep_copy()
  work_array = arr
  multiplicities = None
  try:
    if self.merge_equivalents:
      array, multiplicities, _merge = MergeData(
        array, self.settings.show_anomalous_pairs)
    settings = self.settings
    self.missing_set = oop.null()
    if (array.is_unique_set_under_symmetry()) and (settings.map_to_asu):
      array = array.map_to_asu()
      if (multiplicities is not None):
        multiplicities = multiplicities.map_to_asu()
    if (settings.d_min is not None):
      array = array.resolution_filter(d_min=settings.d_min)
      if (multiplicities is not None):
        multiplicities = multiplicities.resolution_filter(
          d_min=settings.d_min)
    self.filtered_array = array.deep_copy()
    if (settings.expand_anomalous):
      if not array.is_unique_set_under_symmetry():
        raise Sorry("Error! Cannot generate bijvoet mates of unmerged reflections.")
      array = array.generate_bijvoet_mates()
      if (multiplicities is not None):
        multiplicities = multiplicities.generate_bijvoet_mates()
    if (self.settings.show_missing):
      self.missing_set = array.complete_set().lone_set(array)
      if self.settings.show_anomalous_pairs:
        # drop the centric reflections from the missing set
        self.missing_set = self.missing_set.select(
          self.missing_set.centric_flags().data(), negate=True)
    if (settings.expand_to_p1):
      if not array.is_unique_set_under_symmetry():
        raise Sorry("Error! Cannot expand unmerged reflections to P1.")
      # expand, but keep reporting the original crystal symmetry
      original_symmetry = array.crystal_symmetry()
      array = array.expand_to_p1().customized_copy(
        crystal_symmetry=original_symmetry)
      self.missing_set = self.missing_set.expand_to_p1().customized_copy(
        crystal_symmetry=original_symmetry)
      if (multiplicities is not None):
        multiplicities = multiplicities.expand_to_p1().customized_copy(
          crystal_symmetry=original_symmetry)
    data = array.data()
    self.r_free_mode = False
    self.phases = flex.double(data.size(), float('nan'))
    self.radians = flex.double(data.size(), float('nan'))
    self.ampl = flex.double(data.size(), float('nan'))
    self.sigmas = None
    if isinstance(data, flex.bool):
      # boolean flags are displayed as 0.0 / 1.0
      self.r_free_mode = True
      data_as_float = flex.double(data.size(), 0.0)
      data_as_float.set_selected(data==True, flex.double(data.size(), 1.0))
      data = data_as_float
      self.data = data
    else:
      if isinstance(data, flex.double):
        self.data = data
      elif isinstance(data, flex.complex_double):
        self.data = data
        self.ampl = flex.abs(data)
        self.phases = flex.arg(data) * 180.0/math.pi
        # purge nan values from array to avoid crash in fmod_positive()
        b = flex.bool([bool(math.isnan(e)) for e in self.phases])
        # replace the nan values with an arbitrary float value
        self.phases = self.phases.set_selected(b, 42.4242)
        # Cast negative degrees to equivalent positive degrees
        self.phases = flex.fmod_positive(self.phases, 360.0)
        self.radians = flex.arg(data)
        # replace the nan values with an arbitrary float value
        self.radians = self.radians.set_selected(b, 0.424242)
      elif hasattr(array.data(), "as_double"):
        self.data = array.data().as_double()
      else:
        raise RuntimeError("Unexpected data type: %r" % data)
      if (settings.show_data_over_sigma):
        if (array.sigmas() is None):
          raise Sorry("sigmas not defined.")
        sigmas = array.sigmas()
        # reflections with sigma == 0 cannot be divided and are dropped
        non_zero_sel = sigmas != 0
        array = array.select(non_zero_sel)
        array = array.customized_copy(data=array.data()/array.sigmas())
        self.data = array.data()
        if (multiplicities is not None):
          multiplicities = multiplicities.select(non_zero_sel)
      if array.sigmas() is not None:
        self.sigmas = array.sigmas()
      else:
        self.sigmas = None
    work_array = array
  except Exception as e:
    print(to_str(e) + "".join(traceback.format_stack(limit=10)))
    # bare raise keeps the original traceback
    raise
  work_array.set_info(arr.info())
  return work_array, multiplicities
def run(args, command_name="phenix.explore_metric_symmetry"):
  """
  Command-line driver: explore the metric symmetry of a unit cell.

  Parses ``args``, builds the crystal symmetry from the given unit cell
  plus either a space group or a centring type (optionally reduced to the
  Niggli cell in P1), runs the point group analysis unless
  ``--no_point_group_graph`` is given, optionally writes a graph, and,
  when ``--other_unit_cell`` is given, compares the two lattices.

  All output goes to a ``multi_out`` log registered on stdout. Invalid or
  incomplete input is reported on the log and the function returns early.

  Args:
    args: list of command-line arguments (without the program name).
    command_name: name shown in the usage line.

  Returns:
    None.

  Raises:
    Sorry: when a second unit cell is given without a space group or a
      centring type.
  """
  command_line = (
    option_parser(
      usage=command_name+" [options]",
      description="""\
Explore Metric Symmetry. A list of possible unit cells and spacegroups is
given for the given specified unit cell and spacegroup combination. If a
second unit cell is given, linear combinations of the basis vector of one
unit cell are sought that match the other.""")
    .enable_symmetry_comprehensive()
    .option(None, "--max_delta",
            action = "store",
            type="float",
            default=5.0,
            dest = "max_delta",
            help = "Maximum delta/obliquity used in determining the lattice symmetry, using a modified Le-Page algorithm. Default is 5.0 degrees",
            metavar="FLOAT")
    .option(None, "--start_from_p1",
            action="store_true",
            dest="niggli",
            default=False,
            help="Reduce to Niggli cell and forget the input spacegroup before higher metric symmetry is sought.")
    .option(None, "--graph",
            action="store",
            default=None,
            help="A graphical representation of the graph will be written out."
                 " Requires Graphviz to be installed and on PATH.")
    .option(None, "--centring_type",
            action="store",
            type="str",
            help="Centring type, choose from P,A,B,C,I,R,F")
    .option(None, "--other_unit_cell",
            action="store",
            type="str",
            help="Other unit cell, for unit cell comparison",
            metavar="10,20,30,90,103.7,90")
    .option(None, "--other_space_group",
            action="store",
            type="str",
            help="space group for other_unit_cell, for unit cell comparison")
    .option(None, "--other_centring_type",
            action="store",
            type="str",
            help="Centring type, choose from P,A,B,C,I,R,F")
    .option(None, "--no_point_group_graph",
            action="store_true",
            dest="pg_graph",
            default=False,
            help="Do not carry out the construction of a point group graph."
            )
    .option(None, "--relative_length_tolerance",
            action="store",
            type="float",
            help="Tolerance for unit cell lengths to be considered equal-ish.",
            default=0.10,
            metavar="FLOAT",
            dest="rel_length_tol")
    .option(None, "--absolute_angle_tolerance",
            action="store",
            dest="abs_angle_tol",
            type="float",
            default=10.0,
            metavar="FLOAT",
            help="Angular tolerance in unit cell comparison")
    .option(None, "--max_order",
            action="store",
            type="int",
            default=1,
            metavar="INT",
            help="Maximum volume change for target cell" )
    ).process(args=args)

  log = multi_out()
  log.register(label="stdout", file_object=sys.stdout)

  allowed_centring_types={"P":"Primitive",
                          "A":"A centered",
                          "B":"B centered",
                          "C":"C centered",
                          "I":"Body centered",
                          "R":"Rombohedral",
                          "F":"Face centered"}
  if command_line.options.centring_type is not None:
    if command_line.options.centring_type not in allowed_centring_types:
      print("Sorry, the centring type %s is not known."%(command_line.options.centring_type), file=log)
      print("Choose from P,A,B,C,I,R,F ", file=log)
      return

  xs = None
  other_xs = None

  if len(args)==0:
    command_line.parser.show_help()
    return

  if ( command_line.symmetry.unit_cell() is None ):
    print(file=log)
    print("Sorry: Unit cell not specified.", file=log)
    print(file=log)
    command_line.parser.show_help()
    return

  if command_line.options.centring_type is None:
    if ( command_line.symmetry.space_group_info() is None ):
      print(file=log)
      print("Sorry: centring type or space group not specified.", file=log)
      print(file=log)
      command_line.parser.show_help()
      return
  if command_line.symmetry.space_group_info() is not None:
    if not ( command_line.symmetry.space_group().is_chiral() ):
      print("Sorry, Non chiral space groups not yet supported.", file=log)
      return

  if command_line.options.centring_type is not None:
    # a centring type overrides the space group: use the bare lattice
    xs = crystal.symmetry(
      unit_cell=command_line.symmetry.unit_cell(),
      space_group_symbol="Hall: %s 1" %( command_line.options.centring_type )
      )
    command_line.symmetry = xs

  if command_line.options.niggli:
    print("*Unit cell will be niggli reduced and P1 will be assumed*", file=log)
    uc = command_line.symmetry.change_basis(
      command_line.symmetry.change_of_basis_op_to_niggli_cell() ).unit_cell()
    command_line.symmetry = crystal.symmetry( uc, "P 1" )

  xs = command_line.symmetry

  ############################################################################
  # ABOVE IS JUST INPUT PARSING, NOW THE ACTUAL STUFF HAPPENS
  ############################################################################

  if not command_line.options.pg_graph:
    ##############################
    #   get a point group graph  #
    ##############################
    pg_object = do_pointgroup_tricks( xs.unit_cell(),
                                      xs.space_group(),
                                      command_line.options.max_delta,
                                      log )

    ################################################
    #  make a graphical representation if desired  #
    ################################################
    if command_line.options.graph is not None:
      make_graph_of_graph(pg_object,
                          command_line.options.graph,
                          log)

  #########################################
  #  Check if other cell has been defined #
  #########################################
  if command_line.options.other_unit_cell is not None:
    print("A second unit cell has been specified. ", file=log)

    if command_line.options.other_space_group is None:
      if command_line.options.other_centring_type is None:
        raise Sorry("No space group or centring type for other cell specified.")
      else:
        other_xs = crystal.symmetry(
          command_line.options.other_unit_cell,
          space_group_symbol="Hall: %s 1" %( command_line.options.other_centring_type )
          )
    else:
      other_xs = crystal.symmetry(
        command_line.options.other_unit_cell,
        space_group_symbol=command_line.options.other_space_group
        )

    # get the graph if desired
    if not command_line.options.pg_graph:
      other_pg_object = do_pointgroup_tricks( other_xs.unit_cell(),
                                              other_xs.space_group(),
                                              command_line.options.max_delta,
                                              log )

    # do the unit cell comparison
    print(file=log)
    print(file=log)
    print("Unit cell comparison", file=log)
    print("--------------------", file=log)
    print(file=log)
    print("The unit cells will be compared. The smallest niggli cell,", file=log)
    print("will be used as a (semi-flexible) lego-block to see if it", file=log)
    print("can construct the larger Niggli cell.", file=log)
    print(file=log)
    print(file=log)

    order = command_line.options.max_order

    if order==1:
      sl_object = slt.compare_lattice(
        xs_a=xs,
        xs_b=other_xs,
        max_delta=command_line.options.max_delta,
        out=log,
        relative_length_tolerance=command_line.options.rel_length_tol,
        absolute_angle_tolerance=command_line.options.abs_angle_tol)
    else:
      tmp_a = xs.change_basis( xs.change_of_basis_op_to_niggli_cell() )
      tmp_b = other_xs.change_basis( other_xs.change_of_basis_op_to_niggli_cell() )
      # the cell with the larger Niggli volume is the target; the other
      # one serves as the building block
      if ( tmp_a.unit_cell().volume() > tmp_b.unit_cell().volume() ):
        modified_xs = slt.make_list_of_target_xs_up_to_order( xs, order )
        lego_block = other_xs
      else:
        modified_xs = slt.make_list_of_target_xs_up_to_order( other_xs, order )
        lego_block = xs

      print(file=log)
      print("Volume change of largest niggli cell requested via keyword --max_order", file=log)
      print(file=log)
      print("Input crystal symmetry is tranformed to niggli setting using the operator:", file=log)
      print(modified_xs.basic_to_niggli_cb_op.as_xyz(), file=log)
      print(file=log)
      print("Comparisons for various sublattices of the target cell are listed", file=log)
      print(file=log)

      for tmp_xs,cb_op,mat in zip(modified_xs.xs_list,
                                  modified_xs.extra_cb_op,
                                  modified_xs.matrices ):
        mat=mat.as_list_of_lists()
        print("===================================================================", file=log)
        print("Niggli cell is expanded using matrix:", file=log)
        print(file=log)
        # backslashes are escaped explicitly; the printed text is unchanged
        print(" /%4i %4i %4i \\ "%(mat[0][0],mat[0][1],mat[0][2]), file=log)
        print(" M = |%4i %4i %4i | "%(mat[1][0],mat[1][1],mat[1][2]), file=log)
        print(" \\%4i %4i %4i / "%(mat[2][0],mat[2][1],mat[2][2]), file=log)
        print(file=log)
        print("Change of basis operator to reference setting:", file=log)
        print(" ", cb_op.as_xyz(), file=log)
        print("resulting crystal symmetry:", file=log)
        tmp_xs.show_summary(f=log,prefix=" ")
        print(file=log)
        print(file=log)
        sl_object = slt.compare_lattice(
          xs_a=tmp_xs,
          xs_b=lego_block,
          max_delta=command_line.options.max_delta,
          out=log,
          relative_length_tolerance=command_line.options.rel_length_tol,
          absolute_angle_tolerance=command_line.options.abs_angle_tol)
"--detector_version_phil", "-d", type="string", default=None, dest="det_phil", help="detector version phil for the CSPAD").option( None, "--image", "-i", type="string", default=None, dest="det_image", help="image matching the detector version phil")).process( args=sys.argv[1:]) if cmd_line.options.det_phil is not None and cmd_line.options.det_image is not None: print "extracting active areas..." active_areas = get_CSPAD_active_areas(cmd_line.options.det_image, cmd_line.options.det_phil) elif cmd_line.options.det_phil is None and cmd_line.options.det_image is None: print "using active areas from LG36 CSPAD metrology" active_areas = LG36_active_areas else: raise Sorry( "Specify both a detector version phil and an example image to extract active areas." ) for arg in cmd_line.args: file = open(arg, "rb") data = pickle.load(file) file.close() plot_preds(data, active_areas=active_areas)
def create_sheet_hydrogen_bond_proxies(sheet_params,
                                       pdb_hierarchy,
                                       weight,
                                       hbond_counts,
                                       distance_ideal,
                                       distance_cut,
                                       remove_outliers,
                                       log=sys.stdout):
  """
  Generate hydrogen-bond proxies between neighbouring strands of a sheet.

  Starting with ``sheet_params.first_strand``, every entry of
  ``sheet_params.strand`` is paired with the strand before it. The
  registration residues given by ``bond_start_previous`` and
  ``bond_start_current`` locate a pair of residues; from there the residue
  pointers are moved in steps of two along both strands and
  ``_create_hbond_proxy`` is called for each pair. Residues named PRO are
  never used as donors. A strand pair without both registration selections
  is skipped without a message.

  Args:
    sheet_params: object with ``first_strand`` (selection string),
      ``strand`` (sequence of objects with ``selection``, ``sense``,
      ``bond_start_current`` and ``bond_start_previous``) and ``sigma``,
      ``slack``, ``top_out``.
    pdb_hierarchy: model hierarchy; supplies the atom selection cache and
      the atoms.
    weight, hbond_counts, distance_ideal, distance_cut, remove_outliers:
      passed through to ``_create_hbond_proxy``; ``distance_ideal`` and
      ``distance_cut`` must not be None.
    log: stream for warnings.

  Returns:
    geometry_restraints.shared_bond_simple_proxy holding all generated
    proxies.

  Raises:
    Sorry: when a registration selection selects no atoms.
  """
  assert (not None in [distance_ideal, distance_cut])
  cache = pdb_hierarchy.atom_selection_cache()
  prev_strand = sheet_params.first_strand
  prev_selection = cache.selection(prev_strand)
  prev_rgs = _get_residue_groups_from_selection(
      pdb_hierarchy=pdb_hierarchy,
      bool_selection=prev_selection)
  n_proxies = 0
  k = 0
  generated_proxies = geometry_restraints.shared_bond_simple_proxy()
  while k < len(sheet_params.strand):
    curr_strand = sheet_params.strand[k]
    curr_selection = cache.selection(curr_strand.selection)
    curr_start = None
    prev_start = None
    if curr_strand.bond_start_current is not None:
      curr_start = cache.selection(curr_strand.bond_start_current)
    if curr_strand.bond_start_previous is not None:
      prev_start = cache.selection(curr_strand.bond_start_previous)
    curr_rgs = _get_residue_groups_from_selection(
        pdb_hierarchy=pdb_hierarchy,
        bool_selection=curr_selection)
    # i indexes residues of the previous strand, j those of the current one
    i = j = 0
    len_prev_residues = len(prev_rgs)
    len_curr_residues = len(curr_rgs)
    if curr_start is not None and prev_start is not None:
      # NOTE(review): the message mentions "zero or several atoms" but only
      # the zero case is tested here - confirm whether that is intended.
      if curr_start.count(True) < 1 or prev_start.count(True) < 1:
        error_msg = """\
Wrong registration in SHEET record. One of these selections
"%s" or "%s"
yielded zero or several atoms. Possible reason for it is the presence of
insertion codes or alternative conformations for one of these residues or
the .pdb file was edited without updating SHEET records.""" \
% (curr_strand.bond_start_current, curr_strand.bond_start_previous)
        raise Sorry(error_msg)
      # the registration atom of the current strand being N marks the
      # current strand's registration residue as the donor
      current_start_res_is_donor = pdb_hierarchy.atoms().select(
          curr_start)[0].name.strip() == 'N'
      if (len_curr_residues > 0) and (len_prev_residues > 0):
        i = _find_start_residue(
            residues=prev_rgs,
            start_selection=prev_start)
        j = _find_start_residue(
            residues=curr_rgs,
            start_selection=curr_start)
        if (i >= 0) and (j >= 0):
          # move i,j pointers from registration residues to the beginning of
          # beta-strands
          while (1 < i and
              ((1 < j and curr_strand.sense == "parallel") or
               (j < len_curr_residues - 2 and
                curr_strand.sense == "antiparallel"))):
            if curr_strand.sense == "parallel":
              i -= 2
              j -= 2
            elif curr_strand.sense == "antiparallel":
              i -= 2
              j += 2
          if (curr_strand.sense == "parallel"):
            # some tweaking to ensure correct donor assignment
            if i >= 2 and not current_start_res_is_donor:
              i -= 2
              current_start_res_is_donor = not current_start_res_is_donor
            if j >= 2 and current_start_res_is_donor:
              j -= 2
              current_start_res_is_donor = not current_start_res_is_donor
            # donor and acceptor roles alternate between the two strands;
            # only the pointer of the acceptor strand advances each step
            while (i < len_prev_residues) and (j < len_curr_residues):
              if current_start_res_is_donor:
                donor_residue = curr_rgs[j]
                acceptor_residue = prev_rgs[i]
                i += 2
              else:
                donor_residue = prev_rgs[i]
                acceptor_residue = curr_rgs[j]
                j += 2
              current_start_res_is_donor = not current_start_res_is_donor
              if donor_residue.atom_groups()[0].resname.strip() != "PRO":
                proxies = _create_hbond_proxy(
                    acceptor_atoms=acceptor_residue.atoms(),
                    donor_atoms=donor_residue.atoms(),
                    hbond_counts=hbond_counts,
                    distance_ideal=distance_ideal,
                    distance_cut=distance_cut,
                    remove_outliers=remove_outliers,
                    weight=weight,
                    sigma=sheet_params.sigma,
                    slack=sheet_params.slack,
                    top_out=sheet_params.top_out,
                    log=log)
                if proxies is not None:
                  for proxy in proxies:
                    generated_proxies.append(proxy)
          elif (curr_strand.sense == "antiparallel"):
            # each residue pair is tried in both directions: i runs forward
            # along the previous strand, j backward along the current one
            while (i < len_prev_residues and j >= 0):
              if (prev_rgs[i].atom_groups()[0].resname.strip() != "PRO"):
                proxies = _create_hbond_proxy(
                    acceptor_atoms=curr_rgs[j].atoms(),
                    donor_atoms=prev_rgs[i].atoms(),
                    hbond_counts=hbond_counts,
                    distance_ideal=distance_ideal,
                    distance_cut=distance_cut,
                    remove_outliers=remove_outliers,
                    weight=weight,
                    sigma=sheet_params.sigma,
                    slack=sheet_params.slack,
                    top_out=sheet_params.top_out,
                    log=log)
                if proxies is not None:
                  for proxy in proxies:
                    generated_proxies.append(proxy)
              if (curr_rgs[j].atom_groups()[0].resname.strip() != "PRO"):
                proxies = _create_hbond_proxy(
                    acceptor_atoms=prev_rgs[i].atoms(),
                    donor_atoms=curr_rgs[j].atoms(),
                    hbond_counts=hbond_counts,
                    distance_ideal=distance_ideal,
                    distance_cut=distance_cut,
                    remove_outliers=remove_outliers,
                    weight=weight,
                    sigma=sheet_params.sigma,
                    slack=sheet_params.slack,
                    top_out=sheet_params.top_out,
                    log=log)
                if proxies is not None:
                  for proxy in proxies:
                    generated_proxies.append(proxy)
              i += 2
              j -= 2
          else:
            print >> log, " WARNING: strand direction not defined!"
            print >> log, " previous: %s" % prev_strand
            print >> log, " current: %s" % curr_strand.selection
        else:
          print >> log, " WARNING: can't find start of bonding for strands!"
          print >> log, " previous: %s" % prev_strand
          print >> log, " current: %s" % curr_strand.selection
      else:
        print >> log, " WARNING: can't find one or more strands!"
        print >> log, " previous: %s" % prev_strand
        print >> log, " current: %s" % curr_strand.selection
    # the current strand becomes the "previous" one of the next pair
    k += 1
    prev_strand = curr_strand.selection
    prev_selection = curr_selection
    prev_rgs = curr_rgs
  return generated_proxies
def get_probabilities(input):
  """Parse a comma-separated string of probabilities.

  The string is split on commas, every piece is converted with float() and
  the values are stored in a flex.double array.

  Returns the flex.double array.
  Raises Sorry when the sum of the values differs from one by more than
  1.e-3.
  """
  values = []
  for token in input.split(','):
    values.append(float(token))
  result = flex.double(values)
  deviation = abs(1.0 - flex.sum(result))
  if deviation > 1.e-3:
    raise Sorry("Sorry, the given probabilities must sum to one")
  return result
if "target=" in arg: found_it = True break if not found_it: raise Usage(command_line.parser.usage) if command_line.options.no_display: display = False arguments.append('--nodisplay') else: display = True assert command_line.options.num_procs > 0 if command_line.options.output_dir is not None and \ not os.path.isdir(command_line.options.output_dir): raise Sorry("Output dir %s doesn't exist" % command_line.options.output_dir) def do_work(item): file, arguments, kwargs = item try: run_one_index(file, *arguments, **({'display': display})) except Exception, e: if hasattr(e, "classname"): print e.classname, "for %s:" % file, else: print "Indexing error for %s:" % file, print e if command_line.options.num_procs == 1: for file in files: if command_line.options.output_dir is not None:
def __init__(
    self,
    model,
    pdb_hierarchy=None,  # keep for mmtbx.validation_summary (multiple models)
    fmodel=None,
    fmodel_neutron=None,
    sequences=None,
    flags=None,
    header_info=None,
    raw_data=None,
    unmerged_data=None,
    keep_hydrogens=True,
    nuclear=False,
    save_probe_unformatted_file=None,
    show_hydrogen_outliers=False,
    min_cc_two_fofc=0.8,
    n_bins_data=10,
    count_anomalous_pairs_separately=False,
    use_internal_variance=True,
    outliers_only=True,
    use_pdb_header_resolution_cutoffs=False,
    file_name=None,
    ligand_selection=None,
    rotamer_library="8000",
    map_params=None):
  """Run the validation analyses selected by ``flags`` and store the results.

  Every name in ``self.__slots__`` is first reset to None; each analysis
  that runs then fills in its own attribute.

  Parameters (as used by this method):
    model: model manager; when not None the hierarchy, xray structure,
      geometry restraints, crystal symmetry and all_chain_proxies are taken
      from it and ``pdb_hierarchy`` is ignored.
    pdb_hierarchy: required (asserted) only when ``model`` is None.
    fmodel, fmodel_neutron: optional; enable the data-dependent analyses
      (R-factors, real-space, waters, merging, xtriage, Wilson B).
    sequences: optional; enables sequence validation when ``flags.seq``.
    flags: analysis switches; ``molprobity_flags()`` is used when None.
    header_info: stored on the instance; its d_min/d_max are applied to
      ``fmodel`` when ``use_pdb_header_resolution_cutoffs`` is set.
    raw_data, unmerged_data: optional inputs for the data statistics,
      merging statistics and xtriage.
    keep_hydrogens, nuclear: forwarded to the geometry statistics.
    save_probe_unformatted_file: if given, the unformatted probe output of
      the clash analysis is written to this file.
    min_cc_two_fofc, n_bins_data, count_anomalous_pairs_separately,
    use_internal_variance, outliers_only, ligand_selection: forwarded to the
      corresponding analyses.
    rotamer_library: must be "8000" (asserted).
    map_params: optional; when it names a map file, or map coefficients
      file plus label, real-space and water analyses are run against the
      maps instead of ``fmodel``.
    show_hydrogen_outliers, file_name: not used in this method.

  Raises:
    Sorry: if the probe file cannot be written.
  """
  assert rotamer_library == "8000", "data_version given to RotamerEval not recognized."
  for name in self.__slots__:
    setattr(self, name, None)

  # use objects from model
  self.model = model
  if (self.model is not None):
    pdb_hierarchy = self.model.get_hierarchy()
    xray_structure = self.model.get_xray_structure()
    geometry_restraints_manager = self.model.get_restraints_manager().geometry
    crystal_symmetry = self.model.crystal_symmetry()
    all_chain_proxies = self.model.all_chain_proxies
  else:
    assert (pdb_hierarchy is not None)
    xray_structure = None
    geometry_restraints_manager = None
    crystal_symmetry = None
    all_chain_proxies = None

  # very important - the i_seq attributes may be extracted later
  pdb_hierarchy.atoms().reset_i_seq()
  self.pdb_hierarchy = pdb_hierarchy
  if (xray_structure is None):
    if (fmodel is not None):
      xray_structure = fmodel.xray_structure
    elif (crystal_symmetry is not None):
      xray_structure = pdb_hierarchy.extract_xray_structure(
        crystal_symmetry=crystal_symmetry)
  self.crystal_symmetry = crystal_symmetry
  if (crystal_symmetry is None) and (fmodel is not None):
    self.crystal_symmetry = fmodel.f_obs().crystal_symmetry()

  # The default flags must be resolved before the map-based analyses below,
  # which already consult flags.real_space and flags.waters.
  if (flags is None):
    flags = molprobity_flags()

  # use maps (fmodel is not used)
  # run earlier since pdb_hierarchy gets modified
  use_maps = False
  if (map_params is not None):
    use_maps = ((map_params.input.maps.map_file_name) or
                ((map_params.input.maps.map_coefficients_file_name) and
                 (map_params.input.maps.map_coefficients_label)))
  if (use_maps):
    if (flags.real_space):
      self.real_space = experimental.real_space(
        fmodel=None,
        model=self.model,
        cc_min=min_cc_two_fofc,
        molprobity_map_params=map_params.input.maps)
    if (flags.waters):
      self.waters = waters.waters(
        pdb_hierarchy=pdb_hierarchy,
        xray_structure=xray_structure,
        fmodel=None,
        collect_all=True,
        molprobity_map_params=map_params.input.maps)

  self.header_info = header_info

  import mmtbx.model.statistics
  self.model_statistics_geometry = mmtbx.model.statistics.geometry(
    pdb_hierarchy=pdb_hierarchy,
    geometry_restraints_manager=geometry_restraints_manager,
    use_hydrogens=keep_hydrogens,
    use_nuclear=nuclear)
  self.model_statistics_geometry_result = \
    self.model_statistics_geometry.result()
  self.ramalyze = self.model_statistics_geometry_result.ramachandran.ramalyze
  self.omegalyze = self.model_statistics_geometry_result.omega.omegalyze
  self.rotalyze = self.model_statistics_geometry_result.rotamer.rotalyze
  self.cbetadev = self.model_statistics_geometry_result.c_beta.cbetadev
  self.clashes = self.model_statistics_geometry_result.clash.clashes
  if pdb_hierarchy.contains_protein():
    self.find_missing_atoms(out=null_out())
    if (flags.nqh):
      self.nqh_flips = clashscore.nqh_flips(
        pdb_hierarchy=pdb_hierarchy)
  if (pdb_hierarchy.contains_rna() and flags.rna and
      libtbx.env.has_module(name="suitename")):
    if (geometry_restraints_manager is not None):
      self.rna = rna_validate.rna_validation(
        pdb_hierarchy=pdb_hierarchy,
        geometry_restraints_manager=geometry_restraints_manager,
        outliers_only=outliers_only,
        params=None)
  if (flags.model_stats) and (xray_structure is not None):
    self.model_stats = model_properties.model_statistics(
      pdb_hierarchy=pdb_hierarchy,
      xray_structure=xray_structure,
      all_chain_proxies=all_chain_proxies,
      ignore_hd=(not nuclear),
      ligand_selection=ligand_selection)
  if (geometry_restraints_manager is not None) and (flags.restraints):
    assert (xray_structure is not None)
    self.restraints = restraints.combined(
      pdb_hierarchy=pdb_hierarchy,
      xray_structure=xray_structure,
      geometry_restraints_manager=geometry_restraints_manager,
      ignore_hd=(not nuclear),
      cdl=getattr(all_chain_proxies, "use_cdl", None))
  if (sequences is not None) and (flags.seq):
    self.sequence = sequence.validation(
      pdb_hierarchy=pdb_hierarchy,
      sequences=sequences,
      log=null_out(),
      include_secondary_structure=True,
      extract_coordinates=True)

  if (fmodel is not None):
    if (use_pdb_header_resolution_cutoffs) and (header_info is not None):
      fmodel = fmodel.resolution_filter(
        d_min=header_info.d_min,
        d_max=header_info.d_max)
    if (flags.rfactors):
      self.data_stats = experimental.data_statistics(
        fmodel,
        raw_data=raw_data,
        n_bins=n_bins_data,
        count_anomalous_pairs_separately=count_anomalous_pairs_separately)

    if (not use_maps):  # if maps are used, keep previous results
      if (flags.real_space):
        self.real_space = experimental.real_space(
          model=model,
          fmodel=fmodel,
          cc_min=min_cc_two_fofc)
      if (flags.waters):
        self.waters = waters.waters(
          pdb_hierarchy=pdb_hierarchy,
          xray_structure=xray_structure,
          fmodel=fmodel,
          collect_all=True)

    if (unmerged_data is not None):
      self.merging = experimental.merging_and_model_statistics(
        f_obs=fmodel.f_obs(),
        f_model=fmodel.f_model(),
        r_free_flags=fmodel.r_free_flags(),
        unmerged_i_obs=unmerged_data,
        anomalous=count_anomalous_pairs_separately,
        use_internal_variance=use_internal_variance,
        n_bins=n_bins_data)
    if (flags.xtriage):
      import mmtbx.scaling.xtriage
      f_model = abs(fmodel.f_model()).set_observation_type_xray_amplitude()
      if (raw_data is not None):
        f_model, obs = f_model.common_sets(other=raw_data)
      else:
        obs = fmodel.f_obs()
      self.xtriage = mmtbx.scaling.xtriage.xtriage_analyses(
        miller_obs=obs,
        miller_calc=f_model,
        unmerged_obs=unmerged_data,  # XXX some redundancy here...
        text_out=null_out())
  if (fmodel_neutron is not None) and (flags.rfactors):
    self.neutron_stats = experimental.data_statistics(
      fmodel_neutron,
      n_bins=n_bins_data,
      count_anomalous_pairs_separately=False)
  if (pdb_hierarchy.models_size() == 1):
    self._multi_criterion = multi_criterion_view(pdb_hierarchy)

  # wilson B
  self.wilson_b = None
  if (fmodel is not None):
    self.wilson_b = fmodel.wilson_b()
  elif (fmodel_neutron is not None):
    self.wilson_b = fmodel_neutron.wilson_b()

  # validate hydrogens
  self.hydrogens = None
  if self.model is not None and self.model.has_hd():
    # import here to avoid circular import issues
    from mmtbx.hydrogens.validate_H import validate_H, validate_H_results
    hydrogens = validate_H(model, nuclear)
    hydrogens.validate_inputs()
    hydrogens.run()
    self.hydrogens = validate_H_results(hydrogens.get_results())

  # write probe file if needed (CLI and GUI)
  if (save_probe_unformatted_file is not None):
    pcm = self.clashes.probe_clashscore_manager
    try:
      with open(save_probe_unformatted_file, 'w') as f:
        f.write(pcm.probe_unformatted)
      self.clashes.probe_file = save_probe_unformatted_file
    except IOError as err:
      raise Sorry('%s could not be written correctly.\n%s' %
                  (save_probe_unformatted_file, err))
def _silhouette_analysis(self, cluster_labels, linkage_matrix, n_clusters,
                         min_silhouette_score):
    """Compare valid equal-sized clustering using silhouette scores.

    Every merge distance in ``linkage_matrix`` is tried as a cut threshold.
    Cuts that give unequal cluster sizes, a single cluster, or (when
    ``n_clusters`` is not Auto) the wrong number of clusters are skipped.
    The remaining cuts are scored with the mean cosine silhouette
    coefficient of ``self.coords``.

    Args:
        cluster_labels (np.ndarray): The initial cluster label of each
            coordinate; label ``i`` refers to observation ``i`` of
            ``linkage_matrix``.
        linkage_matrix (np.ndarray): The hierarchical clustering of centroids
            of the initial clustering as produced by
            :func:`scipy.cluster.hierarchy.linkage`.
        n_clusters (int): Optionally override the automatic determination of
            the number of clusters.
        min_silhouette_score (float): The minimum silhouette score to be used
            in automatic determination of the number of clusters.

    Returns:
        tuple: ``(cluster_labels, threshold)`` where ``cluster_labels``
        (np.ndarray) holds a label for each coordinate and ``threshold`` is
        the distance threshold associated with the returned clustering.

    Raises:
        Sorry: if ``n_clusters`` was requested explicitly and no valid
            clustering has that many clusters.
    """
    eps = 1e-6

    cluster_labels_input = cluster_labels
    distances = linkage_matrix[::, 2]
    distances = np.insert(distances, 0, 0)
    silhouette_scores = []
    thresholds = []
    threshold_n_clusters = []
    for threshold in distances[1:]:
        cluster_labels = copy.deepcopy(cluster_labels_input)
        labels = hierarchy.fcluster(
            linkage_matrix, threshold - eps, criterion="distance"
        ).tolist()
        counts = [labels.count(l) for l in set(labels)]
        if len(set(counts)) > 1:
            # only equal-sized clusters are valid
            continue

        n = len(set(labels))
        if n == 1:
            continue
        elif n_clusters is not Auto and n != n_clusters:
            continue
        for i, label in enumerate(labels):
            cluster_labels[cluster_labels_input == i] = int(label - 1)
        if len(np.unique(cluster_labels)) == self.coords.shape[0]:
            # silhouette coefficient not defined if 1 dataset per cluster
            # not sure what the default value should be
            # (ndarray.size is an attribute, not a method)
            sample_silhouette_values = np.full(cluster_labels.size, 0)
        else:
            # Compute the silhouette scores for each sample
            sample_silhouette_values = metrics.silhouette_samples(
                self.coords, cluster_labels, metric="cosine"
            )
        silhouette_avg = sample_silhouette_values.mean()
        silhouette_scores.append(silhouette_avg)
        thresholds.append(threshold)
        threshold_n_clusters.append(n)

        count_negative = (sample_silhouette_values < 0).sum()
        logger.info("Clustering:")
        logger.info("  Number of clusters: %i", n)
        logger.info(
            "  Threshold score: %.3f (%.1f deg)",
            threshold,
            math.degrees(math.acos(1 - threshold)),
        )
        logger.info("  Silhouette score: %.3f", silhouette_avg)
        logger.info(
            "  -ve silhouette scores: %.1f%%",
            100 * count_negative / sample_silhouette_values.size,
        )

    if n_clusters is Auto:
        # A higher silhouette score means a better separated clustering, so
        # the candidate to keep is the one with the maximum score.
        idx = np.argmax(silhouette_scores)
    else:
        # list.index raises ValueError (it never returns None) when no
        # candidate clustering has the requested number of clusters.
        try:
            idx = threshold_n_clusters.index(n_clusters)
        except ValueError:
            raise Sorry("No valid clustering with %i clusters" % n_clusters)

    if n_clusters is Auto and silhouette_scores[idx] < min_silhouette_score:
        # assume single cluster
        cluster_labels = np.zeros(cluster_labels.size)
    else:
        threshold = thresholds[idx] - eps
        labels = hierarchy.fcluster(linkage_matrix, threshold, criterion="distance")
        cluster_labels = np.full(self.coords.shape[0], -1, dtype=int)
        for i, label in enumerate(labels):
            cluster_labels[cluster_labels_input == i] = label - 1

    return cluster_labels, threshold
def __init__(self,
             model,  # shifted, with shift_manager
             map_data=None,  # shifted map_data
             params=None,
             log=sys.stdout,
             verbose=True):
  """Store the inputs, configure the model and check that it can be handled.

  The model receives a freshly built set of pdb interpretation parameters
  (partly copied from ``params``).  A model without usable crystal symmetry
  is placed in a P1 box, alternative conformations are optionally trimmed
  and hydrogens are removed.

  Raises Sorry for multi-model input and for chains that contain only CA
  atoms; the hierarchy overall_counts checks may raise as well.
  """
  t_0 = time()
  self.model = model
  # self.cif_objects = cif_objects
  self.params = params
  self.log = log
  self.verbose = verbose
  # self.shift_manager = self.model.get_shift_manager()

  # Statistics collected at the different stages; nothing computed yet.
  self.rmsd_from_start = None
  self.init_model_statistics = None
  self.init_gm_model_statistics = None
  self.after_ss_idealization = None
  self.after_loop_idealization = None
  self.after_rotamer_fixing = None
  self.final_model_statistics = None
  self.user_supplied_map = map_data
  self.reference_map = None  # Whole map for all NCS copies
  self.master_map = None  # Map for only one NCS copy, or == reference_map if no NCS
  self.init_ref_map = None  # separate map for initial GM. Should be tighter than the 2 above

  # Interpretation parameters for the model: defaults plus user choices.
  own = self.params
  interp_params = mmtbx.model.manager.get_default_pdb_interpretation_params()
  pdb_int = interp_params.pdb_interpretation
  link = pdb_int.peptide_link
  pdb_int.clash_guard.nonbonded_distance_threshold = None
  link.ramachandran_restraints = True
  link.restrain_rama_outliers = own.restrain_rama_outliers
  link.restrain_rama_allowed = own.restrain_rama_allowed
  link.restrain_allowed_outliers_with_emsley = \
    own.restrain_allowed_outliers_with_emsley
  link.rama_weight = own.rama_weight
  link.oldfield.weight_scale = own.oldfield.weight_scale
  link.oldfield.plot_cutoff = own.oldfield.plot_cutoff
  link.apply_peptide_plane = True
  if own.loop_idealization.make_all_trans:
    link.apply_all_trans = own.apply_all_trans
  pdb_int.nonbonded_weight = own.nonbonded_weight
  pdb_int.c_beta_restraints = True
  pdb_int.max_reasonable_bond_distance = None
  ncs_search = pdb_int.ncs_search
  ncs_search.enabled = True
  ncs_search.chain_max_rmsd = 4.0
  ncs_search.chain_similarity_threshold = 0.99
  ncs_search.residue_match_radius = 999.0
  pdb_int.restraints_library.rdl = True
  pdb_int.secondary_structure = own.secondary_structure
  self.params_for_model = interp_params
  self.model.set_pdb_interpretation_params(interp_params)

  # original pdb_h, without any processing
  self.original_hierarchy = self.model.get_hierarchy().deep_copy()
  self.original_boxed_hierarchy = None  # original and boxed (if needed)
  self.filtered_ncs_restr_group_list = []
  self.init_ss_annotation = self.model.get_ss_annotation()

  # various checks, shifts, trims
  self.cs = self.original_cs = self.model.crystal_symmetry()
  shift_manager = self.model.get_shift_manager()
  if shift_manager is not None:
    self.cs = shift_manager.box_crystal_symmetry

  # check self.cs (copy-paste from secondary_sturcure_restraints)
  corrupted_cs = False
  if self.cs is not None:
    incomplete = None in (self.cs.unit_cell(), self.cs.space_group())
    corrupted_cs = incomplete or self.cs.unit_cell().volume() < 10
    if corrupted_cs:
      self.cs = None

  # couple checks if pdb_h is ok
  counts = self.original_hierarchy.overall_counts()
  counts.raise_duplicate_atom_labels_if_necessary()
  # counts.raise_residue_groups_with_multiple_resnames_using_same_altloc_if_necessary()
  counts.raise_chains_with_mix_of_proper_and_improper_alt_conf_if_necessary()
  counts.raise_improper_alt_conf_if_necessary()
  if len(self.original_hierarchy.models()) > 1:
    raise Sorry("Multi model files are not supported")
  ca_only_present = any(
    [chain.is_ca_only()
     for chain in self.original_hierarchy.only_model().chains()])
  if ca_only_present:
    raise Sorry("Don't support models with chains containing only CA atoms.")

  self.original_boxed_hierarchy = self.model.get_hierarchy().deep_copy()
  self.shift_vector = None
  if self.cs is None:
    assert self.model.get_shift_manager() is None
    # should it happen here?
    if corrupted_cs:
      reason = "Symmetry information is corrupted, "
    else:
      reason = "Symmetry information was not found, "
    print >> self.log, reason
    print >> self.log, "putting molecule in P1 box."
    self.log.flush()
    from cctbx import uctbx
    box = uctbx.non_crystallographic_unit_cell_with_the_sites_in_its_center(
      sites_cart=self.model.get_sites_cart(),
      buffer_layer=3)
    # Creating new xrs from box, inspired by extract_box_around_model_and_map
    sp = crystal.special_position_settings(box.crystal_symmetry())
    sites_frac = box.sites_frac()
    xrs_box = self.model.get_xray_structure().replace_sites_frac(
      box.sites_frac())
    xray_structure_box = xray.structure(sp, xrs_box.scatterers())
    self.model.set_xray_structure(xray_structure_box)
    self.cs = box.crystal_symmetry()
    self.shift_vector = box.shift_vector

  if self.shift_vector is not None and self.params.debug:
    boxed_pdb_text = self.model.model_as_pdb()
    with open("%s_boxed.pdb" % self.params.output_prefix, 'w') as f:
      f.write(boxed_pdb_text)

  if self.params.trim_alternative_conformations:
    self.model.remove_alternative_conformations(always_keep_one_conformer=True)

  self.model = self.model.remove_hydrogens()
  self.model_h = None

  self.time_for_init = time() - t_0
user_phil.append(libtbx.phil.parse(file_name=file_name)) elif ext in [".pkl", ".pickle"]: input_string = "run_file = %s" % arg user_phil.append(libtbx.phil.parse(input_string)) else: try: arg_phil = libtbx.phil.parse(arg) except RuntimeError, e: print e else: user_phil.append(arg_phil) working_phil = process_master_phil.fetch(sources=user_phil) params = working_phil.extract() if params.run_file is None: working_phil.show() raise Sorry("Pickled target function run_file not defined.") target = easy_pickle.load(params.run_file) server = detached_process_server(target, params=params) server.run() ######################################################################## # testing classes (see tst_runtime_utils.py for usage) class simple_client(detached_process_client): def __init__(self, *args, **kwds): self.n_cb = 0 self.out = cStringIO.StringIO() detached_process_client.__init__(self, *args, **kwds) def callback_error(self, error, traceback_info): raise error
def __init__(self,
             map_manager,
             model=None,
             target_ncs_au_model=None,
             regions_to_keep=None,
             solvent_content=None,
             resolution=None,
             sequence=None,
             molecular_mass=None,
             symmetry=None,
             chain_type='PROTEIN',
             keep_low_density=True,  # default from map_box
             box_cushion=5,
             soft_mask=True,
             mask_expand_ratio=1,
             wrapping=None,
             log=None):
  """Box the map (and model) around the unique part of the map.

  segment_and_split_map is run on the map data to obtain a mask for the
  NCS asymmetric unit.  The bounds of that mask become the gridding of the
  box, the boxing is applied to model, ncs and map, and finally the mask is
  applied to the boxed map.

  The map must have its origin at (0, 0, 0), ``resolution`` is required
  (both asserted) and, if a model is given, it must be compatible with the
  map manager.  Progress is printed only when ``log`` is supplied.

  Raises Sorry if segment_and_split_map did not produce the mask.
  """
  self.model_can_be_outside_bounds = None  # not used but required to be set
  self._map_manager = map_manager
  self._model = model
  self._mask_data = None
  self._force_wrapping = wrapping
  if wrapping is None:
    wrapping = self.map_manager().wrapping()
  self.basis_for_boxing_string = 'around_unique, wrapping = %s' % (wrapping)

  if log is None:
    log = null_out()  # Print only if a log is supplied

  # Input checks
  assert isinstance(map_manager, iotbx.map_manager.map_manager)
  assert self._map_manager.map_data().accessor().origin() == (0, 0, 0)
  assert resolution is not None
  if model is not None:
    assert isinstance(model, mmtbx.model.manager)
    assert map_manager.is_compatible_model(model)
  if self.map_manager().wrapping():  # map must be entire unit cell
    assert map_manager.unit_cell_grid == map_manager.map_data().all()

  # Get crystal_symmetry
  self.crystal_symmetry = map_manager.crystal_symmetry()

  # Convert to map_data
  from cctbx.maptbx.segment_and_split_map import run as segment_and_split_map
  assert self._map_manager.map_data().origin() == (0, 0, 0)
  args = []
  _, _, tracking_data = segment_and_split_map(
    args,
    map_data=self._map_manager.map_data(),
    crystal_symmetry=self.crystal_symmetry,
    ncs_obj=self._map_manager.ncs_object(),
    target_model=target_ncs_au_model,
    write_files=False,
    auto_sharpen=False,
    add_neighbors=False,
    density_select=False,
    save_box_map_ncs_au=True,
    resolution=resolution,
    solvent_content=solvent_content,
    chain_type=chain_type,
    sequence=sequence,
    molecular_mass=molecular_mass,
    symmetry=symmetry,
    keep_low_density=keep_low_density,
    regions_to_keep=regions_to_keep,
    box_buffer=box_cushion,
    soft_mask_extract_unique=soft_mask,
    mask_expand_ratio=mask_expand_ratio,
    out=log)

  from scitbx.matrix import col

  if not hasattr(tracking_data, 'box_mask_ncs_au_map_data'):
    raise Sorry(" Extraction of unique part of map failed...")

  ncs_au_mask_data = tracking_data.box_mask_ncs_au_map_data

  one = col((1, 1, 1))
  lower_bounds = ncs_au_mask_data.origin()
  upper_bounds = tuple(col(ncs_au_mask_data.focus()) - one)

  print("\nBounds for unique part of map: %s to %s " % (
    str(lower_bounds), str(upper_bounds)), file=log)

  # shift the map so it is in the same position as the box map will be in
  ncs_au_mask_data.reshape(flex.grid(ncs_au_mask_data.all()))
  assert col(ncs_au_mask_data.all()) == \
    col(upper_bounds) - col(lower_bounds) + col((1, 1, 1))

  self.gridding_first = lower_bounds
  self.gridding_last = upper_bounds

  # Ready with gridding...set up shifts and box crystal_symmetry
  self.set_shifts_and_crystal_symmetry()

  # Apply boxing to model, ncs, and map (if available)
  self.apply_to_model_ncs_and_map()

  # Note that at this point, self._map_manager has been boxed
  assert ncs_au_mask_data.all() == self._map_manager.map_data().all()
  self._mask_data = ncs_au_mask_data

  # Now separately apply the mask to the boxed map
  self.apply_around_unique_mask(
    self._map_manager,
    resolution=resolution,
    soft_mask=soft_mask)
def callback_aborted(self):
  """Abort callback: always raises Sorry("aborted as planned.")."""
  message = "aborted as planned."
  raise Sorry(message)
def get_range(value_list, threshold=None, ignore_ends=True,
    keep_near_ends_frac=0.02, half_height_width=2.,
    get_half_height_width=None, cutoff_ratio=4, ratio_max=0.5):
  """Return the fractional range of value_list that lies above a cutoff.

  The cutoff is min + threshold * (max - min) of the values.  The result is
  (i_low / n, i_high / n), where i_low is one position before the first
  value above the cutoff and i_high one position after the last one.

  Parameters:
    value_list: sequence providing size(), min_max_mean() (with .min and
      .max) and integer indexing.
    threshold: fraction of the value range used as cutoff; None means 0.
    ignore_ends: ignore the first and last points, which may be off.
    keep_near_ends_frac: if a limit is closer than this fraction to an end
      of the list, it is moved to that end.
    half_height_width: with get_half_height_width, how many half-height
      widths to go out from the peak centre to sample the baseline.
    get_half_height_width: find the width at half max height, go
      half_height_width times this width out in either direction and use
      the values there as baseline instead of the full cell.
    cutoff_ratio, ratio_max: the sampled baseline is used only if its
      height (as fraction of the value range) is positive, at most
      cutoff_ratio * threshold and below ratio_max.
      # XXX May need to set cutoff_ratio and ratio_max lower.

  Raises Sorry when no value exceeds the cutoff.
  """
  # Resolve the default first: the half-height branch multiplies threshold
  # by cutoff_ratio and would fail on None.
  if threshold is None:
    threshold = 0
  n_tot = value_list.size()
  assert n_tot > 0
  stats = value_list.min_max_mean()
  min_value = stats.min
  max_value = stats.max

  if get_half_height_width:
    z_min, z_max = get_range(value_list, threshold=0.5,
      ignore_ends=ignore_ends, keep_near_ends_frac=keep_near_ends_frac,
      get_half_height_width=False)
    z_mid = 0.5 * (z_min + z_max)
    z_width = 0.5 * (z_max - z_min)
    # Sample the baseline half_height_width widths away from the centre
    z_low = z_mid - half_height_width * z_width
    z_high = z_mid + half_height_width * z_width
    if ignore_ends:
      i_max = n_tot - 2
      i_min = 1
    else:
      i_max = n_tot - 1
      i_min = 0
    i_low = max(i_min, min(i_max, int(0.5 + z_low * n_tot)))
    i_high = max(i_min, min(i_max, int(0.5 + z_high * n_tot)))
    value_range = max(1.e-10, (max_value - min_value))
    ratio_low = (value_list[i_low] - min_value) / value_range
    ratio_high = (value_list[i_high] - min_value) / value_range
    ratio_limit = cutoff_ratio * threshold
    low_ok = (ratio_low <= ratio_limit and ratio_low > 0
              and ratio_low < ratio_max)
    high_ok = (ratio_high <= ratio_limit and ratio_high > 0
               and ratio_high < ratio_max)
    if low_ok and high_ok:
      # Raise the threshold by the smaller of the two baseline heights
      new_threshold = threshold + min(ratio_low, ratio_high)
    else:
      new_threshold = threshold
    return get_range(value_list, threshold=new_threshold,
      ignore_ends=ignore_ends, keep_near_ends_frac=keep_near_ends_frac,
      get_half_height_width=False)

  cutoff = min_value + (max_value - min_value) * threshold
  if ignore_ends:
    i_off = 1
  else:
    i_off = 0

  # First point above the cutoff, scanning from the start
  i_low = None
  for i in range(i_off, n_tot - i_off):
    if value_list[i] > cutoff:
      i_low = max(i_off, i - 1)
      break
  # Last point above the cutoff, scanning from the end
  i_high = None
  for i in range(i_off, n_tot - i_off):
    ii = n_tot - 1 - i
    if value_list[ii] > cutoff:
      i_high = min(n_tot - 1 - i_off, ii + 1)
      break
  if i_low is None or i_high is None:
    raise Sorry("Cannot auto-select region...")
  if i_low / n_tot < keep_near_ends_frac:
    i_low = 0
  if (n_tot - 1 - i_high) / n_tot < keep_near_ends_frac:
    i_high = n_tot - 1
  return i_low / n_tot, i_high / n_tot
def as_table1_column(self, label, wavelength, log, re_compute_r_factors=Auto):
  """
  Collect the statistics of this structure into an iotbx.table_one.column
  (overall values plus outer shell), i.e. one column of the traditional
  'Table 1' of crystallographic statistics in structure articles.

  Raises Sorry when the unmerged data do not cover the resolution range of
  the merged data.
  """
  data_stats = self.data_stats
  if (data_stats is None):
    data_stats = dummy_validation()
  # Placeholders; replaced below if merging statistics are available.
  merging_stats = dummy_validation()
  merging_outer = dummy_validation()
  n_refl_uniq = data_stats.n_refl
  n_refl_refine = data_stats.n_refl_refine
  n_free = data_stats.n_free
  completeness = data_stats.completeness
  completeness_outer = data_stats.completeness_outer
  d_max_min = self.d_max_min()
  d_max, d_min = d_max_min

  if (self.merging is not None):
    merging_stats = self.merging.overall
    merging_outer = self.merging.bins[-1]
    n_refl_uniq = merging_stats.n_uniq
    epsilon = 0.001
    tolerance = 2 * epsilon
    if ((merging_stats.d_min > d_min + tolerance) or
        (merging_stats.d_max < d_max - tolerance)):
      message_template = (
        "Resolution limits for unmerged data in the structure "
        "'%s' do not cover the "
        "full range present in the merged data: %g - %g (merged) versus "
        "%g - %g (unmerged)")
      raise Sorry(message_template % (
        label, d_max, d_min, merging_stats.d_max, merging_stats.d_min))

  r_work = self.r_work()
  r_free = self.r_free()
  n_tls_groups = None
  header = self.header_info
  if (header is not None):
    if (header.n_tls_groups > 0):
      n_tls_groups = header.n_tls_groups
    use_header_values = (
      not re_compute_r_factors or
      (not header.is_phenix_refinement() and (re_compute_r_factors is Auto)))
    r_work, r_free, warned = rfactor_sanity_check(
      r_work_pdb=header.r_work,
      r_free_pdb=header.r_free,
      r_work_fmodel=r_work,
      r_free_fmodel=r_free,
      out=log,
      structure_name=label,
      re_compute_r_factors=not use_header_values)
    if (use_header_values):
      n_refl_refine = data_stats.n_refl

  # Mean ADPs, in the order: overall, protein, other, water
  adp_result = self.adp_stats.result()
  adp_mean = []
  for prop in ('overall', 'protein', 'other', 'water'):
    prop_stats = getattr(adp_result, prop)
    adp_mean.append(prop_stats.mean if prop_stats is not None else None)

  model_result = self.model_stats_new.result()
  column = iotbx.table_one.column(
    label=label,
    space_group=self.space_group_info(),
    unit_cell=self.unit_cell().parameters(),
    # data properties
    wavelength=wavelength,
    d_max_min=d_max_min,
    n_refl_all=merging_stats.n_obs,
    n_refl=n_refl_uniq,
    multiplicity=merging_stats.mean_redundancy,
    completeness=completeness * 100.0,
    i_over_sigma=merging_stats.i_over_sigma_mean,
    wilson_b=data_stats.wilson_b,
    r_sym=merging_stats.r_merge,
    r_meas=merging_stats.r_meas,
    r_pim=merging_stats.r_pim,
    cc_one_half=merging_stats.cc_one_half,
    cc_star=merging_stats.cc_star,
    # refinement
    n_refl_refine=n_refl_refine,
    n_free=n_free,
    r_work=r_work,
    r_free=r_free,
    cc_work=merging_stats.cc_work,
    cc_free=merging_stats.cc_free,
    # model properties
    n_atoms=model_result.n_atoms - model_result.n_hd,
    n_macro_atoms=(model_result.n_protein_atoms +
                   model_result.n_nucleotide_atoms),
    n_ligand_atoms=model_result.n_other_atoms,
    n_waters=model_result.n_water_atoms,
    n_residues=model_result.n_protein,
    bond_rmsd=self.rms_bonds(),
    angle_rmsd=self.rms_angles(),
    rama_favored=self.rama_favored(),
    rama_allowed=self.rama_allowed(),
    rama_outliers=self.rama_outliers(),
    rota_outliers=self.rota_outliers(),
    clashscore=self.clashscore(),
    adp_mean=adp_mean[0],
    adp_mean_mm=adp_mean[1],
    adp_mean_lig=adp_mean[2],
    adp_mean_wat=adp_mean[3],
    n_tls_groups=n_tls_groups,
    anomalous_flag=data_stats.anomalous_flag,
  )
  # XXX we need a consistency check here as well
  return column.add_outer_shell(
    d_max_min=(data_stats.d_max_outer, data_stats.d_min_outer),
    n_refl=data_stats.n_refl_outer,
    n_refl_all=merging_outer.n_obs,
    n_refl_refine=data_stats.n_refl_refine_outer,
    n_free=data_stats.n_free_outer,
    cc_one_half=merging_outer.cc_one_half,
    cc_star=merging_outer.cc_star,
    r_sym=merging_outer.r_merge,
    r_meas=merging_outer.r_meas,
    r_pim=merging_outer.r_pim,
    i_over_sigma=merging_outer.i_over_sigma_mean,
    multiplicity=merging_outer.mean_redundancy,
    completeness=completeness_outer * 100,
    cc_work=merging_outer.cc_work,
    cc_free=merging_outer.cc_free,
    r_work=data_stats.r_work_outer,
    r_free=data_stats.r_free_outer)
def run(self):
  ''' Run the script.

  Reads one reflection table and at least one experiment from the command
  line, matches the reference reflections against predictions, creates the
  profile model and writes the experiments as JSON to params.output.

  Raises Sorry for missing or inconsistent input and when no reference
  spot matches a prediction.
  '''
  from dials.algorithms.profile_model.factory import ProfileModelFactory
  from dials.util.command_line import Command
  from dials.array_family import flex
  from dials.util.options import flatten_reflections, flatten_experiments
  from dxtbx.model.experiment_list import ExperimentListDumper
  from libtbx.utils import Sorry
  from dials.util import log
  log.config()

  # Parse the command line
  params, options = self.parser.parse_args(show_diff_phil=True)
  reflections = flatten_reflections(params.input.reflections)
  experiments = flatten_experiments(params.input.experiments)
  if len(reflections) == 0 and len(experiments) == 0:
    self.parser.print_help()
    return
  if len(reflections) != 1:
    raise Sorry('exactly 1 reflection table must be specified')
  if len(experiments) == 0:
    raise Sorry('no experiments were specified')
  if ('background.mean' not in reflections[0]) and params.subtract_background:
    raise Sorry(
      'for subtract_background need background.mean in reflections')

  reflections, _ = self.process_reference(reflections[0], params)

  # Check pixels don't belong to neighbours
  self.filter_reference_pixels(reflections, experiments)

  # Predict the reflections
  logger.info("")
  logger.info("=" * 80)
  logger.info("")
  logger.info("Predicting reflections")
  logger.info("")
  predicted = flex.reflection_table.from_predictions_multi(
    experiments,
    dmin=params.prediction.d_min,
    dmax=params.prediction.d_max,
    margin=params.prediction.margin,
    force_static=params.prediction.force_static,
    padding=params.prediction.padding)

  # Match with predicted
  matched, reflections, unmatched = predicted.match_with_reference(
    reflections)
  assert (len(matched) == len(predicted))
  assert (matched.count(True) <= len(reflections))
  if matched.count(True) == 0:
    raise Sorry('''
      Invalid input for reference reflections.
      Zero reference spots were matched to predictions
    ''')
  elif len(unmatched) != 0:
    logger.info('')
    logger.info('*' * 80)
    logger.info(
      'Warning: %d reference spots were not matched to predictions',
      len(unmatched))
    logger.info('*' * 80)
    logger.info('')

  # Create the profile model
  experiments = ProfileModelFactory.create(params, experiments, reflections)
  for model in experiments:
    sigma_b = model.profile.sigma_b(deg=True)
    sigma_m = model.profile.sigma_m(deg=True)
    # isinstance also accepts float subclasses (e.g. numpy.float64), which
    # an exact type comparison would send to the scan-varying branch.
    if isinstance(sigma_b, float):
      logger.info('Sigma B: %f', sigma_b)
      logger.info('Sigma M: %f', sigma_m)
    else:  # scan varying
      mean_sigma_b = sum(sigma_b) / len(sigma_b)
      mean_sigma_m = sum(sigma_m) / len(sigma_m)
      logger.info('Sigma B: %f', mean_sigma_b)
      logger.info('Sigma M: %f', mean_sigma_m)

  # Write the parameters
  Command.start("Writing experiments to %s" % params.output)
  dump = ExperimentListDumper(experiments)
  with open(params.output, "w") as outfile:
    outfile.write(dump.as_json())
  Command.end("Wrote experiments to %s" % params.output)
def run(args, out=sys.stdout, auto_extract_labels=True,
    use_current_directory_if_not_specified=False, warn=True):
  """Command-line entry point of phenix.table_one.

  With no arguments a usage note and the parameter template are printed and
  None is returned.  Otherwise each argument is either an existing file
  (phil, pdb, hkl or txt, as classified by file_reader.any_file) or a
  parameter; "unmerged_data=<path>" and "--flag" (treated as "flag=True")
  are handled specially and unknown parameters are reported and ignored.
  A PDB file given on the command line, together with its MTZ file, is
  appended as a new structure.

  The effective parameters are written to "table_one.eff", the table is
  pickled to "<base_name>.pkl" and saved in the requested formats.

  Returns the table_one object.

  Raises Sorry if a PDB file is given although the parameters already
  define a structure, or if a PDB file is given without an MTZ file.
  """
  master_params = libtbx.phil.parse(master_phil_str, process_includes=True)
  if (len(args) == 0):
    print("""\
************************************************************************
  phenix.table_one - statistics harvesting for publication
************************************************************************

  note: this is somewhat difficult to configure on the command line at
        present; you may find it more convenient to use the PHENIX GUI.
""", file=out)
    print("# Parameter template for phenix.table_one:", file=out)
    master_params.show(out=out)
    print("# (the 'structure' scope may be copied as many times as ", file=out)
    print("# necessary to handle multiple datasets.)", file=out)
    print("# Alternate usage:", file=out)
    print("# phenix.table_one model.pdb data.mtz [logfile]*", file=out)
    return None
  if (warn):
    print("""
  note: this is somewhat difficult to configure on the command line at
        present; you may find it more convenient to use the PHENIX GUI.
""", file=out)
    time.sleep(2)
  interpreter = libtbx.phil.command_line.argument_interpreter(
    master_phil=master_params,
    home_scope="table_one")
  file_phil = []
  cmdline_phil = []
  pdb_file = None
  mtz_file = None
  unmerged_data = None
  # NOTE(review): log files are collected here but nothing below uses them;
  # confirm whether they should be attached to the new structure.
  log_files = []
  for arg in args:
    if os.path.isfile(arg):
      f = file_reader.any_file(arg)
      if (f.file_type == "phil"):
        file_phil.append(f.file_object)
      elif (f.file_type == "pdb"):
        pdb_file = f.file_name
      elif (f.file_type == "hkl"):
        mtz_file = f.file_name
      elif (f.file_type == "txt"):
        log_files.append(f.file_name)
    else:
      if arg.startswith("unmerged_data="):
        # keep any further "=" characters that are part of the path
        unmerged_data = os.path.abspath("=".join(arg.split("=")[1:]))
        continue
      if arg.startswith("--"):
        arg = arg[2:] + "=True"
      try:
        arg_phil = interpreter.process(arg=arg)
      except RuntimeError:
        print("Ignoring unknown argument %s" % arg, file=out)
      else:
        cmdline_phil.append(arg_phil)
  working_phil = master_params.fetch(sources=file_phil + cmdline_phil)
  params = working_phil.extract()
  if (pdb_file is not None):
    if (len(params.table_one.structure) > 0):
      raise Sorry(
        "You already have a structure defined in the parameter " +
        "file; to add structures, you should edit the parameters instead of " +
        "specifying additional PDB and data files on the command line.")
    if (mtz_file is None):
      raise Sorry(
        "You have supplied a PDB file, but no corresponding MTZ " +
        "file.")
    structure_params = libtbx.phil.parse(structure_params_str)
    new_structure = structure_params.extract().structure[0]
    new_structure.pdb_file = pdb_file
    new_structure.mtz_file = mtz_file
    new_structure.unmerged_data = unmerged_data
    params.table_one.structure.append(new_structure)
  if auto_extract_labels:
    extract_labels(params.table_one, out=out)
  if use_current_directory_if_not_specified:
    if (params.table_one.output.directory is None):
      params.table_one.output.directory = os.getcwd()
  validate_params(params)
  if (params.table_one.multiprocessing.nproc is None):
    params.table_one.multiprocessing.nproc = 1
  final_phil = master_params.format(python_object=params)
  if params.table_one.output.verbose:
    print("", file=out)
    print("#Final effective parameters:", file=out)
    final_phil.show(out=out)
    print("#---end", file=out)
    print("", file=out)
  with open("table_one.eff", "w") as f:
    final_phil.show(out=f)
  table1 = table_one(params.table_one, out=out)
  easy_pickle.dump("%s.pkl" % params.table_one.output.base_name, table1)
  table1.save_multiple(
    file_base=params.table_one.output.base_name,
    formats=params.table_one.output.format)
  return table1
def run(args, command_name="phenix.cif_as_mtz", out=sys.stdout,
        return_as_miller_arrays=False):
    """Command-line front end that hands one reflection CIF file to
    ``process_files``.

    The arguments are parsed with ``iotbx_option_parser``; the single
    positional argument (the reflection file) and the requested R-free
    column label are validated here before ``process_files`` is called.

    Parameters:
      args: list of command-line strings; an empty list is replaced by
        ``["--help"]``.
      command_name: program name interpolated into the usage/example text.
      out: stream for diagnostics; also passed to ``process_files`` as
        ``log``.
      return_as_miller_arrays: forwarded to ``process_files``; when true,
        the value it returns is returned from this function.

    Returns:
      The ``process_files`` result if ``return_as_miller_arrays`` is true,
      otherwise ``None``.

    Raises:
      Sorry: if the number of positional arguments is not exactly one, if
        the reflection file does not exist, or if the R-free column label
        is empty, does not start with an uppercase ASCII letter, or
        contains characters other than letters, digits, ``_`` and ``-``.
    """
    if len(args) == 0:
        args = ["--help"]
    try:
        command_line = (
            iotbx_option_parser(
                usage="%s [reflection_cif_file] [options]" % command_name,
                description='Example: %s r1o9ksf.ent --symmetry=pdb1o9k.ent'
                % command_name)
            .enable_symmetry_comprehensive()
            .option(None, "--output_file_name",
                    action="store",
                    default=False,
                    type="string",
                    help="Output mtz file name.")
            .option(None, "--wavelength_id",
                    action="store",
                    default=None,
                    type="int",
                    help="Extract data set with given wavelength_id.")
            .option(None, "--crystal_id",
                    action="store",
                    default=None,
                    type="int",
                    help="Extract data set with given crystal_id.")
            .option(None, "--output_r_free_label",
                    action="store",
                    default="R-free-flags",
                    type="string",
                    help="MTZ column label to use for R-free flags "
                         "(default: R-free-flags)")
            .option(None, "--merge",
                    action="store_true",
                    help="Merge non-unique data where present.")
            .option(None, "--incompatible_flags_to_work_set",
                    action="store_true",
                    help="When merging place reflections with incompatible "
                         "flags into the working set.")
            .option(None, "--remove_systematic_absences",
                    action="store_true",
                    help="Remove systematic absent reflections.")
            .option(None, "--map_to_asu",
                    action="store_true",
                    help="Map to asymmetric unit.")
            .option("--show_details_if_error",
                    action="store_true",
                    help="Show data details for some errors.")
            .option("--show_log",
                    action="store_true",
                    help="Show some output.")
            .option("--ignore_bad_sigmas",
                    action="store_true",
                    help="Set sigmas to None instead of raising an error "
                         "when bad sigmas are present.")
            .option("--extend_flags",
                    action="store_true",
                    help="Extend R-free flags to cover all reflections if "
                         "necessary.")
        ).process(args=args)
    except Exception as e:
        # NOTE(review): any parsing failure is reported on stdout and the
        # process exits with status 0; kept as-is because callers may rely
        # on that exit status.
        if str(e) != "0":
            print(str(e))
        sys.exit(0)
    crystal_symmetry = command_line.symmetry

    positional = command_line.args
    if len(positional) > 1:
        print("%d arguments are given from the command line:" %
              len(positional), positional, file=out)
        raise Sorry("Please specify one reflection cif file.")
    if len(positional) == 0:
        # Only options were supplied: report it the same way as the
        # too-many-files case instead of failing with a bare IndexError.
        raise Sorry("Please specify one reflection cif file.")
    file_name = positional[0]
    if not os.path.isfile(file_name):
        raise Sorry("File is not found: %s" % file_name)

    output_r_free_label = command_line.options.output_r_free_label
    # Raw string: "\-" is not a valid escape in an ordinary string literal.
    # The pattern itself (letters, digits, "_" and "-" allowed) is unchanged.
    # The emptiness test guards the [0] lookup against an empty label.
    if (not output_r_free_label
            or output_r_free_label[0] not in string.ascii_uppercase
            or re.search(r"[^a-zA-Z0-9_\-]", output_r_free_label)):
        raise Sorry((
            "%s is not a suitable column label. MTZ format requires " +
            "an uppercase letter as the first character, and only alphanumeric " +
            "characters or hyphens in the rest of the string.") %
            output_r_free_label)

    options = command_line.options
    result = process_files(
        file_name=file_name,
        crystal_symmetry=crystal_symmetry,
        output_file_name=options.output_file_name,
        wavelength_id=options.wavelength_id,
        crystal_id=options.crystal_id,
        show_details_if_error=options.show_details_if_error,
        output_r_free_label=options.output_r_free_label,
        merge_non_unique_under_symmetry=options.merge,
        map_to_asu=options.map_to_asu,
        remove_systematic_absences=options.remove_systematic_absences,
        incompatible_flags_to_work_set=options.incompatible_flags_to_work_set,
        return_as_miller_arrays=return_as_miller_arrays,
        ignore_bad_sigmas=options.ignore_bad_sigmas,
        extend_flags=options.extend_flags,
        log=out)
    if return_as_miller_arrays:
        return result
def _log_line(log, *parts):
    """Write ``parts`` to ``log`` as one space-separated, newline-ended line.

    Produces the same text as ``print >> log, a, b`` while not depending on
    whether ``print`` is a statement or a function in this module.
    """
    log.write(" ".join(str(part) for part in parts) + "\n")


def _file_stem(path):
    """Return the file-name part of ``path`` cut at its first dot."""
    # Take the basename *before* splitting on ".": splitting the whole path
    # first turns "./model.pdb" or "../dir/model.pdb" into an empty stem.
    return os.path.basename(path).split(".")[0]


def _write_param_file(working_phil, params):
    """Save ``params`` to ``<prefix>_pprobe.param`` and return that name."""
    phil_fname = params.output.output_file_name_prefix + "_pprobe.param"
    new_phil = working_phil.format(python_object=params)
    with open(phil_fname, "w") as handle:
        handle.write(new_phil.as_str())
    return phil_fname


def _show_runtime_params(master_phil, params, log):
    """Write the effective parameters, formatted by ``master_phil``, to log."""
    new_params = master_phil.format(python_object=params)
    _log_line(log, "Runtime Parameters:")
    new_params.show(out=log)


def _setup_rescoring(params, master_phil, working_phil, log):
    """Finish input processing when only rescoring from a peak pickle."""
    pkl_file = params.input.data_pkl.peak_dict
    # Check the file first so a missing/unspecified pickle gives a Sorry
    # rather than an AttributeError from calling .split on None.
    if pkl_file is None or not os.path.isfile(pkl_file):
        raise Sorry("\n\tPKL input requested but no file available\n" +
                    "\t\t\t cannot find %s" % pkl_file)
    pkl_basename = _file_stem(pkl_file)
    if (len(pkl_basename) > 0
            and params.output.output_file_name_prefix is None):
        params.output.output_file_name_prefix = pkl_basename
    _write_param_file(working_phil, params)
    _show_runtime_params(master_phil, params, log)
    return params


def process_inputs(args, log=sys.stdout):
    """Process PProbe command-line inputs and prepare the files for a run.

    Builds the parameter set from ``args``, checks the master parameter
    dictionary, and then either

    * (``params.pprobe.extract`` true) validates the PDB and reflection
      inputs, creates whichever of the strip PDB, map-coefficient MTZ and
      peaks PDB were not supplied, and records their names in ``params``; or
    * (otherwise) checks that the peak pickle exists for rescoring.

    In both cases the parameters are saved to ``<prefix>_pprobe.param``.

    Parameters:
      args: command-line arguments, passed to
        ``mmtbx.utils.process_command_line_args``.
      log: writable stream receiving all progress messages.

    Returns:
      The extracted parameter object, updated with the file names chosen or
      generated here.

    Raises:
      Sorry: if the master parameter dictionary, the PDB inputs, the
        reflection input or the peak pickle are missing or inconsistent.
    """
    _log_line(log, "-" * 79)
    _log_line(log, "PProbe RUN at %s" % time.ctime())
    _log_line(log, "Processing all Inputs:")

    # process phils in order to not overwrite inputs with defaults
    # phil from above
    master_phil = phil.parse(master_params_str, process_includes=True)
    # map params from phenix defaults (phil)
    maps_phil = phil.parse(mmtbx.maps.map_and_map_coeff_params_str)
    search_phil = phil.parse(peak_search_param_str)
    # merge phil objects
    total_phil = master_phil.fetch(sources=[maps_phil, search_phil])

    # inputs is an object with specific params and lists of files
    inputs = mmtbx.utils.process_command_line_args(
        args=args, master_params=total_phil)
    # params object contains all command line parameters
    working_phil = inputs.params
    params = working_phil.extract()

    # check for master param dictionary, falling back to the default name
    if params.input.model_param.model_dict_file is None:
        params.input.model_param.model_dict_file = "pprobe_master.dict"
    if not os.path.isfile(params.input.model_param.model_dict_file):
        _log_line(log, "WARNING -- param file not found!")
        _log_line(log, "--> trying pprobe_master.dict . . . ")
        params.input.model_param.model_dict_file = "pprobe_master.dict"
        if not os.path.isfile(params.input.model_param.model_dict_file):
            raise Sorry("Master Param Dictionary %s not found!" %
                        params.input.model_param.model_dict_file)

    if not params.pprobe.extract:
        # only rescoring from pkl
        return _setup_rescoring(params, master_phil, working_phil, log)

    # check for proper PDB input: count positional plus explicit PDB files
    pdb_count = len(inputs.pdb_file_names)
    for pdbin in (params.input.pdb.model_pdb,
                  params.input.pdb.strip_pdb,
                  params.input.pdb.peaks_pdb):
        if pdbin is not None:
            pdb_count = pdb_count + 1
    if (pdb_count == 1) and (len(inputs.pdb_file_names) == 1):
        # one vanilla pdb to be used as model
        params.input.pdb.model_pdb = inputs.pdb_file_names[0]
    elif (pdb_count == 3) and (len(inputs.pdb_file_names) == 0):
        pass  # three explicit PDBs, hopefully correct
    else:
        raise Sorry(
            "\n\tInput 1 PDB for automatic stripping and peak finding \n"
            "\tor all PDB files specifically, like so: \n"
            "\tfor explicit input: \n"
            "\t\tmodel_pdb=XXX.pdb strip_pdb=YYY.pdb peaks_pdb=ZZZ.pdb \n"
            "\tfor automatic pdb generation: \n"
            "\t\tXXX.pdb")

    # check for proper reflection file input
    reflection_files = inputs.reflection_files
    if len(reflection_files) == 0:
        raise Sorry("Reflection data or map coefficients required")
    if len(reflection_files) > 1:
        raise Sorry(
            "Only one type of reflection data can be entered \n"
            "Enter map coefficients with map_coeff_file=XXX.mtz \n"
            "or structure factor files as XXX.(any supported)")
    params.input.reflection_data.reflection_file_name = (
        reflection_files[0].file_name())

    # filename setup: default the output prefix to the model file stem
    model_basename = _file_stem(params.input.pdb.model_pdb)
    if (len(model_basename) > 0
            and params.output.output_file_name_prefix is None):
        params.output.output_file_name_prefix = model_basename
    if params.input.input_map.map_coeff_file is not None:
        params.input.parameters.write_maps = False

    # okay, see if we're where we want to be
    _show_runtime_params(master_phil, params, log)

    # DATA PROCESSING
    # setup model pdb (required and should be known)
    crystal_symmetry = check_symmetry(inputs, params, log)
    model_pdb_input = iotbx.pdb.input(file_name=params.input.pdb.model_pdb)
    model_hier = model_pdb_input.construct_hierarchy()
    model_hier.remove_hd()
    model_xrs = model_hier.extract_xray_structure(
        crystal_symmetry=crystal_symmetry)

    # strip pdb if needed, write result
    if ((params.input.pdb.strip_pdb is None)
            and (params.input.parameters.map_omit_mode != "asis")):
        strip_xrs, strip_hier = create_strip_pdb(
            model_hier, model_xrs,
            params.input.parameters.map_omit_mode, log)
        strip_filename = (params.output.output_file_name_prefix
                          + "_pprobe_strip.pdb")
        _log_line(log, "Writing Strip PDB to: ", strip_filename)
        strip_hier.write_pdb_file(file_name=strip_filename,
                                  crystal_symmetry=crystal_symmetry,
                                  append_end=True,
                                  anisou=False)
        params.input.pdb.strip_pdb = strip_filename
    elif params.input.parameters.map_omit_mode == "asis":
        # use the model unchanged as the stripped structure
        strip_xrs, strip_hier = model_xrs, model_hier
        params.input.pdb.strip_pdb = params.input.pdb.model_pdb
    else:
        strip_pdb_input = iotbx.pdb.input(
            file_name=params.input.pdb.strip_pdb)
        strip_hier = strip_pdb_input.construct_hierarchy()
        strip_hier.remove_hd()
        strip_xrs = strip_hier.extract_xray_structure(
            crystal_symmetry=crystal_symmetry)

    # Make maps if map_coefficients not input, write out by default
    if params.input.input_map.map_coeff_file is None:
        hkl_in = file_reader.any_file(
            params.input.reflection_data.reflection_file_name,
            force_type="hkl")
        hkl_in.assert_file_type("hkl")
        reflection_files = [hkl_in.file_object]
        f_obs, r_free_flags = setup_reflection_data(
            inputs, params, crystal_symmetry, reflection_files, log)
        maps = create_pprobe_maps(f_obs, r_free_flags, params,
                                  strip_xrs, strip_hier, log)
        map_fname = (params.output.output_file_name_prefix
                     + "_pprobe_maps.mtz")
        _log_line(log, "Writing PProbe maps to MTZ file: ", map_fname)
        maps.write_mtz_file(map_fname)
        params.input.input_map.map_coeff_file = map_fname
    else:
        # Routed to log like every other message (was a bare print).
        _log_line(log, "READING MAP FILE: ",
                  params.input.input_map.map_coeff_file)
        # setup input map coefficients
        map_coeff = reflection_file_utils.extract_miller_array_from_file(
            file_name=params.input.input_map.map_coeff_file,
            label=params.input.input_map.map_diff_label,
            type="complex",
            log=null_log)
        if params.input.parameters.score_res is None:
            params.input.parameters.score_res = map_coeff.d_min()
            _log_line(log, " Determined Resolution Limit: %.2f" %
                      params.input.parameters.score_res)
            _log_line(log, " -->Override with \"score_res=XXX\"")
        map_fname = params.input.input_map.map_coeff_file

    # if peaks not input, find and write to pdb
    if params.input.pdb.peaks_pdb is None:
        if params.input.parameters.map_omit_mode != "valsol":
            peaks_result = find_map_peaks(params, strip_xrs, log)
            pdb_str = peaks_pdb_str(peaks_result)
            peak_pdb = iotbx.pdb.input(source_info=None,
                                       lines=flex.split_lines(pdb_str))
            peak_hier = peak_pdb.construct_hierarchy()
        else:
            _, peak_hier = create_sol_pdb(
                model_hier, model_xrs,
                params.input.parameters.map_omit_mode, log)
        peak_filename = (params.output.output_file_name_prefix
                         + "_pprobe_peaks.pdb")
        _log_line(log, "Writing Peaks to %s:" % peak_filename)
        peak_hier.write_pdb_file(file_name=peak_filename,
                                 crystal_symmetry=crystal_symmetry,
                                 append_end=True,
                                 anisou=False)
        params.input.pdb.peaks_pdb = peak_filename

    # Wrap up, display file names and info for manual input
    # save parameters for next stage
    phil_fname = _write_param_file(working_phil, params)
    _log_line(log, "_" * 79)
    _log_line(log, "Inputs Processed, final files:")
    _log_line(log, " Model PDB: ", params.input.pdb.model_pdb)
    _log_line(log, " Strip PDB: ", params.input.pdb.strip_pdb)
    _log_line(log, " Peaks PDB: ", params.input.pdb.peaks_pdb)
    _log_line(log, " Map Coeff: ", map_fname)
    # NOTE(review): score_res is only filled in above when map coefficients
    # were supplied; presumably it is otherwise set by the caller or by
    # create_pprobe_maps -- confirm, since None would fail this format.
    _log_line(log, " Resolution: %.2f" % params.input.parameters.score_res)
    _log_line(log, " Params: ", phil_fname)
    # also return params
    return params