def process_experiments(self, tag, experiments, img_id):
    import os
    if not self.params.output.composite_output:
        self.setup_filenames(tag)
    self.img_id = img_id
    self.tag = tag
    self.debug_start(tag)

    try:
        if self.params.LS49.dump_CBF:
            from dxtbx.format.cbf_writer import FullCBFWriter
            # Assuming one imageset per experiment here; applicable for stills.
            ts = experiments[0].imageset.get_image_identifier(0)
            xfel_ts = (ts[0:4] + ts[5:7] + ts[8:10] + ts[11:13] +
                       ts[14:16] + ts[17:19] + ts[20:23])
            print('TIMESTAMPS = ', xfel_ts)
            if xfel_ts in self.timestamps_to_dump:
                cbf_path = os.path.join(self.params.output.logging_dir,
                                        'jungfrauhit_%s.cbf' % xfel_ts)
                cbf_writer = FullCBFWriter(imageset=experiments[0].imageset)
                cbf_writer.write_cbf(cbf_path)
            return None
    except Exception as e:
        print('Error dumping CBFs', tag, str(e))

    # Do spotfinding
    try:
        self.debug_write("spotfind_start")
        observed = self.find_spots(experiments)
        if not self.params.LS49.predict_spots:
            return observed
    except Exception as e:
        print("Error spotfinding", tag, str(e))
        return None

    # Predict spot positions from the Rayonix crystal model
    try:
        self.debug_write('spot_prediction_start')
        observed = self.predict_spots_from_rayonix_crystal_model(
            experiments, observed)
        return observed
    except Exception as e:
        print("Error in spot prediction", tag, str(e))
        return None
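# A minimal, standalone sketch (not part of the pipeline above) of the
# timestamp slicing used in process_experiments.  It assumes the dxtbx image
# identifier is an ISO-8601-like string such as '2018-05-01T12:34:56.789';
# the slices drop the separators to give the compact 17-character XFEL form,
# matching the `int_file[13:30]` extraction used later in this file.
def to_xfel_timestamp(ts):
    """Drop the '-', ':', 'T' and '.' separators from an ISO-8601-like string."""
    return (ts[0:4] + ts[5:7] + ts[8:10] + ts[11:13] +
            ts[14:16] + ts[17:19] + ts[20:23])

assert to_xfel_timestamp('2018-05-01T12:34:56.789') == '20180501123456789'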
def run(argv=None):
    """Compute mean, standard deviation, and maximum projection images from
    a set of images given on the command line.

    @param argv Command line argument list
    @return     @c 0 on successful termination, @c 1 on error, and @c 2
                for command line syntax errors
    """
    import libtbx.load_env
    from libtbx import option_parser

    if argv is None:
        argv = sys.argv
    command_line = (option_parser.option_parser(
        usage="%s [-v] [-a PATH] [-m PATH] [-s PATH] "
              "image1 image2 [image3 ...]" % libtbx.env.dispatcher_name
    ).option(
        None, "--average-path", "-a",
        type="string", default="avg.cbf", dest="avg_path", metavar="PATH",
        help="Write average image to PATH",
    ).option(
        None, "--maximum-path", "-m",
        type="string", default="max.cbf", dest="max_path", metavar="PATH",
        help="Write maximum projection image to PATH",
    ).option(
        None, "--stddev-path", "-s",
        type="string", default="stddev.cbf", dest="stddev_path", metavar="PATH",
        help="Write standard deviation image to PATH",
    ).option(
        None, "--verbose", "-v",
        action="store_true", default=False, dest="verbose",
        help="Print more information about progress",
    ).option(
        None, "--nproc", "-n",
        type="int", default=1, dest="nproc",
        help="Number of processors",
    ).option(
        None, "--num-images-max", "-N",
        type="int", default=None, dest="num_images_max",
        help="Maximum number of frames to average",
    ).option(
        None, "--skip-images", "-S",
        type="int", default=None, dest="skip_images",
        help="Number of images to skip at the start of the dataset",
    ).option(
        None, "--mpi", None,
        type=bool, default=False, dest="mpi",
        help="Set to enable MPI processing",
    )).process(args=argv[1:])

    # Note that it is not an error to omit the output paths, because
    # certain statistics could still be printed, e.g. with the verbose
    # option.
    paths = command_line.args
    if len(paths) == 0:
        command_line.parser.print_usage(file=sys.stderr)
        return 2

    if len(paths) == 1:
        # Test whether the single image is a multi-image file.
        from dxtbx.datablock import DataBlockFactory

        datablocks = DataBlockFactory.from_filenames([paths[0]])
        assert len(datablocks) == 1
        datablock = datablocks[0]
        imagesets = datablock.extract_imagesets()
        assert len(imagesets) == 1
        imageset = imagesets[0]
        if not imageset.reader().is_single_file_reader():
            from libtbx.utils import Usage
            raise Usage("Supply more than one image")

        worker = multi_image_worker(command_line, paths[0], imageset)
        iterable = range(len(imageset))
    else:
        # Multiple images provided
        worker = single_image_worker(command_line)
        iterable = paths

    if command_line.options.skip_images is not None:
        if command_line.options.skip_images >= len(iterable):
            from libtbx.utils import Usage
            raise Usage("Skipping all the images")
        iterable = iterable[command_line.options.skip_images:]
    if (command_line.options.num_images_max is not None
            and command_line.options.num_images_max < len(iterable)):
        iterable = iterable[:command_line.options.num_images_max]
    assert len(iterable) >= 2, "Need more than one image to average"

    if len(paths) > 1:
        from dxtbx.datablock import DataBlockFactory

        datablocks = DataBlockFactory.from_filenames([iterable[0]])
        assert len(datablocks) == 1
        datablock = datablocks[0]
        imagesets = datablock.extract_imagesets()
        assert len(imagesets) == 1
        imageset = imagesets[0]

    from libtbx import easy_mp
    if command_line.options.mpi:
        try:
            from mpi4py import MPI
        except ImportError:
            raise Sorry("MPI not found")
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
        size = comm.Get_size()

        # Chop the list into pieces, depending on rank.  This assigns each
        # process events such that they get every Nth event, where N is the
        # number of processes.
        iterable = [
            iterable[i] for i in xrange(len(iterable))
            if (i + rank) % size == 0
        ]
        results = [worker(iterable)]
        results = comm.gather(results, root=0)
        if rank != 0:
            return
        results_set = []
        for r in results:
            results_set.extend(r)
        results = results_set
    else:
        if command_line.options.nproc == 1:
            results = [worker(iterable)]
        else:
            iterable = splitit(iterable, command_line.options.nproc)
            results = easy_mp.parallel_map(
                func=worker,
                iterable=iterable,
                processes=command_line.options.nproc)

    nfail = 0
    nmemb = 0
    for (i, (r_nfail, r_nmemb, r_max_img, r_sum_distance, r_sum_img,
             r_ssq_img, r_sum_wavelength)) in enumerate(results):
        nfail += r_nfail
        nmemb += r_nmemb
        if i == 0:
            max_img = r_max_img
            sum_distance = r_sum_distance
            sum_img = r_sum_img
            ssq_img = r_ssq_img
            sum_wavelength = r_sum_wavelength
        else:
            for p in xrange(len(sum_img)):
                sel = (r_max_img[p] > max_img[p]).as_1d()
                max_img[p].set_selected(sel, r_max_img[p].select(sel))
                sum_img[p] += r_sum_img[p]
                ssq_img[p] += r_ssq_img[p]
            sum_distance += r_sum_distance
            sum_wavelength += r_sum_wavelength

    # Early exit if no statistics were accumulated.
    if command_line.options.verbose:
        sys.stderr.write("Processed %d images (%d failed)\n" % (nmemb, nfail))
    if nmemb == 0:
        return 0

    # Calculate averages for measures where other statistics do not make
    # sense.  Note that avg_img is required for stddev_img.
    avg_img = tuple(
        [sum_img[p].as_double() / nmemb for p in xrange(len(sum_img))])
    avg_distance = sum_distance / nmemb
    avg_wavelength = sum_wavelength / nmemb

    detector = imageset.get_detector()
    h = detector.hierarchy()
    origin = h.get_local_origin()
    h.set_local_frame(
        h.get_local_fast_axis(),
        h.get_local_slow_axis(),
        (origin[0], origin[1], -avg_distance),
    )
    imageset.get_beam().set_wavelength(avg_wavelength)

    # Output the average image, maximum projection image, and standard
    # deviation image, if requested.
    if command_line.options.avg_path is not None:
        for p in xrange(len(detector)):
            fast, slow = detector[p].get_image_size()
            avg_img[p].resize(flex.grid(slow, fast))
        writer = FullCBFWriter(imageset=imageset)
        cbf = writer.get_cbf_handle(header_only=True)
        writer.add_data_to_cbf(cbf, data=avg_img)
        writer.write_cbf(command_line.options.avg_path, cbf=cbf)

    if command_line.options.max_path is not None:
        for p in xrange(len(detector)):
            fast, slow = detector[p].get_image_size()
            max_img[p].resize(flex.grid(slow, fast))
        max_img = tuple(max_img)
        writer = FullCBFWriter(imageset=imageset)
        cbf = writer.get_cbf_handle(header_only=True)
        writer.add_data_to_cbf(cbf, data=max_img)
        writer.write_cbf(command_line.options.max_path, cbf=cbf)

    if command_line.options.stddev_path is not None:
        stddev_img = []
        for p in xrange(len(detector)):
            stddev_img.append(ssq_img[p].as_double() -
                              sum_img[p].as_double() * avg_img[p])
            # Accumulating floating-point numbers introduces errors, which
            # may cause negative variances.  Since a two-pass approach is
            # unacceptable, the standard deviation is clamped at zero.
            stddev_img[p].set_selected(stddev_img[p] < 0, 0)
            if nmemb == 1:
                stddev_img[p] = flex.sqrt(stddev_img[p])
            else:
                stddev_img[p] = flex.sqrt(stddev_img[p] / (nmemb - 1))
            fast, slow = detector[p].get_image_size()
            stddev_img[p].resize(flex.grid(slow, fast))
        stddev_img = tuple(stddev_img)
        writer = FullCBFWriter(imageset=imageset)
        cbf = writer.get_cbf_handle(header_only=True)
        writer.add_data_to_cbf(cbf, data=stddev_img)
        writer.write_cbf(command_line.options.stddev_path, cbf=cbf)

    return 0
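# A self-contained sketch (illustrative only, not part of the script above) of
# the round-robin MPI pattern used in run(): each rank keeps every size-th
# item, processes its share, and rank 0 gathers the partial results.  Launch
# with e.g. `mpirun -n 4 python demo.py`; the summation here is a stand-in
# for the real worker.
from mpi4py import MPI

def demo_round_robin(items):
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    # Every rank keeps the items whose index lands on it modulo size.
    my_items = [items[i] for i in range(len(items)) if (i + rank) % size == 0]
    partial = sum(my_items)  # stand-in for worker(my_items)
    gathered = comm.gather(partial, root=0)
    if rank == 0:
        return sum(gathered)  # combine the per-rank partial results

if __name__ == "__main__":
    total = demo_round_robin(list(range(100)))
    if total is not None:  # only rank 0 gets a value back
        print("total =", total)  # 4950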
def run(argv=None):
    """Compute mean, standard deviation, and maximum projection images from
    a set of images given on the command line.

    @param argv Command line argument list
    @return     @c 0 on successful termination, @c 1 on error, and @c 2
                for command line syntax errors
    """
    if argv is None:
        argv = sys.argv
    dxtbx.util.encode_output_as_utf8()
    progname = os.getenv("LIBTBX_DISPATCHER_NAME")
    if not progname or progname.endswith(".python"):
        progname = "%prog"
    command_line = (option_parser.option_parser(
        usage=f"{progname} [-v] [-a PATH] [-m PATH] [-s PATH] "
              "image1 image2 [image3 ...]"
    ).option(
        None, "--average-path", "-a",
        type="string", default="avg.cbf", dest="avg_path", metavar="PATH",
        help="Write average image to PATH",
    ).option(
        None, "--maximum-path", "-m",
        type="string", default="max.cbf", dest="max_path", metavar="PATH",
        help="Write maximum projection image to PATH",
    ).option(
        None, "--stddev-path", "-s",
        type="string", default="stddev.cbf", dest="stddev_path", metavar="PATH",
        help="Write standard deviation image to PATH",
    ).option(
        None, "--verbose", "-v",
        action="store_true", default=False, dest="verbose",
        help="Print more information about progress",
    ).option(
        None, "--nproc", "-n",
        type="int", default=1, dest="nproc",
        help="Number of processors",
    ).option(
        None, "--num-images-max", "-N",
        type="int", default=None, dest="num_images_max",
        help="Maximum number of frames to average",
    ).option(
        None, "--skip-images", "-S",
        type="int", default=None, dest="skip_images",
        help="Number of images to skip at the start of the dataset",
    ).option(
        None, "--mpi", None,
        type=bool, default=False, dest="mpi",
        help="Set to enable MPI processing",
    )).process(args=argv[1:])

    # Note that it is not an error to omit the output paths, because
    # certain statistics could still be printed, e.g. with the verbose
    # option.
    paths = command_line.args
    if len(paths) == 0:
        command_line.parser.print_usage(file=sys.stderr)
        return 2

    experiments = ExperimentListFactory.from_filenames([paths[0]],
                                                       load_models=False)

    if len(paths) == 1:
        worker = multi_image_worker(command_line, paths[0], experiments)
        iterable = list(range(len(experiments)))
    else:
        # Multiple images provided
        worker = single_image_worker(command_line)
        iterable = paths

    if command_line.options.skip_images is not None:
        if command_line.options.skip_images >= len(iterable):
            raise Usage("Skipping all the images")
        iterable = iterable[command_line.options.skip_images:]
    if (command_line.options.num_images_max is not None
            and command_line.options.num_images_max < len(iterable)):
        iterable = iterable[:command_line.options.num_images_max]
    assert len(iterable) >= 2, "Need more than one image to average"

    if command_line.options.mpi:
        try:
            from mpi4py import MPI
        except ImportError:
            raise Sorry("MPI not found")
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
        size = comm.Get_size()

        # Chop the list into pieces, depending on rank.  This assigns each
        # process events such that they get every Nth event, where N is the
        # number of processes.
        iterable = [i for n, i in enumerate(iterable) if (n + rank) % size == 0]
        (
            r_nfail,
            r_nmemb,
            r_max_img,
            r_sum_distance,
            r_sum_img,
            r_ssq_img,
            r_sum_wavelength,
        ) = worker(iterable)

        nfail = np.array([0])
        nmemb = np.array([0])
        sum_distance = np.array([0.0])
        sum_wavelength = np.array([0.0])
        comm.Reduce(np.array([r_nfail]), nfail)
        comm.Reduce(np.array([r_nmemb]), nmemb)
        comm.Reduce(np.array([r_sum_distance]), sum_distance)
        comm.Reduce(np.array([r_sum_wavelength]), sum_wavelength)
        nfail = int(nfail[0])
        nmemb = int(nmemb[0])
        sum_distance = float(sum_distance[0])
        sum_wavelength = float(sum_wavelength[0])

        def reduce_image(data, op=MPI.SUM):
            result = []
            for panel_data in data:
                panel_data = panel_data.as_numpy_array()
                reduced_data = np.zeros(panel_data.shape).astype(panel_data.dtype)
                comm.Reduce(panel_data, reduced_data, op=op)
                result.append(flex.double(reduced_data))
            return result

        max_img = reduce_image(r_max_img, MPI.MAX)
        sum_img = reduce_image(r_sum_img)
        ssq_img = reduce_image(r_ssq_img)
        if rank != 0:
            return
        avg_img = tuple(s / nmemb for s in sum_img)
    else:
        if command_line.options.nproc == 1:
            results = [worker(iterable)]
        else:
            iterable = splitit(iterable, command_line.options.nproc)
            results = easy_mp.parallel_map(
                func=worker,
                iterable=iterable,
                processes=command_line.options.nproc)

        nfail = 0
        nmemb = 0
        for (i, (r_nfail, r_nmemb, r_max_img, r_sum_distance, r_sum_img,
                 r_ssq_img, r_sum_wavelength)) in enumerate(results):
            nfail += r_nfail
            nmemb += r_nmemb
            if i == 0:
                max_img = r_max_img
                sum_distance = r_sum_distance
                sum_img = r_sum_img
                ssq_img = r_ssq_img
                sum_wavelength = r_sum_wavelength
            else:
                for p in range(len(sum_img)):
                    sel = (r_max_img[p] > max_img[p]).as_1d()
                    max_img[p].set_selected(sel, r_max_img[p].select(sel))
                    sum_img[p] += r_sum_img[p]
                    ssq_img[p] += r_ssq_img[p]
                sum_distance += r_sum_distance
                sum_wavelength += r_sum_wavelength

        # Early exit if no statistics were accumulated.
        if command_line.options.verbose:
            sys.stdout.write("Processed %d images (%d failed)\n" % (nmemb, nfail))
        if nmemb == 0:
            return 0

        # Calculate averages for measures where other statistics do not make
        # sense.  Note that avg_img is required for stddev_img.
        avg_img = tuple(s.as_double() / nmemb for s in sum_img)

    avg_distance = sum_distance / nmemb
    avg_wavelength = sum_wavelength / nmemb

    expt = experiments[0]
    expt.load_models()
    detector = expt.detector
    h = detector.hierarchy()
    origin = h.get_local_origin()
    h.set_local_frame(
        h.get_local_fast_axis(),
        h.get_local_slow_axis(),
        (origin[0], origin[1], -avg_distance),
    )
    expt.beam.set_wavelength(avg_wavelength)
    assert expt.beam.get_wavelength() == expt.imageset.get_beam(0).get_wavelength()

    # Output the average image, maximum projection image, and standard
    # deviation image, if requested.
    if command_line.options.avg_path is not None:
        for n, d in enumerate(detector):
            fast, slow = d.get_image_size()
            avg_img[n].resize(flex.grid(slow, fast))
        writer = FullCBFWriter(imageset=expt.imageset)
        cbf = writer.get_cbf_handle(header_only=True)
        writer.add_data_to_cbf(cbf, data=avg_img)
        writer.write_cbf(command_line.options.avg_path, cbf=cbf)

    if command_line.options.max_path is not None:
        for n, d in enumerate(detector):
            fast, slow = d.get_image_size()
            max_img[n].resize(flex.grid(slow, fast))
        max_img = tuple(max_img)
        writer = FullCBFWriter(imageset=expt.imageset)
        cbf = writer.get_cbf_handle(header_only=True)
        writer.add_data_to_cbf(cbf, data=max_img)
        writer.write_cbf(command_line.options.max_path, cbf=cbf)

    if command_line.options.stddev_path is not None:
        stddev_img = []
        for n, d in enumerate(detector):
            stddev_img.append(ssq_img[n].as_double() -
                              sum_img[n].as_double() * avg_img[n])
            # Accumulating floating-point numbers introduces errors, which
            # may cause negative variances.  Since a two-pass approach is
            # unacceptable, the standard deviation is clamped at zero.
            stddev_img[n].set_selected(stddev_img[n] < 0, 0)
            if nmemb == 1:
                stddev_img[n] = flex.sqrt(stddev_img[n])
            else:
                stddev_img[n] = flex.sqrt(stddev_img[n] / (nmemb - 1))
            fast, slow = d.get_image_size()
            stddev_img[n].resize(flex.grid(slow, fast))
        stddev_img = tuple(stddev_img)
        writer = FullCBFWriter(imageset=expt.imageset)
        cbf = writer.get_cbf_handle(header_only=True)
        writer.add_data_to_cbf(cbf, data=stddev_img)
        writer.write_cbf(command_line.options.stddev_path, cbf=cbf)

    return 0
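# A minimal numpy sketch (illustrative, independent of the flex-based code
# above) of the single-pass variance used for stddev_img: accumulate the sum
# and the sum of squares over all images, then var = ssq - sum * mean, clamped
# at zero before dividing by (n - 1), because floating-point accumulation
# error can push the raw value slightly negative.
import numpy as np

def single_pass_stddev(images):
    images = np.asarray(images, dtype=float)  # shape (n, slow, fast)
    n = len(images)
    s = images.sum(axis=0)
    ssq = (images * images).sum(axis=0)
    var = ssq - s * (s / n)           # == sum((x - mean)**2), up to rounding
    var = np.clip(var, 0, None)       # clamp negative variances to zero
    return np.sqrt(var / (n - 1)) if n > 1 else np.sqrt(var)

# Agrees with the two-pass estimate up to rounding error:
imgs = np.random.rand(5, 4, 4)
assert np.allclose(single_pass_stddev(imgs), imgs.std(axis=0, ddof=1))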
def run(self):
    """Execute the script."""
    from dials.util import log
    from time import time
    from libtbx import easy_mp
    import copy

    # Parse the command line
    params, options, all_paths = self.parser.parse_args(
        show_diff_phil=False, return_unhandled=True, quick_parse=True)

    # Check we have some filenames
    if not all_paths:
        self.parser.print_help()
        return

    # Mask validation
    for mask_path in params.spotfinder.lookup.mask, params.integration.lookup.mask:
        if mask_path is not None and not os.path.isfile(mask_path):
            raise Sorry("Mask %s not found" % mask_path)

    # Save the options
    self.options = options
    self.params = params

    st = time()

    # Configure logging
    #log.config(
    #    params.verbosity, info="exafel_spotfinding.process.log",
    #    debug="exafel.spot_finding.debug.log")

    bad_phils = [f for f in all_paths if os.path.splitext(f)[1] == ".phil"]
    if len(bad_phils) > 0:
        self.parser.print_help()
        logger.error("Error: the following phil files were not understood: %s"
                     % (", ".join(bad_phils)))
        return

    # Log the diff phil
    diff_phil = self.parser.diff_phil.as_str()
    if diff_phil != "":
        logger.info("The following parameters have been modified:\n")
        logger.info(diff_phil)

    for abs_params in self.params.integration.absorption_correction:
        if abs_params.apply:
            if not (self.params.integration.debug.output
                    and not self.params.integration.debug.separate_files):
                raise Sorry(
                    "Shoeboxes must be saved to integration intermediates to "
                    "apply an absorption correction. Set "
                    "integration.debug.output=True, "
                    "integration.debug.separate_files=False and "
                    "integration.debug.delete_shoeboxes=True to temporarily "
                    "store shoeboxes.")

    self.load_reference_geometry()
    from dials.command_line.dials_import import ManualGeometryUpdater
    update_geometry = ManualGeometryUpdater(params)

    # Import stuff
    logger.info("Loading files...")
    pre_import = params.dispatch.pre_import or len(all_paths) == 1
    if True:  # pre_import:
        # Handle still imagesets by breaking them apart into multiple
        # experiments.  Further handle single-file still imagesets (like
        # HDF5) by tagging each frame using its index.
        experiments = ExperimentList()
        for path in all_paths:
            experiments.extend(do_import(path, load_models=False))

        indices = []
        basenames = []
        split_experiments = []
        for i, imageset in enumerate(experiments.imagesets()):
            assert len(imageset) == 1
            paths = imageset.paths()
            indices.append(i)
            basenames.append(os.path.splitext(os.path.basename(paths[0]))[0])
            split_experiments.append(experiments[i:i + 1])
        tags = []
        for i, basename in zip(indices, basenames):
            if basenames.count(basename) > 1:
                tags.append("%s_%05d" % (basename, i))
            else:
                tags.append(basename)

        # Wrapper function
        def do_work(i, item_list):
            processor = SpotFinding_Processor(
                copy.deepcopy(params), composite_tag="%04d" % i, rank=i)

            if params.LS49.dump_CBF:
                print('READING IN TIMESTAMPS TO DUMP')
                # Read in file with timestamps information
                processor.timestamps_to_dump = []
                for fin in glob.glob(
                        os.path.join(
                            self.params.LS49.path_to_rayonix_crystal_models,
                            'idx-fee_data*')):
                    #for fin in glob.glob(os.path.join(self.params.LS49.path_to_rayonix_crystal_models, 'int-0-*')):
                    int_file = os.path.basename(fin)
                    ts = int_file[13:30]
                    processor.timestamps_to_dump.append(ts)
                #with open(os.path.join(self.params.output.output_dir, '../timestamps_to_dump.dat'), 'r') as fin:
                #    for line in fin:
                #        if line != '\n':
                #            ts = line.split()[0].strip()
                #            processor.timestamps_to_dump.append(ts)

            from dials.array_family import flex
            all_spots_from_rank = flex.reflection_table()
            for item in item_list:
                try:
                    assert len(item[1]) == 1
                    experiment = item[1][0]
                    experiment.load_models()
                    imageset = experiment.imageset
                    update_geometry(imageset)
                    experiment.beam = imageset.get_beam()
                    experiment.detector = imageset.get_detector()
                except RuntimeError as e:
                    logger.warning("Error updating geometry on item %s, %s"
                                   % (str(item[0]), str(e)))
                    continue

                if self.reference_detector is not None:
                    from dxtbx.model import Detector
                    experiment = item[1][0]
                    imageset = experiment.imageset
                    imageset.set_detector(
                        Detector.from_dict(self.reference_detector.to_dict()))
                    experiment.detector = imageset.get_detector()

                refl_table = processor.process_experiments(
                    item[0], item[1], item[2])
                if refl_table is not None:
                    all_spots_from_rank.extend(refl_table)
            processor.finalize()
            return all_spots_from_rank

        iterable = zip(tags, split_experiments, indices)

    # Process the data
    if params.mp.method == 'mpi':
        from mpi4py import MPI
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()  # each process in MPI has a unique id, 0-indexed
        size = comm.Get_size()  # size: number of processes running in this job

        # Configure the logging
        if params.output.logging_dir is None:
            info_path = ''
            debug_path = ''
        else:
            import sys
            log_path = os.path.join(params.output.logging_dir,
                                    "log_rank%04d.out" % rank)
            error_path = os.path.join(params.output.logging_dir,
                                      "error_rank%04d.out" % rank)
            print("Redirecting stdout to %s" % log_path)
            print("Redirecting stderr to %s" % error_path)
            sys.stdout = open(log_path, 'a', buffering=0)
            sys.stderr = open(error_path, 'a', buffering=0)
            print("Should be redirected now")

            info_path = os.path.join(params.output.logging_dir,
                                     "info_rank%04d.out" % rank)
            debug_path = os.path.join(params.output.logging_dir,
                                      "debug_rank%04d.out" % rank)

        from dials.util import log
        print('IOTA_ALL_SPOTS_RANKS_0')
        #log.config(params.verbosity, info=info_path, debug=debug_path)

        subset = [item for i, item in enumerate(iterable)
                  if (i + rank) % size == 0]
        all_spots_from_rank = do_work(rank, subset)
        all_spots_rank0 = comm.gather(all_spots_from_rank, root=0)
        print('IOTA_ALL_SPOTS_RANKS_1')
        exit()  # NOTE: debug early exit; the rank-0 aggregation below is unreachable while this is present
        if rank == 0:
            from dials.array_family import flex
            all_spots = flex.reflection_table()
            for ii, refl_table in enumerate(all_spots_rank0):
                if refl_table is not None:
                    all_spots.extend(refl_table)
            from libtbx.easy_pickle import dump
            #dump('all_spots.pickle', all_spots_rank0)
            #dump('all_experiments.pickle', experiments)
            #print('IOTA_ALL_SPOTS_RANKS_2')
            #print('IOTA_ALL_SPOTS_RANKS_3')
            from dials.algorithms.spot_finding import per_image_analysis
            from six.moves import cStringIO as StringIO
            s = StringIO()
            # Assuming one datablock.  Might be dangerous.  FIXME
            from dxtbx.format.cbf_writer import FullCBFWriter
            for i, imageset in enumerate(experiments.imagesets()):
                print("Number of centroids per image for imageset %i:" % i,
                      file=s)
                #from IPython import embed; embed(); exit()
                print('IOTA_ALL_SPOTS_RANKS_4')
                stats = custom_stats_imageset(
                    imageset, all_spots.select(all_spots['img_id'] == i))
                n_spots_total = flex.int(stats.n_spots_total)
                max_number_of_spots = max(stats.n_spots_total)
                for num_spots in range(1, max_number_of_spots + 1):
                    print("IOTA_NUMBER_OF_SPOTS %d %d" % (
                        num_spots,
                        len(n_spots_total.select(n_spots_total == num_spots))))
                if max_number_of_spots > 0:
                    # Assuming one imageset per experiment here; applicable
                    # for stills.
                    ts = imageset.get_image_identifier(0)
                    xfel_ts = (ts[0:4] + ts[5:7] + ts[8:10] + ts[11:13] +
                               ts[14:16] + ts[17:19] + ts[20:23])
                    cbf_path = os.path.join(params.output.logging_dir,
                                            'jungfrau_%s.cbf' % xfel_ts)
                    cbf_writer = FullCBFWriter(imageset=imageset)
                    cbf_writer.write_cbf(cbf_path)
                per_image_analysis.print_table(stats)
            logger.info(s.getvalue())
        comm.barrier()
    else:
        do_work(0, iterable)
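# A small illustrative sketch (pure Python, no flex; not part of the script
# above) of the per-image spot histogram printed by the rank-0 block: given
# the number of spots found on each image, report how many images had exactly
# num_spots spots, for every value from 1 up to the maximum observed.
from collections import Counter

def print_spot_histogram(n_spots_per_image):
    counts = Counter(n_spots_per_image)
    max_spots = max(n_spots_per_image, default=0)
    for num_spots in range(1, max_spots + 1):
        print("IOTA_NUMBER_OF_SPOTS %d %d"
              % (num_spots, counts.get(num_spots, 0)))

print_spot_histogram([0, 2, 2, 5, 1])
# IOTA_NUMBER_OF_SPOTS 1 1
# IOTA_NUMBER_OF_SPOTS 2 2
# IOTA_NUMBER_OF_SPOTS 3 0
# IOTA_NUMBER_OF_SPOTS 4 0
# IOTA_NUMBER_OF_SPOTS 5 1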