def initialize(self):
    """Prepare the reflection data and the job layout for processing."""
    # Block/job computation below needs per-reflection bounding boxes
    assert "bbox" in self.reflections, "Reflections have no bbox"

    # Keep only the reflections flagged as reference spots
    mask = self.reflections.get_flags(self.reflections.flags.reference_spot)
    if mask.count(True) == 0:
        raise RuntimeError("No reference reflections given")
    self.reflections = self.reflections.select(mask)

    # Work out the block size and the per-job frame ranges, then split
    # partial reflections that cross block boundaries
    self.compute_blocks()
    self.compute_jobs()
    self.reflections = split_partials_over_boundaries(
        self.reflections, self.params.integration.block.size
    )

    # Build the manager that hands reflections out to each job
    self.manager = SimpleReflectionManager(
        self.blocks, self.reflections, self.params.integration.mp.njobs
    )

    # Parallel reading of HDF5 from the same handle is not allowed. Python
    # multiprocessing is a bit messed up and used fork on linux so need to
    # close and reopen file.
    self.experiments.nullify_all_single_file_reader_format_instances()
def initialize(self): """ Initialise the processing """ # Ensure the reflections contain bounding boxes assert "bbox" in self.reflections, "Reflections have no bbox" # Compute the block size and jobs self.compute_blocks() self.compute_jobs() self.reflections = split_partials_over_boundaries( self.reflections, self.params.integration.block.size) # Create the reflection manager self.manager = SimpleReflectionManager( self.blocks, self.reflections, self.params.integration.mp.njobs)
def initialize(self):
    """Filter to reference reflections and set up the processing jobs."""
    # Bounding boxes are required to size the blocks and jobs
    assert "bbox" in self.reflections, "Reflections have no bbox"

    # Restrict processing to the reflections used in refinement
    reference_mask = self.reflections.get_flags(
        self.reflections.flags.reference_spot
    )
    if reference_mask.count(True) == 0:
        raise RuntimeError("No reference reflections given")
    self.reflections = self.reflections.select(reference_mask)

    # Derive the block size, the job list, and split partial reflections
    # that cross block boundaries
    self.compute_blocks()
    self.compute_jobs()
    self.reflections = split_partials_over_boundaries(
        self.reflections, self.params.integration.block.size
    )

    # Manager that partitions the reflections across the parallel jobs
    self.manager = SimpleReflectionManager(
        self.blocks, self.reflections, self.params.integration.mp.njobs
    )
class ReferenceCalculatorManager(object):
    """A class to manage processing book-keeping."""

    def __init__(self, experiments, reflections, params):
        """
        Initialise the manager.

        :param experiments: The list of experiments
        :param reflections: The list of reflections
        :param params: The phil parameters
        """
        # Save some data
        self.experiments = experiments
        self.reflections = reflections
        self.reference = None

        # Save some parameters
        self.params = params

        # Set the finalized flag to False
        self.finalized = False

        # Initialise the timing information
        # self.time = TimingInfo()

        self.initialize()

    def initialize(self):
        """
        Initialise the processing.

        Selects the reference reflections, computes the block size and job
        list, splits boundary-crossing partials, and builds the reflection
        manager.

        :raises RuntimeError: If no reference reflections are present
        """
        # Ensure the reflections contain bounding boxes
        assert "bbox" in self.reflections, "Reflections have no bbox"

        # Select only those reflections used in refinement
        selection = self.reflections.get_flags(
            self.reflections.flags.reference_spot)
        if selection.count(True) == 0:
            raise RuntimeError("No reference reflections given")
        self.reflections = self.reflections.select(selection)

        # Compute the block size and jobs
        self.compute_blocks()
        self.compute_jobs()
        self.reflections = split_partials_over_boundaries(
            self.reflections, self.params.integration.block.size)

        # Create the reflection manager
        self.manager = SimpleReflectionManager(
            self.blocks, self.reflections, self.params.integration.mp.njobs)

        # Parallel reading of HDF5 from the same handle is not allowed. Python
        # multiprocessing is a bit messed up and used fork on linux so need to
        # close and reopen file.
        self.experiments.nullify_all_single_file_reader_format_instances()

    def task(self, index):
        """
        Get a task.

        :param index: The job index
        :return: A ReferenceCalculatorJob, or a NullTask if the job has no
            reflections
        """
        frames = self.manager.job(index)
        experiments = self.experiments
        reflections = self.manager.split(index)
        if len(reflections) == 0:
            logger.warning("*** WARNING: no reflections in job %d ***", index)
            task = NullTask(index=index, reflections=reflections)
        else:
            task = ReferenceCalculatorJob(
                index=index,
                job=frames,
                experiments=experiments,
                reflections=reflections,
                params=self.params,
            )
        return task

    def tasks(self):
        """Iterate through the tasks."""
        for i in range(len(self)):
            yield self.task(i)

    def accumulate(self, result):
        """
        Accumulate the results.

        :param result: A job result carrying index, reflections and reference
        """
        self.manager.accumulate(result.index, result.reflections)
        # The first result seeds the reference; later ones are merged into it
        if self.reference is None:
            self.reference = result.reference
        else:
            self.reference.accumulate(result.reference)

    def finalize(self):
        """Finalize the processing and finish."""
        # Check manager is finished
        assert self.manager.finished(), "Manager is not finished"

        # Set the reference profiles
        self.reference = self.reference.reference_profiles()
        self.finalized = True

    def result(self):
        """
        Return the result.

        :return: The result
        """
        assert self.finalized, "Manager is not finalized"
        return self.manager.data()

    def finished(self):
        """
        Return if all tasks have finished.

        :return: True/False all tasks have finished
        """
        return self.finalized and self.manager.finished()

    def __len__(self):
        """
        Return the number of tasks.

        :return: the number of tasks
        """
        return len(self.manager)

    def compute_max_block_size(self):
        """
        Compute the maximum block size allowed by the memory budget.

        :return: The maximum number of frames per block
        """
        total_memory = psutil.virtual_memory().total
        max_memory_usage = self.params.integration.block.max_memory_usage
        assert max_memory_usage > 0.0, "maximum memory usage must be > 0"
        assert max_memory_usage <= 1.0, "maximum memory usage must be <= 1"
        limit_memory = int(floor(total_memory * max_memory_usage))
        return MultiThreadedReferenceProfiler.compute_max_block_size(
            self.experiments[0].imageset, max_memory_usage=limit_memory)

    def compute_blocks(self):
        """
        Compute the processing block size.

        Converts ``block.size`` (auto, radians, degrees or frames) into a
        number of frames, capped at the maximum block size the memory
        budget allows. On exit ``block.units`` is always "frames".

        :raises RuntimeError: If the units are unknown or the requested
            block size exceeds the maximum allowable size
        """
        block = self.params.integration.block
        max_block_size = self.compute_max_block_size()
        if block.size in [Auto, "auto", "Auto"]:
            assert block.threshold > 0, "Threshold must be > 0"
            assert block.threshold <= 1.0, "Threshold must be <= 1"
            # Sort reflections by their extent in frames and pick the size at
            # the requested percentile
            nframes = sorted([b[5] - b[4] for b in self.reflections["bbox"]])
            # Clamp the index so a threshold of exactly 1.0 selects the
            # largest extent instead of raising an IndexError (threshold *
            # len(nframes) == len(nframes) is out of range)
            cutoff = min(int(block.threshold * len(nframes)), len(nframes) - 1)
            block_size = nframes[cutoff]
            if block_size > max_block_size:
                logger.warning(
                    "Computed block size (%s) > maximum block size (%s).",
                    block_size,
                    max_block_size,
                )
                logger.warning(
                    "Setting block size to maximum; some reflections may be partial"
                )
                block_size = max_block_size
        else:
            scan = self.experiments[0].scan
            if block.units == "radians":
                phi0, dphi = scan.get_oscillation(deg=False)
                block_size = int(ceil(block.size / dphi))
            elif block.units == "degrees":
                phi0, dphi = scan.get_oscillation()
                block_size = int(ceil(block.size / dphi))
            elif block.units == "frames":
                block_size = int(ceil(block.size))
            else:
                raise RuntimeError("Unknown block_size unit %r" % block.units)
            if block_size > max_block_size:
                raise RuntimeError("""
        The requested block size (%s) is larger than the maximum allowable block
        size (%s). Either decrease the requested block size or increase the
        amount of available memory.
        """ % (block_size, max_block_size))
        block.size = block_size
        block.units = "frames"

    def compute_jobs(self):
        """Compute the jobs: one block of frames per job."""
        imageset = self.experiments[0].imageset
        array_range = imageset.get_array_range()
        block = self.params.integration.block
        # compute_blocks() must have normalised the size to frames already
        assert block.units == "frames"
        assert block.size > 0
        self.blocks = SimpleBlockList(array_range, block.size)
        assert len(self.blocks) > 0, "Invalid number of jobs"

    def summary(self):
        """
        Get a summary of the processing.

        :return: A formatted, human-readable summary string
        :raises RuntimeError: If the experiments are a mix of stills and
            sequences
        """
        # Compute the task table
        if self.experiments.all_stills():
            rows = [["#", "Group", "Frame From", "Frame To", "# Reflections"]]
            for i in range(len(self)):
                job = self.manager.job(i)
                group = job.index()
                f0, f1 = job.frames()
                n = self.manager.num_reflections(i)
                rows.append([str(i), str(group), str(f0), str(f1), str(n)])
        elif self.experiments.all_sequences():
            rows = [[
                "#",
                "Frame From",
                "Frame To",
                "Angle From",
                "Angle To",
                "# Reflections",
            ]]
            for i in range(len(self)):
                f0, f1 = self.manager.job(i)
                scan = self.experiments[0].scan
                p0 = scan.get_angle_from_array_index(f0)
                p1 = scan.get_angle_from_array_index(f1)
                n = self.manager.num_reflections(i)
                rows.append(
                    [str(i), str(f0), str(f1), str(p0), str(p1), str(n)])
        else:
            raise RuntimeError(
                "Experiments must be all sequences or all stills")

        # The job table
        task_table = libtbx.table_utils.format(rows,
                                               has_header=True,
                                               justify="right",
                                               prefix=" ")

        # The format string
        if self.params.integration.block.size is None:
            block_size = "auto"
        else:
            block_size = str(self.params.integration.block.size)
        fmt = ("Processing reflections in the following blocks of images:\n"
               "\n"
               " block_size: %s %s\n"
               "\n"
               "%s\n")
        return fmt % (block_size, self.params.integration.block.units,
                      task_table)