def run(args):
    """Run the indexing exercises in parallel.

    Prints a notice and returns without running anything when the
    ``dials_regression`` module is not available.

    :param args: optional sequence of 1-based exercise numbers (anything
        ``int()`` accepts); when empty, all exercises are run.
    """
    if not libtbx.env.has_module("dials_regression"):
        # Single-argument parenthesised print is valid on Python 2 and 3.
        print("Skipping exercise_index_3D_FFT_simple: dials_regression not present")
        return

    exercises = (exercise_1, exercise_2, exercise_3, exercise_4, exercise_5,
                 exercise_6, exercise_7, exercise_8, exercise_9, exercise_10,
                 exercise_11, exercise_12, exercise_13, exercise_14,
                 exercise_15, exercise_16)
    if len(args):
        args = [int(arg) for arg in args]
        for arg in args:
            assert arg > 0
        # Exercise numbers on the command line are 1-based.
        exercises = [exercises[arg - 1] for arg in args]

    from libtbx import easy_mp

    nproc = easy_mp.get_processes(libtbx.Auto)
    nproc = min(nproc, len(exercises))

    def run_parallel(item):
        # Each work item is a 1-tuple wrapping the exercise callable.
        assert len(item) == 1
        exercise = item[0]
        exercise()

    easy_mp.parallel_map(
        func=run_parallel,
        iterable=[(e,) for e in exercises],
        processes=nproc)
def exercise_multiprocessing(mp_nproc=1, mp_threads=1, mp_method="multiprocessing", tasks=3):
    """Check that a state object changes after a parallel_map run.

    A single state object is created, its state recorded, and then
    ``tasks`` jobs that call ``generate_state()`` on it are dispatched via
    ``easy_mp.parallel_map``. The function asserts that the state read
    afterwards differs from the initial one.

    :param mp_nproc: number of processes handed to parallel_map
    :param mp_threads: only reported in the banner line
    :param mp_method: parallel_map method name
    :param tasks: number of jobs to run
    """
    banner = "Running %s test with %d processes, %d threads, %d tasks" % (
        mp_method, mp_nproc, mp_threads, tasks)
    print(banner)

    # One shared instance; remember what it looked like before the run
    shared = state_object()
    state_before = shared.get_state()

    def change_stored_state(task):
        # Random short delay, then alter the state held by the shared object
        time.sleep(random.random() / 2 / tasks)
        shared.generate_state()

    # Run the state-changing job once per task
    easy_mp.parallel_map(
        iterable=range(tasks),
        func=change_stored_state,
        processes=mp_nproc,
        method=mp_method)

    # The state must have changed
    state_after = shared.get_state()
    assert state_before != state_after
def exercise_multiprocessing(mp_nproc=1, mp_threads=1, mp_method="multiprocessing", tasks=3):
    """Check that a state object changes after a parallel_map run.

    Creates one state object, records its state, runs ``tasks`` jobs that
    call ``generate_state()`` on it through ``easy_mp.parallel_map`` and
    asserts that the state read afterwards differs from the initial one.

    :param mp_nproc: number of processes handed to parallel_map
    :param mp_threads: only reported in the banner line
    :param mp_method: parallel_map method name
    :param tasks: number of jobs to run
    """
    # Parenthesised single-argument print works on Python 2 and 3 alike.
    print("Running %s test with %d processes, %d threads, %d tasks" %
          (mp_method, mp_nproc, mp_threads, tasks))

    # Create one shared instance of the state object and extract its initial state
    master_state_object = state_object()
    initial_state = master_state_object.get_state()

    # This is a function that changes the state on the object
    def change_stored_state(task):
        time.sleep(random.random() / 2 / tasks)
        master_state_object.generate_state()

    # Call the state-changing function in parallel
    easy_mp.parallel_map(
        iterable=range(tasks),
        func=change_stored_state,
        processes=mp_nproc,
        method=mp_method)

    # Get the final state of the object
    final_state = master_state_object.get_state()

    # Did it change?
    assert initial_state != final_state
def scale_frames(frames, frame_files, iparams):
    """Scale frames in parallel and return the successful results.

    Unless ``iparams.flag_apply_b_by_frame`` is set, a first parallel pass
    computes a mean intensity per frame and the median of those values is
    used as the scaling reference; otherwise the reference is 0. A second
    parallel pass scales each frame against that reference.

    :param frames: frame numbers, paired positionally with ``frame_files``
    :param frame_files: frame file names
    :param iparams: parameter object (reads ``flag_apply_b_by_frame`` and
        ``n_processors``)
    :return: list of the non-None first elements of the per-frame results
    """
    avg_mode = 'average'

    if iparams.flag_apply_b_by_frame:
        mean_of_mean_I = 0
    else:
        # First pass: <I> for every frame
        mean_I_outcomes = parallel_map(
            iterable=[(frame_file, iparams, avg_mode) for frame_file in frame_files],
            func=determine_mean_I_mproc,
            processes=iparams.n_processors)
        frames_mean_I = flex.double()
        for outcome in mean_I_outcomes:
            if outcome is None:
                continue
            mean_I, _txt = outcome
            if mean_I is not None:
                frames_mean_I.append(mean_I)
        mean_of_mean_I = np.median(frames_mean_I)

    # Second pass: scale each frame with the reference value
    scale_jobs = [(frame_no, frame_file, iparams, mean_of_mean_I, avg_mode)
                  for frame_no, frame_file in zip(frames, frame_files)]
    scale_outcomes = parallel_map(
        iterable=scale_jobs,
        func=scale_frame_by_mean_I_mproc,
        processes=iparams.n_processors)

    observations_merge_mean_set = []
    for outcome in scale_outcomes:
        if outcome is None:
            continue
        pres, _txt = outcome
        if pres is not None:
            observations_merge_mean_set.append(pres)
    return observations_merge_mean_set
def run(self):
    """Run spot-finding over ``self.data_list`` and notify the parent.

    An ``IOTATermination`` raised by the parallel map sets
    ``self.terminated`` and is printed. Afterwards a termination event is
    posted to the parent if the run was terminated, and the parent's
    ``onSpfAllDone`` is scheduled with the data list. A ``TypeError``
    during notification is printed rather than propagated.
    """
    try:
        parallel_map(
            iterable=self.data_list,
            func=self.spf_wrapper,
            callback=self.callback,
            processes=self.n_proc)
    except IOTATermination as termination:
        self.terminated = True
        print(termination)

    # Tell the GUI that this batch is finished
    try:
        if self.terminated:
            print('RUN TERMINATED!')
            term_event = SpotFinderTerminated(tp_EVT_SPFTERM, -1)
            wx.PostEvent(self.parent, term_event)
        wx.CallAfter(self.parent.onSpfAllDone, self.data_list)
    except TypeError as type_err:
        print(type_err)
    return
def run(args):
    """Run the indexing exercises in parallel.

    Prints a notice and returns without running anything when the
    ``dials_regression`` module is not available.

    :param args: optional sequence of 1-based exercise numbers (anything
        ``int()`` accepts); when empty, all exercises are run.
    """
    if not libtbx.env.has_module("dials_regression"):
        # Single-argument parenthesised print is valid on Python 2 and 3.
        print("Skipping exercise_index_3D_FFT_simple: dials_regression not present")
        return

    exercises = (exercise_1, exercise_2, exercise_3, exercise_4, exercise_5,
                 exercise_6, exercise_7, exercise_8, exercise_9, exercise_10,
                 exercise_11, exercise_12, exercise_13, exercise_14,
                 exercise_15, exercise_16, exercise_17)
    if len(args):
        args = [int(arg) for arg in args]
        for arg in args:
            assert arg > 0
        # Exercise numbers on the command line are 1-based.
        exercises = [exercises[arg - 1] for arg in args]

    from libtbx import easy_mp

    nproc = easy_mp.get_processes(libtbx.Auto)
    nproc = min(nproc, len(exercises))

    def run_parallel(item):
        # Each work item is a 1-tuple wrapping the exercise callable.
        assert len(item) == 1
        exercise = item[0]
        exercise()

    easy_mp.parallel_map(
        func=run_parallel,
        iterable=[(e,) for e in exercises],
        processes=nproc)
def process(self):
    """Do all the processing tasks.

    Initialises the manager, runs every task (in parallel when more than
    one process is available, serially otherwise), accumulates the
    results on the manager and finalises it.

    :return: the two values from ``self.manager.result()`` followed by
        ``self.manager.time``
    """
    from time import time
    from libtbx import easy_mp
    import platform

    start_time = time()
    self.manager.initialize()
    mp_method = self.manager.params.mp.method
    mp_nproc = min(len(self.manager), self.manager.params.mp.nproc)
    mp_nthreads = self.manager.params.mp.nthreads
    if mp_nproc > 1 and platform.system() == "Windows":
        # platform.system() forks which is bad for MPI, so don't use it unless nproc > 1
        # logger.warning is the supported spelling; logger.warn is deprecated.
        logger.warning("")
        logger.warning("*" * 80)
        logger.warning("Multiprocessing is not available on windows. Setting nproc = 1")
        logger.warning("*" * 80)
        logger.warning("")
        mp_nproc = 1
    assert mp_nproc > 0, "Invalid number of processors"
    job.nthreads = mp_nthreads
    logger.info(self.manager.summary())
    logger.info(' Using %s with %d parallel job(s) and %d thread(s) per job\n' % (
        mp_method, mp_nproc, mp_nthreads))

    if mp_nproc > 1:

        def process_output(result):
            # Replay the worker's log records, then fold the result in and
            # drop its bulky attributes.
            for message in result[1]:
                logger.log(message.levelno, message.msg)
            self.manager.accumulate(result[0])
            result[0].reflections = None
            result[0].data = None

        def execute_task(task):
            # The unused, Python-2-only cStringIO import was removed here.
            from dials.util import log
            import logging
            log.config_simple_cached()
            result = task()
            handlers = logging.getLogger('dials').handlers
            assert len(handlers) == 1, "Invalid number of logging handlers"
            return result, handlers[0].messages()

        easy_mp.parallel_map(
            func=execute_task,
            iterable=list(self.manager.tasks()),
            processes=mp_nproc,
            callback=process_output,
            method=mp_method,
            preserve_order=True,
            preserve_exception_message=True)
    else:
        for task in self.manager.tasks():
            self.manager.accumulate(task())

    self.manager.finalize()
    end_time = time()
    self.manager.time.user_time = end_time - start_time
    result1, result2 = self.manager.result()
    return result1, result2, self.manager.time
def run(self):
    """Run spot-finding over ``self.data_list`` with parallel_map.

    Any exception raised by the map is reported on stdout instead of
    propagating.
    """
    try:
        parallel_map(
            iterable=self.data_list,
            func=self.spf_wrapper,
            callback=self.callback,
            preserve_exception_message=True,
            processes=None)
    except Exception as e:  # 'as' form is valid on Python 2.6+ and 3
        # Same output as the old two-argument print statement.
        print('SPOTFINDING THREAD: %s' % (e,))
def run(self):
    """Process ``self.iterable`` in parallel, then write the finish marker.

    After the map returns, an empty ``finish.cfg`` file is created in
    ``self.init.tmp_base``.
    """
    n_procs = self.init.params.n_processors
    parallel_map(
        iterable=self.iterable,
        func=self.full_proc_wrapper,
        processes=n_procs)

    # An empty file signals completion
    finish_path = os.path.join(self.init.tmp_base, 'finish.cfg')
    with open(finish_path, 'w') as marker:
        marker.write('')
def run(self):
    """Run spot-finding over ``self.data_list`` with parallel_map.

    An ``IOTATermination`` sets ``self.terminated`` and is printed rather
    than propagated.
    """
    try:
        parallel_map(
            iterable=self.data_list,
            func=self.spf_wrapper,
            callback=self.callback,
            processes=self.n_proc)
    except IOTATermination as e:  # 'as' form is valid on Python 2.6+ and 3
        self.terminated = True
        print(e)
def run(self):
    """Process ``self.iterable`` in parallel, then write the finish marker.

    After the map returns, an empty ``finish.cfg`` file is created in
    ``self.init.tmp_base``.
    """
    n_procs = self.init.params.n_processors
    parallel_map(
        iterable=self.iterable,
        func=self.full_proc_wrapper,
        processes=n_procs)

    # An empty file signals completion
    finish_path = os.path.join(self.init.tmp_base, 'finish.cfg')
    with open(finish_path, 'w') as marker:
        marker.write('')
def run(self):
    """Run spot-finding over ``self.data_list`` with parallel_map.

    Uses five fewer processes than the machine's CPU count, but never
    fewer than one. An ``IOTATermination`` sets ``self.terminated`` and
    is printed rather than propagated.
    """
    total_procs = multiprocessing.cpu_count()
    # On machines with five or fewer cores the plain subtraction would
    # request zero or a negative number of processes.
    n_procs = max(1, total_procs - 5)
    try:
        parallel_map(
            iterable=self.data_list,
            func=self.spf_wrapper,
            callback=self.callback,
            processes=n_procs)
    except IOTATermination as e:  # 'as' form is valid on Python 2.6+ and 3
        self.terminated = True
        print(e)
def run(self):
    """Process ``self.iterable`` in parallel and leave a status marker file.

    On success an empty ``finish.cfg`` is written to
    ``self.init.tmp_base``. If the run raises ``IOTATermination`` an empty
    ``.aborted.tmp`` is written to ``self.init.int_base`` and the
    exception is re-raised.

    :raises IOTATermination: when the parallel run is terminated
    """
    try:
        parallel_map(
            iterable=self.iterable,
            func=self.full_proc_wrapper,
            processes=self.init.params.n_processors)
        end_filename = os.path.join(self.init.tmp_base, 'finish.cfg')
        with open(end_filename, 'w') as ef:
            ef.write('')
    except IOTATermination:
        aborted_file = os.path.join(self.init.int_base, '.aborted.tmp')
        with open(aborted_file, 'w') as abtf:
            abtf.write('')
        # Bare raise keeps the original traceback intact.
        raise
def multi_node_parallel_map(
        func,
        iterable,
        njobs=1,
        nproc=1,
        cluster_method=None,
        asynchronous=True,
        callback=None,
        preserve_order=True,
        preserve_exception_message=False):
    """Call a function using multiple cluster nodes, with multiple
    processors on each node.

    The iterable is grouped by ``iterable_grouper(iterable, nproc)`` and
    each group is submitted as one cluster job that runs ``func`` through
    a ``MultiNodeClusterFunction``.

    :param func: the function to call on every item
    :param iterable: the items to process
    :param njobs: number of cluster jobs (``processes`` of the outer map)
    :param nproc: processors per job; also used in the qsub command
    :param cluster_method: parallel_map ``method`` for the outer map
    :param asynchronous: forwarded to both map levels
    :param callback: optional per-result callback, wrapped in
        ``MultiNodeClusterCallback``
    :param preserve_order: forwarded to both map levels
    :param preserve_exception_message: forwarded to both map levels
    :return: whatever the outer ``parallel_map`` returns (one entry per
        cluster job). Previously the result was discarded and ``None``
        returned.
    """
    from libtbx.easy_mp import parallel_map

    # The function to call on the cluster
    cluster_func = MultiNodeClusterFunction(
        func=func,
        nproc=nproc,
        asynchronous=asynchronous,
        preserve_order=preserve_order,
        preserve_exception_message=preserve_exception_message)

    # Create the cluster iterable
    cluster_iterable = iterable_grouper(iterable, nproc)

    # Create the cluster callback
    if callback is not None:
        cluster_callback = MultiNodeClusterCallback(callback)
    else:
        cluster_callback = None

    # Set the command
    qsub_command = 'qsub -pe smp %d' % nproc

    # Do the parallel map on the cluster and hand the results back
    return parallel_map(
        func=cluster_func,
        iterable=cluster_iterable,
        callback=cluster_callback,
        method=cluster_method,
        processes=njobs,
        qsub_command=qsub_command,
        asynchronous=asynchronous,
        preserve_order=preserve_order,
        preserve_exception_message=preserve_exception_message)
def postrefine_frames(i_iter, frames, frame_files, iparams, pres_set,
                      miller_array_ref, avg_mode):
    """Post-refine the given frames using previous post-refinement results.

    :param i_iter: zero-based cycle index (reported as ``i_iter + 1``)
    :param frames: frame numbers
    :param frame_files: frame file names, paired with ``frames``
    :param iparams: parameter object (reads ``n_processors``)
    :param pres_set: previous per-frame results, paired with ``frames``
    :param miller_array_ref: reference array; its Bijvoet mates are
        generated before use
    :param avg_mode: averaging mode label, passed to every job
    :return: ``(good, pres_list, header_text)`` where ``good`` holds the
        non-None results, ``pres_list`` has one entry per returned job
        (``None`` for failed ones) and ``header_text`` is the printed
        cycle header
    """
    miller_array_ref = miller_array_ref.generate_bijvoet_mates()

    txt_merge_postref = 'Post-refinement cycle ' + str(
        i_iter + 1) + ' (' + avg_mode + ')\n'
    txt_merge_postref += ' * R and CC show percent change.\n'
    # Parenthesised single-argument print works on Python 2 and 3 alike.
    print(txt_merge_postref)

    frame_args = [
        (frame_no, frame_file, iparams, miller_array_ref, pres_in, avg_mode)
        for frame_no, frame_file, pres_in in zip(frames, frame_files, pres_set)
    ]
    postrefine_by_frame_result = parallel_map(
        iterable=frame_args,
        func=postrefine_by_frame_mproc,
        processes=iparams.n_processors)

    postrefine_by_frame_good = []
    postrefine_by_frame_pres_list = []
    for results in postrefine_by_frame_result:
        if results is not None:
            pres, _txt_out_result = results
            postrefine_by_frame_pres_list.append(pres)
            if pres is not None:
                postrefine_by_frame_good.append(pres)
        else:
            # Keep the per-frame list aligned with the jobs
            postrefine_by_frame_pres_list.append(None)

    return postrefine_by_frame_good, postrefine_by_frame_pres_list, txt_merge_postref
def run_process(self, iterable):
    """Attach an ExperimentList to every entry and process them in parallel.

    ExperimentList objects are built here from the image paths. For paths
    ending in ``.h5`` the file is loaded once (on the first such entry)
    and a one-image experiment is cut out per entry; every other path is
    loaded on its own.

    :param iterable: entries indexable as ``[exp_idx, path, img_idx]``
    :return: the result of the parallel map over the augmented entries
    """
    jobs = []
    h5_imageset = None
    h5_crystal = None

    for entry in iterable:
        path = str(entry[1])

        if not path.endswith('.h5'):
            # Non-HDF5 image: one ExperimentList per file
            single = ExLF.from_filenames(filenames=[path])
            jobs.append([entry[0], entry[1], 0, single])
            continue

        exp_idx = entry[0]
        img_idx = entry[2]
        if h5_imageset is None:
            # Load the container once and reuse its imageset and crystal
            loaded = ExLF.from_filenames(filenames=[path])
            h5_imageset = loaded.imagesets()[0]
            h5_crystal = loaded[0].crystal
        image_slice = h5_imageset.partial_set(img_idx, img_idx + 1)
        image_exp = ExLF.from_imageset_and_crystal(imageset=image_slice,
                                                   crystal=h5_crystal)
        jobs.append([exp_idx, path, img_idx, image_exp])

    # Run a multiprocessing job
    return parallel_map(
        iterable=jobs,
        func=self.import_and_process,
        callback=self.callback,
        processes=self.params.mp.n_processors)
def check_if_stacktrace_is_propagated_properly(method, nproc):
    """Check that parallel_map propagates a worker's exception and stack.

    Maps ``_may_divide_by_zero`` over ``[2, 1, 0]`` and requires that a
    ``ZeroDivisionError`` reaches the caller, that its message mentions
    "division by zero", and that the failing function's name appears
    either in the traceback or in libtbx's preserved stack trace.

    :param method: parallel_map method name
    :param nproc: number of processes
    """
    exception_seen = False
    from libtbx.easy_mp import parallel_map
    import traceback

    # __name__ exists on both Python 2 and 3 (func_name is Python 2 only)
    fail_function_name = _may_divide_by_zero.__name__

    try:
        parallel_map(
            func=_may_divide_by_zero,
            iterable=[2, 1, 0],
            method=method,
            processes=nproc,
            preserve_exception_message=True)
    except ZeroDivisionError as e:
        exception_seen = True
        exc_type, exc_value, exc_traceback = sys.exc_info()
        # .message only exists on Python 2; fall back to the exception itself
        message = str(getattr(exc_value, "message", exc_value))
        assert "division by zero" in message, \
            "Exception value mismatch: '%s'" % exc_value

        stack_contains_fail_function = False

        # Two options: Either the original stack is available directly
        for (filename, line, function, text) in traceback.extract_tb(exc_traceback):
            if function == fail_function_name:
                stack_contains_fail_function = True

        # or it should be preserved in the string representation of the exception
        from libtbx.scheduling import stacktrace
        ex, st = stacktrace.exc_info()
        if ex is not None and fail_function_name in "".join(st):
            stack_contains_fail_function = True

        if not stack_contains_fail_function:
            print("Thrown exception: %s:" % str(e))
            traceback.print_tb(exc_traceback)
            print("")

        assert stack_contains_fail_function, "Stacktrace lost"

    # Without this the check silently passed when nothing was raised.
    assert exception_seen, "Expected ZeroDivisionError was not raised"
def print_results(candidates, params):
    """Group candidates by matching unit cell and print the best ten groups.

    Each candidate is mapped (in parallel) to the index returned by
    ``i_first_matching``; candidates sharing an index form a group, which
    is represented by its lowest-scoring member. The representatives are
    sorted by score and the first ten are printed as score and candidate.

    :param candidates: list of candidate objects with a ``score``
    :param params: parameter object (reads ``multiprocessing.nproc``)
    """
    matcher = functools.partial(i_first_matching, cand_list=candidates)
    group_of = easy_mp.parallel_map(
        matcher, candidates, processes=params.multiprocessing.nproc)

    representatives = []
    for group_idx in set(group_of):
        members = [
            cand for idx, cand in zip(group_of, candidates)
            if group_idx == idx
        ]
        representatives.append(min(members, key=lambda m: m.score))

    representatives.sort(key=lambda r: r.score)
    for rep in representatives[:10]:
        print("{:.4f}\t{}".format(rep.score, rep))
def check_if_stacktrace_is_propagated_properly(method, nproc):
    """Check that parallel_map propagates a worker's exception and stack.

    Maps ``_may_divide_by_zero`` over ``[2, 1, 0]`` and requires that a
    ``ZeroDivisionError`` reaches the caller, that its message mentions
    "division by zero", and that the failing function's name appears
    either in the traceback or in libtbx's preserved stack trace.

    :param method: parallel_map method name
    :param nproc: number of processes
    """
    exception_seen = False
    from libtbx.easy_mp import parallel_map
    import traceback

    # __name__ exists on both Python 2 and 3 (func_name is Python 2 only)
    fail_function_name = _may_divide_by_zero.__name__

    try:
        parallel_map(
            func=_may_divide_by_zero,
            iterable=[2, 1, 0],
            method=method,
            processes=nproc,
            preserve_exception_message=True)
    except ZeroDivisionError as e:
        exception_seen = True
        exc_type, exc_value, exc_traceback = sys.exc_info()
        # .message only exists on Python 2; fall back to the exception itself
        message = str(getattr(exc_value, "message", exc_value))
        assert "division by zero" in message, \
            "Exception value mismatch: '%s'" % exc_value

        stack_contains_fail_function = False

        # Two options: Either the original stack is available directly
        for (filename, line, function, text) in traceback.extract_tb(exc_traceback):
            if function == fail_function_name:
                stack_contains_fail_function = True

        # or it should be preserved in the string representation of the exception
        from libtbx.scheduling import stacktrace
        ex, st = stacktrace.exc_info()
        if ex is not None and fail_function_name in "".join(st):
            stack_contains_fail_function = True

        if not stack_contains_fail_function:
            print("Thrown exception: %s:" % str(e))
            traceback.print_tb(exc_traceback)
            print("")

        assert stack_contains_fail_function, "Stacktrace lost"

    # Without this the check silently passed when nothing was raised.
    assert exception_seen, "Expected ZeroDivisionError was not raised"
def compute_functional_gradients_and_curvatures(self):
    """Return the functional, gradients and curvatures summed over blocks.

    The target splits its matches into blocks; each block is evaluated
    (in parallel when ``self._nproc > 1``) and the per-block functional
    values, gradient components and curvature components are summed.
    Restraint contributions, when present, are added on top.

    :return: ``(f, g, c)`` with ``g`` and ``c`` as ``flex.double`` arrays
    """
    self.prepare_for_step()

    # Observation terms, one result per block
    blocks = self._target.split_matches_into_blocks(nproc=self._nproc)
    if self._nproc > 1:
        per_block = easy_mp.parallel_map(
            func=self._target.compute_functional_gradients_and_curvatures,
            iterable=blocks,
            processes=self._nproc,
            method="multiprocessing")
    else:
        per_block = [
            self._target.compute_functional_gradients_and_curvatures(block)
            for block in blocks
        ]

    # Reduce block-wise results: transpose so each parameter's
    # contributions line up, then sum them
    block_f, block_g, block_c = zip(*per_block)
    f = sum(block_f)
    g = [sum(contribs) for contribs in zip(*block_g)]
    c = [sum(contribs) for contribs in zip(*block_c)]

    # Restraint terms
    restraints = self._target.compute_restraints_functional_gradients_and_curvatures()
    if restraints:
        f += restraints[0]
        g = [a + b for a, b in zip(g, restraints[1])]
        c = [a + b for a, b in zip(c, restraints[2])]

    return f, flex.double(g), flex.double(c)
def run_parallel(
        method='multiprocessing',  # multiprocessing, only choice for now
        qsub_command='qsub',       # queue command, not supported yet
        nproc=1,                   # number of processors to use
        target_function=None,      # the method to run
        kw_list=None):             # list of kw dictionaries for target_function
    """Run ``target_function`` once per keyword dictionary in ``kw_list``.

    Jobs run serially when ``nproc == 1`` or there is at most one job;
    otherwise they are dispatched through ``libtbx.easy_mp.parallel_map``
    (always with ``use_manager=True``).

    :return: list of results, one per entry of ``kw_list``; an empty list
        when ``kw_list`` is ``None`` or empty
    """
    # The default used to crash in len(None); treat it as "no jobs".
    if kw_list is None:
        kw_list = []
    n = len(kw_list)  # number of jobs to run, one per kw dict

    runner = run_anything(target_function=target_function, kw_list=kw_list)

    if nproc == 1 or n <= 1:
        # just run it for each case in list, no multiprocessing
        return [runner(i) for i in range(n)]

    # NOTE: a pool_map-based branch used to sit here behind "elif 0"
    # ("Can crash 2015-10-13 TT so don't use it"); it was unreachable and
    # has been removed.
    from libtbx.easy_mp import parallel_map
    return parallel_map(
        func=runner,
        iterable=range(n),  # range replaces the Python-2-only xrange
        method=method,
        processes=nproc,
        callback=None,
        preserve_exception_message=True,  # 2016-08-17
        qsub_command=qsub_command,
        use_manager=True)  # Always use manager 2015-10-13 TT
def __init__(self, params, out=sys.stdout):
    """Analyse every structure in ``params.structure`` and add its column.

    Each structure is processed by ``self.run_single_structure`` through
    ``easy_mp.parallel_map``; the resulting validation objects are
    converted to table columns and added in input order.

    :param params: extracted parameters (reads ``output``, ``processing``,
        ``structure`` and ``multiprocessing``)
    :param out: writable stream for progress messages
    """
    iotbx.table_one.table.__init__(
        self,
        text_field_separation=params.output.text_field_separation,
        count_anomalous_pairs_separately=params.processing.count_anomalous_pairs_separately,
    )
    self.output_dir = os.getcwd()
    self.params = params
    self.output_files = []
    make_header("Running data analysis and validation", out=out)
    results = easy_mp.parallel_map(
        iterable=range(len(self.params.structure)),
        func=self.run_single_structure,
        processes=params.multiprocessing.nproc,
        method=params.multiprocessing.technology,
        preserve_exception_message=True,
    )
    for structure, result in zip(params.structure, results):
        # out.write replaces the Python-2-only "print >> out" form
        out.write("\n")
        out.write("Collecting stats for structure %s\n" % structure.name)
        column = result.validation.as_table1_column(
            label=structure.name,
            wavelength=structure.wavelength,
            re_compute_r_factors=params.processing.re_compute_r_factors,
            log=out,
        )
        self.add_column(column)
def step_over_images(self):
    """Search every image block and store the flattened reflection list.

    The scan's images are split into blocks by ``self._make_blocks``,
    ``self._search_on_image_range`` is mapped over the blocks with order
    preserved, and the per-block results are concatenated into
    ``self._reflections``.
    """
    from libtbx import easy_mp

    n_images = self._scan.get_num_images()

    # One process, reduced further only if there are fewer images than that
    nproc = min(1, n_images)

    image_blocks = self._make_blocks(n_images, nproc)
    per_block = easy_mp.parallel_map(
        func=self._search_on_image_range,
        iterable=image_blocks,
        processes=nproc,
        method="multiprocessing",
        preserve_order=True,
    )

    flattened = []
    for block_reflections in per_block:
        flattened.extend(block_reflections)
    self._reflections = flattened
    return
def run(params):
    """Find XDS directories below ``params.topdir`` and process each one.

    A directory qualifies when it contains ``XDS.INP``; directories that
    already hold ``decision.log`` are skipped when
    ``params.dont_overwrite`` is set. With ``params.multiproc`` the
    directories are processed through ``easy_mp.parallel_map`` (and
    ``params.nproc`` is overwritten with the per-job processor count);
    otherwise they are processed one after another.

    :param params: extracted parameters; ``topdir`` is replaced by its
        absolute path
    """
    params.topdir = os.path.abspath(params.topdir)

    xds_dirs = []
    print("Found xds directories:")
    for root, dirnames, filenames in os.walk(params.topdir, followlinks=True):
        if "XDS.INP" in filenames:
            if "decision.log" in filenames and params.dont_overwrite:
                print("Already done - skip: %s" % os.path.relpath(root, params.topdir))
                continue
            print(" %s" % os.path.relpath(root, params.topdir))
            xds_dirs.append(root)

    print("")
    print("Start running..")

    from yamtbx.dataproc.auto.command_line import run_all_xds_simple

    # Nothing to do; this also avoids dividing by zero below.
    if not xds_dirs:
        return

    if params.multiproc:
        npar = util.get_number_of_processors() if params.nproc is None else params.nproc

        # Override nproc: spread spare processors over the jobs
        if len(xds_dirs) < npar:
            params.nproc = npar // len(xds_dirs)
        else:
            params.nproc = 1
        print("nproc= %s" % params.nproc)

        if params.parmethod == "sge":
            npar = len(xds_dirs)

        fun = run_all_xds_simple.xds_runmanager(params)
        easy_mp.parallel_map(
            func=fun,
            iterable=[os.path.abspath(x) for x in xds_dirs],
            processes=npar,
            method=params.parmethod,
            preserve_exception_message=True)
    else:
        for root in xds_dirs:
            run_xds_sequence(root, params)
def batch_parallel_map(func=None, iterable=None, processes=None,
                       callback=None, method=None, chunksize=1):
    """Run jobs in batches in each process.

    ``func``, ``iterable`` and ``callback`` are wrapped by
    ``batch_func``, ``batch_iterable`` and ``batch_callback`` so that
    every parallel job handles a chunk of items.

    :param func: function applied to each item
    :param iterable: items to process
    :param processes: number of processes
    :param callback: per-result callback
    :param method: parallel_map method name
    :param chunksize: chunk size handed to ``batch_iterable``
    :return: whatever ``easy_mp.parallel_map`` returns for the batched
        jobs. Previously the result was discarded and ``None`` returned.
    """
    from libtbx import easy_mp

    # Call the batches in parallel and hand the results back
    return easy_mp.parallel_map(
        func=batch_func(func),
        iterable=batch_iterable(iterable, chunksize),
        processes=processes,
        callback=batch_callback(callback),
        method=method,
        preserve_order=True,
        preserve_exception_message=True,
    )
def multi_node_parallel_map(func,
                            iterable,
                            njobs=1,
                            nproc=1,
                            cluster_method=None,
                            asynchronous=True,
                            callback=None,
                            preserve_order=True,
                            preserve_exception_message=False):
    """Call a function using multiple cluster nodes, with multiple
    processors on each node.

    The iterable is grouped by ``iterable_grouper(iterable, nproc)`` and
    each group is submitted as one cluster job that runs ``func`` through
    a ``MultiNodeClusterFunction``.

    :param func: the function to call on every item
    :param iterable: the items to process
    :param njobs: number of cluster jobs (``processes`` of the outer map)
    :param nproc: processors per job; also used in the qsub command
    :param cluster_method: parallel_map ``method`` for the outer map
    :param asynchronous: forwarded to both map levels
    :param callback: optional per-result callback, wrapped in
        ``MultiNodeClusterCallback``
    :param preserve_order: forwarded to both map levels
    :param preserve_exception_message: forwarded to both map levels
    :return: whatever the outer ``parallel_map`` returns (one entry per
        cluster job). Previously the result was discarded and ``None``
        returned.
    """
    from libtbx.easy_mp import parallel_map

    # The function to call on the cluster
    cluster_func = MultiNodeClusterFunction(
        func=func,
        nproc=nproc,
        asynchronous=asynchronous,
        preserve_order=preserve_order,
        preserve_exception_message=preserve_exception_message)

    # Create the cluster iterable
    cluster_iterable = iterable_grouper(iterable, nproc)

    # Create the cluster callback
    if callback is not None:
        cluster_callback = MultiNodeClusterCallback(callback)
    else:
        cluster_callback = None

    # Set the command
    qsub_command = 'qsub -pe smp %d' % nproc

    # Do the parallel map on the cluster and hand the results back
    return parallel_map(
        func=cluster_func,
        iterable=cluster_iterable,
        callback=cluster_callback,
        method=cluster_method,
        processes=njobs,
        qsub_command=qsub_command,
        asynchronous=asynchronous,
        preserve_order=preserve_order,
        preserve_exception_message=preserve_exception_message)
def run():
    """Load the reflection data and run ``call_gsas`` once, then exit.

    ``REFLS`` is read as comma-separated ``value,weight`` lines and
    ``KNOWN_GOOD`` as one float per line. A single ``call_gsas`` call is
    made and the interpreter is then stopped with ``quit()``.
    """
    refls = []
    weights = []
    with open(REFLS) as refl_file:
        for row in refl_file.readlines():
            d, w = [float(field) for field in row.strip().split(',')]
            refls.append(d)
            weights.append(w)

    with open(KNOWN_GOOD) as good_file:
        known_good = [float(row.strip()) for row in good_file.readlines()]

    call_gsas((refls, weights, known_good, None))
    # NOTE(review): quit() ends the interpreter here, so the two-pass
    # parallel search and the pickle dump below never run. This looks like
    # a leftover single-call debugging/profiling shortcut -- confirm
    # before removing it.
    quit()

    cell_man = Candidate_cell_manager()

    # First pass: NPROC independent runs without a score threshold
    first_pass = easy_mp.parallel_map(
        call_gsas,
        [(refls, weights, known_good, None) for _ in range(NPROC)],
        processes=NPROC)
    first_cells = []
    for batch in first_pass:
        first_cells.extend(batch)
    for gcell in first_cells:
        cell_man.store_cell(gcell)
    cell_man.maintain(force=True)

    # Second pass: three times as many runs, seeded with the minimum score
    min_score = cell_man.min_score
    second_pass = easy_mp.parallel_map(
        call_gsas,
        [(refls, weights, known_good, min_score) for _ in range(NPROC * 3)],
        processes=NPROC)
    second_cells = []
    for batch in second_pass:
        second_cells.extend(batch)
    for gcell in second_cells:
        cell_man.store_cell(gcell)
    cell_man.maintain(force=True)

    with open(sys.argv[1], 'wb') as out_file:
        pickle.dump(cell_man, out_file)
def run(self):
    """Process ``self.iterable`` with parallel_map.

    An ``IOTATermination`` sets ``self.aborted`` and is printed rather
    than propagated. The map's result is not used.
    """
    try:
        parallel_map(
            iterable=self.iterable,
            func=self.full_proc_wrapper,
            processes=self.init.params.n_processors)
    except IOTATermination as e:  # 'as' form is valid on Python 2.6+ and 3
        self.aborted = True
        print(e)
        return
def run(params):
    """Find XDS directories below ``params.topdir`` and process each one.

    A directory qualifies when it contains ``XDS.INP``; directories that
    already hold ``decision.log`` are skipped when
    ``params.dont_overwrite`` is set. With ``params.multiproc`` the
    directories are processed through ``easy_mp.parallel_map`` (and
    ``params.nproc`` is overwritten with the per-job processor count);
    otherwise they are processed one after another.

    :param params: extracted parameters
    """
    xds_dirs = []
    print("Found xds directories:")
    for root, dirnames, filenames in os.walk(params.topdir, followlinks=True):
        if "XDS.INP" in filenames:
            if "decision.log" in filenames and params.dont_overwrite:
                print("Already done - skip: %s" % os.path.relpath(root, params.topdir))
                continue
            print(" %s" % os.path.relpath(root, params.topdir))
            xds_dirs.append(root)

    print("")
    print("Start running..")

    from yamtbx.dataproc.auto.command_line import run_all_xds_simple

    # Nothing to do; this also avoids dividing by zero below.
    if not xds_dirs:
        return

    if params.multiproc:
        npar = util.get_number_of_processors() if params.nproc is None else params.nproc

        # Override nproc: spread spare processors over the jobs
        if len(xds_dirs) < npar:
            params.nproc = npar // len(xds_dirs)
        else:
            params.nproc = 1
        print("nproc= %s" % params.nproc)

        if params.parmethod == "sge":
            npar = len(xds_dirs)

        fun = run_all_xds_simple.xds_runmanager(params)
        easy_mp.parallel_map(
            func=fun,
            iterable=[os.path.abspath(x) for x in xds_dirs],
            processes=npar,
            method=params.parmethod,
            preserve_exception_message=True)
    else:
        for root in xds_dirs:
            xds_sequence(root, params)
def run(self):
    """Process ``self.iterable`` with parallel_map.

    Only acts when ``self.init.params.mp_method`` is
    ``'multiprocessing'``. Any exception raised by the map is printed
    rather than propagated. The map's result is not used.
    """
    if self.init.params.mp_method == 'multiprocessing':
        try:
            parallel_map(
                iterable=self.iterable,
                func=self.full_proc_wrapper,
                processes=self.init.params.n_processors)
        except Exception as e:  # 'as' form is valid on Python 2.6+ and 3
            print(e)
            return
def run(args):
    """Run ``run_once`` on every sufficiently long sweep found on disk.

    Command-line arguments are interpreted against ``master_phil_scope``.
    The file templates are globbed, the files are loaded into datablocks,
    and every ``ImageSweep`` with at least ``params.min_sweep_length``
    images becomes one job ``(paths, sweep_id, params)``. Jobs are sorted
    by the first filename of each sweep and dispatched through
    ``easy_mp.parallel_map``.

    :param args: command-line arguments (phil definitions)
    """
    cmd_line = command_line.argument_interpreter(
        master_params=master_phil_scope)
    working_phil = cmd_line.process_and_fetch(args=args)
    working_phil.show()
    params = working_phil.extract()

    if params.find_spots_phil is not None:
        params.find_spots_phil = os.path.abspath(params.find_spots_phil)
        assert os.path.isfile(params.find_spots_phil)
    if params.index_phil is not None:
        params.index_phil = os.path.abspath(params.index_phil)
        assert os.path.isfile(params.index_phil)

    templates = params.template
    print(templates)

    filenames = []
    for t in templates:
        print(t)
        filenames.extend(glob.glob(t))
    print(filenames)

    from dxtbx.imageset import ImageSetFactory, ImageSweep
    from dxtbx.datablock import DataBlockFactory

    datablocks = DataBlockFactory.from_args(filenames, verbose=True)

    # The sweep counter used to share its name with the enumerate() loop
    # variable and was reset for every datablock, so different sweeps
    # could receive the same id. Count sweeps independently instead.
    jobs = []
    sweep_id = 0
    for datablock in datablocks:
        sweeps = datablock.extract_sweeps()
        for imageset in sweeps:
            if (isinstance(imageset, ImageSweep)
                    and len(imageset) >= params.min_sweep_length):
                sweep_id += 1
                print(imageset)
                print(imageset.get_template())
                jobs.append((imageset.paths(), sweep_id, params))

    # sort based on the first filename of each imageset
    jobs.sort(key=lambda x: x[0][0])

    nproc = params.nproc
    easy_mp.parallel_map(
        func=run_once,
        iterable=jobs,
        processes=nproc,
        method=params.technology,
        qsub_command=params.qsub_command,
        preserve_order=True,
        asynchronous=False,
        preserve_exception_message=True,
    )
def integrate(self):
    """Do all the integration tasks.

    Tasks run through ``easy_mp.parallel_map`` when more than one process
    is available (results are accumulated by the callback, which also
    prints each task's captured output); otherwise they run serially and
    are accumulated afterwards. Timing information is printed at the end.

    :return: the integration results (``self._manager.result()``)
    """
    from time import time
    from libtbx import easy_mp

    start_time = time()
    num_proc = len(self._manager)
    if self._max_procs > 0:
        num_proc = min(num_proc, self._max_procs)

    if num_proc > 1:

        def process_output(result):
            self._manager.accumulate(result[0])
            print(result[1])

        def execute_task(task):
            # cStringIO exists only on Python 2; fall back to io.StringIO
            try:
                from cStringIO import StringIO
            except ImportError:
                from io import StringIO
            import sys
            # Capture everything the task prints so it can be replayed
            sys.stdout = StringIO()
            result = task()
            output = sys.stdout.getvalue()
            return result, output

        task_results = easy_mp.parallel_map(
            func=execute_task,
            iterable=list(self._manager.tasks()),
            processes=num_proc,
            callback=process_output,
            method=self._mp_method,
            preserve_order=True,
            preserve_exception_message=True,
        )
        task_results, output = zip(*task_results)
    else:
        task_results = [task() for task in self._manager.tasks()]
        for result in task_results:
            self._manager.accumulate(result)

    assert self._manager.finished()
    end_time = time()

    read_time = self._manager.read_time
    extract_time = self._manager.extract_time
    process_time = self._manager.process_time
    total_time = end_time - start_time
    print("Time taken: reading images: %.2f seconds" % read_time)
    print("Time taken: extracting pixels: %.2f seconds" % extract_time)
    print("Time taken: processing data: %.2f seconds" % process_time)
    print("Time taken: total: %.2f seconds" % total_time)
    return self._manager.result()
def spotfinder_factory(absrundir, frames, phil_params):
    """Build a spotfinder for ``frames`` and register every local frame.

    Image header values of the first image are collected into a
    parameter dictionary, a ``heuristics_base`` spotfinder is created
    from it, and each frame number is registered through
    ``easy_mp.parallel_map``. The ``images`` of every returned spotfinder
    are merged back into the local one.

    :param absrundir: not used by this function
    :param frames: frame collection (uses ``frames()``, ``images`` and
        ``filenames.FN``)
    :param phil_params: parameters (reads ``spotfinder_verbose`` and
        ``distl.nproc``)
    :return: the populated spotfinder object
    """
    local_frames = frames.frames()

    A = frames.images[0]
    # A.readHeader()--deprecate this because it squashes any overrides
    # from dataset_preferences processed in imagefiles.py

    pd = {'directory': frames.filenames.FN[0].cwd,
          'template': frames.filenames.FN[0].template,
          'identifier': frames.filenames.FN[0].fileroot,
          'vendortype': A.vendortype,
          'binning': '%d' % A.bin,
          'distance': '%f' % A.distance,
          'wavelength': '%f' % A.wavelength,
          'deltaphi': '%f' % A.deltaphi,
          }

    # temp values for getting coordinate convention
    pd['pixel_size'] = '%f' % A.pixel_size
    pd['size1'] = '%f' % A.size1
    pd['size2'] = '%f' % A.size2
    pd['ybeam'] = '%f' % A.beamy
    pd['xbeam'] = '%f' % A.beamx
    try:
        pd['twotheta'] = '%f' % A.twotheta
    except Exception:
        pd['twotheta'] = '0.0'

    from spotfinder.applications.practical_heuristics import heuristics_base
    Spotfinder = heuristics_base(pd, phil_params)

    from libtbx import easy_mp

    def run_spotfinder(args):
        assert len(args) == 2
        framenumber, frames = args
        # This used to be "try: assert ...has_key(...)": the assert is
        # stripped under -O (frames would never be registered) and has_key
        # does not exist on Python 3. Any failure of the lookup is still
        # treated as "not registered yet".
        try:
            registered = framenumber in Spotfinder.images
        except Exception:
            registered = False
        if not registered:
            Spotfinder.register_frames(framenumber, frames)
            if phil_params.spotfinder_verbose:
                Spotfinder.show()
        return Spotfinder

    iterable = [(framenumber, frames) for framenumber in local_frames]
    results = easy_mp.parallel_map(
        func=run_spotfinder,
        iterable=iterable,
        processes=phil_params.distl.nproc,
        method="multiprocessing",
        preserve_order=True
    )
    for result in results:
        Spotfinder.images.update(result.images)
    return Spotfinder
def crystals_refiner(params, experiments, reflections):
    """Refine each experiment separately and return the experiment list.

    For every experiment the matching reflections are selected, their
    ``id`` column is reset to 0, and a refiner built from a
    single-experiment list is run. On success the refined experiment
    replaces the entry at the same index of ``experiments``; on any
    exception the error is printed and that experiment is left as is.

    :param params: parameters (reads ``mp.nproc`` and ``mp.method``)
    :param experiments: experiment list, indexed by experiment id
    :param reflections: reflection table with an ``id`` column
    :return: ``experiments``
    """

    def do_work(item):
        iexp, exp = item
        print("Refining crystal", iexp)

        # Reflections belonging to this experiment only, renumbered to id 0
        refs = reflections.select(reflections['id'] == iexp)
        refs['id'] = flex.int(len(refs), 0)

        # A minimum-reflection-count check used to live here; it was
        # commented out by DGW because
        # reflections.minimum_number_of_reflections no longer exists.

        # Single-experiment list for the refiner
        single = ExperimentList()
        single.append(exp)

        try:
            refiner = RefinerFactory.from_parameters_data_experiments(
                params, refs, single)
            refiner.run()
        except Exception as e:
            print("Error,", str(e))
            return

        # Replace this experiment with the refined one
        experiments[iexp] = refiner.get_experiments()[0]

    print("Beginning crystal refinement with %d processor(s)" % params.mp.nproc)
    easy_mp.parallel_map(
        func=do_work,
        iterable=enumerate(experiments),
        processes=params.mp.nproc,
        method=params.mp.method,
        asynchronous=True,
        preserve_exception_message=True)

    return experiments
def run(self):
    """Run ``self.func`` on every entry of ``self.pdbs`` and check results.

    One process per entry is requested from parallel_map (method
    ``'multiprocessing'``, with a manager). ``self.check_assertions`` is
    then called once per returned result.
    """
    outcomes = parallel_map(
        func=self.func,
        iterable=self.pdbs,
        method='multiprocessing',
        preserve_exception_message=True,
        processes=len(self.pdbs),
        qsub_command=qsub_command,
        use_manager=True)

    # NOTE(review): every iteration passes the WHOLE result list rather
    # than the current item -- possibly a typo for the single result;
    # confirm against check_assertions' signature.
    for _outcome in outcomes:
        self.check_assertions(outcomes)
def run_process(self):
    """Run indexing / integration of the imported images.

    Builds ``self.img_list`` as ``[index, count + 1, image_object]``
    triples (index starting at 1), resets the progress counter and bar,
    and replaces ``self.img_objects`` with the parallel map's result.
    """
    cmd.Command.start("Processing {} images".format(len(self.img_objects)))

    denominator = len(self.img_objects) + 1
    self.img_list = [[position, denominator, img]
                     for position, img in enumerate(self.img_objects, 1)]

    self.prog_count = 0
    self.gs_prog = cmd.ProgressBar(title='PROCESSING')

    self.img_objects = parallel_map(
        iterable=self.img_list,
        func=self.proc_wrapper,
        callback=self.callback,
        processes=self.init.params.n_processors)

    done_message = "Processing {} images -- DONE ".format(len(self.img_objects))
    cmd.Command.end(done_message)
def __call__(self, iterable):
    """Map ``self.func`` over ``iterable`` with local multiprocessing.

    :param iterable: the items to process
    :return: the result of ``libtbx.easy_mp.parallel_map``
    """
    from libtbx.easy_mp import parallel_map

    results = parallel_map(
        func=self.func,
        iterable=iterable,
        processes=self.nproc,
        method="multiprocessing",
        asynchronous=self.asynchronous,
        preserve_order=self.preserve_order,
        preserve_exception_message=self.preserve_exception_message,
    )
    return results
def __call__(self, iterable):
    """Map ``self.func`` over ``iterable`` with local multiprocessing.

    :param iterable: the items to process
    :return: the result of ``libtbx.easy_mp.parallel_map``
    """
    from libtbx.easy_mp import parallel_map

    results = parallel_map(
        func=self.func,
        iterable=iterable,
        processes=self.nproc,
        method="multiprocessing",
        asynchronous=self.asynchronous,
        preserve_order=self.preserve_order,
        preserve_exception_message=self.preserve_exception_message,
    )
    return results
def run_all(qm_engine,
            fragments_extracted,
            indices,
            method='multiprocessing',
            processes=1,
            qsub_command=None,
            callback=None):
    """Evaluate a ``qm_energy_manager`` for every index in parallel.

    :param qm_engine: engine handed to ``qm_energy_manager``
    :param fragments_extracted: fragments handed to ``qm_energy_manager``
    :param indices: the items mapped over
    :param method: parallel_map method name
    :param processes: number of processes
    :param qsub_command: queue submission command, forwarded as is
    :param callback: per-result callback, forwarded as is
    :return: the result of ``libtbx.easy_mp.parallel_map``
    """
    energy_manager = qm_energy_manager(qm_engine, fragments_extracted)

    from libtbx.easy_mp import parallel_map

    energies = parallel_map(
        func=energy_manager,
        iterable=indices,
        method=method,
        processes=processes,
        callback=callback,
        qsub_command=qsub_command)
    return energies
def integrate(self):
    """Do all the integration tasks.

    Tasks run through ``easy_mp.parallel_map`` when more than one process
    is available (results are accumulated by the callback, which also
    prints each task's captured output); otherwise they run serially and
    are accumulated afterwards. Timing information is printed at the end.

    :return: the integration results (``self._manager.result()``)
    """
    from time import time
    from libtbx import easy_mp

    start_time = time()
    num_proc = len(self._manager)
    if self._max_procs > 0:
        num_proc = min(num_proc, self._max_procs)

    if num_proc > 1:

        def process_output(result):
            self._manager.accumulate(result[0])
            print(result[1])

        def execute_task(task):
            # cStringIO exists only on Python 2; fall back to io.StringIO
            try:
                from cStringIO import StringIO
            except ImportError:
                from io import StringIO
            import sys
            # Capture everything the task prints so it can be replayed
            sys.stdout = StringIO()
            result = task()
            output = sys.stdout.getvalue()
            return result, output

        task_results = easy_mp.parallel_map(
            func=execute_task,
            iterable=list(self._manager.tasks()),
            processes=num_proc,
            callback=process_output,
            method=self._mp_method,
            preserve_order=True,
            preserve_exception_message=True)
        task_results, output = zip(*task_results)
    else:
        task_results = [task() for task in self._manager.tasks()]
        for result in task_results:
            self._manager.accumulate(result)

    assert self._manager.finished()
    end_time = time()

    read_time = self._manager.read_time
    extract_time = self._manager.extract_time
    process_time = self._manager.process_time
    total_time = end_time - start_time
    # Parenthesised single-argument prints work on Python 2 and 3 alike.
    print("Time taken: reading images: %.2f seconds" % read_time)
    print("Time taken: extracting pixels: %.2f seconds" % extract_time)
    print("Time taken: processing data: %.2f seconds" % process_time)
    print("Time taken: total: %.2f seconds" % total_time)
    return self._manager.result()
def run(self):
    """Process all items in parallel and notify the GUI when finished.

    ``self.full_proc_wrapper`` is mapped over ``self.iterable`` with
    ``self.init.params.n_processors`` processes.  If ``IOTATermination`` is
    raised, ``self.aborted`` is set to True, the exception is printed and
    nothing is posted.  Otherwise an ``AllDone`` event carrying the results
    is posted to ``self.parent``; any exception raised while building or
    posting that event is ignored.
    """
    try:
        map_options = dict(
            iterable=self.iterable,
            func=self.full_proc_wrapper,
            processes=self.init.params.n_processors,
        )
        img_objects = parallel_map(**map_options)
    except IOTATermination as termination:
        self.aborted = True
        print(termination)
        return

    # Send "all done" event to GUI
    try:
        wx.PostEvent(
            self.parent,
            AllDone(tp_EVT_ALLDONE, -1, img_objects=img_objects))
    except Exception:
        pass
def work_all(filenames, args, nproc):
    """Run ``work`` on every file name in parallel and return the results.

    Each job handed to ``work`` is a ``(filename, args)`` tuple, so every
    job receives the same ``args`` object.  The jobs are dispatched with
    ``libtbx.easy_mp.parallel_map`` using the ``"multiprocessing"`` method
    and ``nproc`` processes, with order and exception messages preserved.
    """
    from libtbx import easy_mp

    jobs = [(filename, args) for filename in filenames]
    return easy_mp.parallel_map(
        func=work,
        iterable=jobs,
        processes=nproc,
        method="multiprocessing",
        preserve_order=True,
        asynchronous=True,
        preserve_exception_message=True)
def work_all(filenames, args, nproc):
    """Run ``work`` on every file name in parallel and return the results.

    The shared ``args`` object is paired with each entry of ``filenames``
    to form ``(filename, args)`` jobs, which are mapped over ``work`` by
    ``libtbx.easy_mp.parallel_map`` (``"multiprocessing"`` method, ``nproc``
    processes, order and exception messages preserved).
    """
    from libtbx import easy_mp

    shared_args = args
    jobs = [(filename, shared_args) for filename in filenames]
    map_options = dict(
        func=work,
        iterable=jobs,
        processes=nproc,
        method="multiprocessing",
        preserve_order=True,
        asynchronous=True,
        preserve_exception_message=True,
    )
    return easy_mp.parallel_map(**map_options)
def run_import(self):
    """Import images or image objects.

    When ``select_only.flag_on`` is set the items come from
    ``self.init.gs_img_objects``, otherwise from ``self.init.input_list``.
    The work list is stored on ``self.img_list``, a progress bar is created
    on ``self.gs_prog``, ``self.prog_count`` is reset to 0, and the result
    of mapping ``self.proc_wrapper`` over the work list is stored on
    ``self.img_objects``.
    """
    if self.init.params.cctbx.selection.select_only.flag_on:
        entries = self.init.gs_img_objects
        msg = "Reading {} image objects".format(len(entries))
        title = 'READING IMAGE OBJECTS'
    else:
        entries = self.init.input_list
        msg = "Importing {} images".format(len(entries))
        title = 'IMPORTING IMAGES'

    # Each work item is [1-based position, number of items + 1, item].
    upper = len(entries) + 1
    self.img_list = [[position, upper, entry]
                     for position, entry in enumerate(entries, 1)]

    cmd.Command.start(msg)
    self.prog_count = 0
    self.gs_prog = cmd.ProgressBar(title=title)
    self.img_objects = parallel_map(
        iterable=self.img_list,
        func=self.proc_wrapper,
        callback=self.callback,
        processes=self.init.params.n_processors)
def batch_parallel_map(func=None, iterable=None, processes=None,
                       callback=None, method=None, chunksize=1):
    """Run jobs in batches in each process.

    ``func``, ``iterable`` (together with ``chunksize``) and ``callback``
    are wrapped in ``BatchFunc``, ``BatchIterable`` and ``BatchCallback``
    respectively before being given to ``libtbx.easy_mp.parallel_map``;
    ``processes`` and ``method`` are passed through unchanged.  Order and
    exception messages are preserved.

    Returns whatever ``parallel_map`` returns.
    """
    from libtbx import easy_mp

    batched_func = BatchFunc(func)
    batched_items = BatchIterable(iterable, chunksize)
    batched_callback = BatchCallback(callback)

    # Call the batches in parallel
    return easy_mp.parallel_map(
        func=batched_func,
        iterable=batched_items,
        processes=processes,
        callback=batched_callback,
        method=method,
        preserve_order=True,
        preserve_exception_message=True)
def step_over_images(self):
    """Search every image and store the combined list of predictions.

    The images of ``self._scan`` are divided into blocks by
    ``self._make_blocks``; ``self._search_on_image_range`` is run on each
    block through ``libtbx.easy_mp.parallel_map`` and the per-block lists
    are concatenated, in order, into ``self._reflections``.
    """
    from libtbx import easy_mp

    n_images = self._scan.get_num_images()

    # A single process is requested; the count is still capped at the
    # number of images.
    nproc = min(1, n_images)

    image_blocks = self._make_blocks(n_images, nproc)
    per_block = easy_mp.parallel_map(
        func=self._search_on_image_range,
        iterable=image_blocks,
        processes=nproc,
        method="multiprocessing",
        preserve_order=True)

    # Flatten the list of per-block lists.
    combined = []
    for block_reflections in per_block:
        combined.extend(block_reflections)
    self._reflections = combined
    return
def xia2_main(stop_after=None):
    """Actually process something: index/integrate every assigned sweep.

    Steps, in order:

    1. Report crystals that have no images assigned.
    2. If ``xia2.json`` exists in the current directory, reload the project
       from it and merge in crystals, wavelengths and sweeps from the
       command line that the saved project does not yet contain.
    3. Process the sweeps.  With ``njob > 1`` each sweep is handed to
       ``process_one_sweep`` through ``libtbx.easy_mp.parallel_map`` and the
       returned indexer/refiner/integrater are copied back onto the sweep;
       otherwise each sweep is processed in this process.

    Sweeps that fail are removed from their wavelength and sample.  In the
    serial branch this only happens when ``failover`` is set; otherwise the
    exception propagates.

    Args:
        stop_after: when equal to ``'index'`` (serial branch) only the
            indexer cell is requested instead of integrated intensities;
            the value is also forwarded to ``process_one_sweep``.
    """
    Citations.cite('xia2')

    # print versions of related software
    from dials.util.version import dials_version
    Chatter.write(dials_version())

    start_time = time.time()

    CommandLine = get_command_line()
    start_dir = Flags.get_starting_directory()

    # check that something useful has been assigned for processing...
    xtals = CommandLine.get_xinfo().get_crystals()

    no_images = True

    for name in xtals.keys():
        xtal = xtals[name]

        if not xtal.get_all_image_names():
            rule = '-----------------------------------' + '-' * len(name)
            Chatter.write(rule)
            Chatter.write('| No images assigned for crystal %s |' % name)
            Chatter.write(rule)
        else:
            no_images = False

    args = []

    from xia2.Handlers.Phil import PhilIndex
    params = PhilIndex.get_python_object()
    mp_params = params.xia2.settings.multiprocessing
    njob = mp_params.njob

    from libtbx import group_args

    xinfo = CommandLine.get_xinfo()

    if os.path.exists('xia2.json'):
        # Resume from the saved project and add anything new that was given
        # on the command line.
        from xia2.Schema.XProject import XProject
        xinfo_new = xinfo
        xinfo = XProject.from_json(filename='xia2.json')

        crystals = xinfo.get_crystals()
        crystals_new = xinfo_new.get_crystals()
        for crystal_id in crystals_new.keys():
            if crystal_id not in crystals:
                crystals[crystal_id] = crystals_new[crystal_id]
                continue
            crystals[crystal_id]._scaler = None  # reset scaler
            for wavelength_id in crystals_new[crystal_id].get_wavelength_names():
                wavelength_new = crystals_new[crystal_id].get_xwavelength(
                    wavelength_id)
                if wavelength_id not in crystals[crystal_id].get_wavelength_names():
                    # ``wavelength_new`` already is the wavelength object;
                    # it must not be passed back to get_xwavelength(), which
                    # is called with a wavelength id everywhere else here.
                    crystals[crystal_id].add_wavelength(wavelength_new)
                    continue
                wavelength = crystals[crystal_id].get_xwavelength(wavelength_id)
                sweeps_new = wavelength_new.get_sweeps()
                sweeps = wavelength.get_sweeps()
                sweep_names = [s.get_name() for s in sweeps]
                sweep_keys = [
                    (s.get_directory(), s.get_template(), s.get_image_range())
                    for s in sweeps]
                for sweep in sweeps_new:
                    sweep_key = (sweep.get_directory(), sweep.get_template(),
                                 sweep.get_image_range())
                    if sweep_key in sweep_keys:
                        # this sweep is already part of the saved project
                        continue
                    if sweep.get_name() in sweep_names:
                        # Name clash with an existing sweep: rename to the
                        # first unused 'SWEEP<n>'.
                        i = 1
                        while 'SWEEP%i' % i in sweep_names:
                            i += 1
                        sweep._name = 'SWEEP%i' % i
                    wavelength.add_sweep(
                        name=sweep.get_name(),
                        directory=sweep.get_directory(),
                        image=sweep.get_image(),
                        beam=sweep.get_beam_centre(),
                        reversephi=sweep.get_reversephi(),
                        distance=sweep.get_distance(),
                        gain=sweep.get_gain(),
                        dmin=sweep.get_resolution_high(),
                        dmax=sweep.get_resolution_low(),
                        polarization=sweep.get_polarization(),
                        frames_to_process=sweep.get_frames_to_process(),
                        user_lattice=sweep.get_user_lattice(),
                        user_cell=sweep.get_user_cell(),
                        epoch=sweep._epoch,
                        ice=sweep._ice,
                        excluded_regions=sweep._excluded_regions,
                    )
                    sweep_names.append(sweep.get_name())

    crystals = xinfo.get_crystals()

    failover = params.xia2.settings.failover

    if njob > 1:
        driver_type = mp_params.type
        command_line_args = CommandLine.get_argv()[1:]
        for crystal_id in crystals.keys():
            for wavelength_id in crystals[crystal_id].get_wavelength_names():
                wavelength = crystals[crystal_id].get_xwavelength(wavelength_id)
                sweeps = wavelength.get_sweeps()
                for sweep in sweeps:
                    sweep._get_indexer()
                    sweep._get_refiner()
                    sweep._get_integrater()
                    # one single-element tuple of arguments per sweep
                    args.append((
                        group_args(
                            driver_type=driver_type,
                            stop_after=stop_after,
                            failover=failover,
                            command_line_args=command_line_args,
                            nproc=mp_params.nproc,
                            crystal_id=crystal_id,
                            wavelength_id=wavelength_id,
                            sweep_id=sweep.get_name(),
                        ),))

        from xia2.Driver.DriverFactory import DriverFactory
        default_driver_type = DriverFactory.get_driver_type()

        # run every nth job on the current computer (no need to submit to qsub)
        for i_job, arg in enumerate(args):
            if (i_job % njob) == 0:
                arg[0].driver_type = default_driver_type

        # NOTE: ``method`` is computed but the call below always uses
        # "multiprocessing" (the ``method=method`` argument is disabled).
        if mp_params.type == "qsub":
            method = "sge"
        else:
            method = "multiprocessing"
        nproc = mp_params.nproc
        qsub_command = mp_params.qsub_command
        if not qsub_command:
            qsub_command = 'qsub'
        qsub_command = '%s -V -cwd -pe smp %d' % (qsub_command, nproc)

        from libtbx import easy_mp
        results = easy_mp.parallel_map(
            process_one_sweep, args, processes=njob,
            #method=method,
            method="multiprocessing",
            qsub_command=qsub_command,
            preserve_order=True,
            preserve_exception_message=True)

        # Hack to update sweep with the serialized indexers/refiners/integraters
        # ``results`` is in the same order in which ``args`` was built, so
        # the same nested iteration is repeated with a running index.
        i_sweep = 0
        for crystal_id in crystals.keys():
            for wavelength_id in crystals[crystal_id].get_wavelength_names():
                wavelength = crystals[crystal_id].get_xwavelength(wavelength_id)
                remove_sweeps = []
                sweeps = wavelength.get_sweeps()
                for sweep in sweeps:
                    success, output, xsweep_dict = results[i_sweep]
                    assert xsweep_dict is not None
                    if output is not None:
                        Chatter.write(output)
                    if not success:
                        Chatter.write(
                            'Sweep failed: removing %s' % sweep.get_name())
                        remove_sweeps.append(sweep)
                    else:
                        Chatter.write('Loading sweep: %s' % sweep.get_name())
                        from xia2.Schema.XSweep import XSweep
                        new_sweep = XSweep.from_dict(xsweep_dict)
                        sweep._indexer = new_sweep._indexer
                        sweep._refiner = new_sweep._refiner
                        sweep._integrater = new_sweep._integrater
                    i_sweep += 1
                # removal is deferred until the sweeps have all been visited
                for sweep in remove_sweeps:
                    wavelength.remove_sweep(sweep)
                    sample = sweep.get_xsample()
                    sample.remove_sweep(sweep)

    else:
        for crystal_id in crystals.keys():
            for wavelength_id in crystals[crystal_id].get_wavelength_names():
                wavelength = crystals[crystal_id].get_xwavelength(wavelength_id)
                remove_sweeps = []
                sweeps = wavelength.get_sweeps()
                for sweep in sweeps:
                    try:
                        if stop_after == 'index':
                            sweep.get_indexer_cell()
                        else:
                            sweep.get_integrater_intensities()
                        sweep.serialize()
                    except Exception as e:
                        if failover:
                            Chatter.write('Processing sweep %s failed: %s' % \
                                          (sweep.get_name(), str(e)))
                            remove_sweeps.append(sweep)
                        else:
                            raise
                # removal is deferred until the sweeps have all been visited
                for sweep in remove_sweeps:
                    wavelength.remove_sweep(sweep)
                    sample = sweep.get_xsample()
                    sample.remove_sweep(sweep)
def run(args):
    """Run ``run_once`` on every argument in parallel and summarise results.

    Results that are ``None`` are skipped.  From the remaining results this
    writes two text tables (``results.txt`` and ``results_indexed.txt``)
    and four PDF plots (``spot_count_histogram.pdf``,
    ``n_indexed_lattices_histogram.pdf``,
    ``n_integrated_lattices_histogram.pdf`` -- only when at least one
    lattice was integrated -- and ``cell_parameters.pdf``).  Histogram and
    cell-parameter statistics are also printed.
    """
    orig_dir = os.path.abspath(os.curdir)

    # Accumulators that receive one value per result.
    sweep_directories = []
    templates = []
    n_strong_spots = flex.int()
    n_strong_spots_dmin_4 = flex.int()
    d_strong_spots_99th_percentile = flex.double()
    d_strong_spots_95th_percentile = flex.double()
    d_strong_spots_50th_percentile = flex.double()
    n_unindexed_spots = flex.int()
    n_indexed_lattices = flex.int()
    n_integrated_lattices = flex.int()

    # Accumulators that are extended with a sequence per result.
    sweep_dir_cryst = flex.std_string()
    cell_params = flex.sym_mat3_double()
    n_indexed = flex.double()
    d_min_indexed = flex.double()
    rmsds = flex.vec3_double()

    nproc = easy_mp.get_processes(libtbx.Auto)
    results = easy_mp.parallel_map(
        func=run_once,
        iterable=args,
        processes=nproc,
        method="multiprocessing",
        preserve_order=True,
        asynchronous=True,
        preserve_exception_message=True,
    )

    for outcome in results:
        if outcome is None:
            continue
        sweep_directories.append(outcome.sweep_dir)
        templates.append(outcome.template)
        n_strong_spots.append(outcome.n_strong_spots)
        n_strong_spots_dmin_4.append(outcome.n_strong_spots_dmin_4)
        n_unindexed_spots.append(outcome.n_unindexed_spots)
        n_indexed_lattices.append(outcome.n_indexed_lattices)
        n_integrated_lattices.append(outcome.n_integrated_lattices)
        d_strong_spots_50th_percentile.append(
            outcome.d_strong_spots_50th_percentile)
        d_strong_spots_95th_percentile.append(
            outcome.d_strong_spots_95th_percentile)
        d_strong_spots_99th_percentile.append(
            outcome.d_strong_spots_99th_percentile)
        cell_params.extend(outcome.cell_params)
        n_indexed.extend(outcome.n_indexed)
        d_min_indexed.extend(outcome.d_min_indexed)
        rmsds.extend(outcome.rmsds)
        sweep_dir_cryst.extend(outcome.sweep_dir_cryst)

    # ---- per-sweep table ----
    table_data = [('sweep_dir', 'template', '#strong_spots',
                   '#unindexed_spots', '#lattices',
                   'd_spacing_50th_percentile', 'd_spacing_95th_percentile',
                   'd_spacing_99th_percentile',)]
    for i, sweep_dir in enumerate(sweep_directories):
        row = (
            sweep_dir,
            templates[i],
            str(n_strong_spots[i]),
            str(n_unindexed_spots[i]),
            str(n_indexed_lattices[i]),
            str(d_strong_spots_50th_percentile[i]),
            str(d_strong_spots_95th_percentile[i]),
            str(d_strong_spots_99th_percentile[i]),
        )
        table_data.append(row)

    with open('results.txt', 'wb') as f:
        print >> f, table_utils.format(
            table_data, has_header=True, justify='right')

    # ---- per-lattice table ----
    table_data = [('sweep_dir', 'cell_a', 'cell_b', 'cell_c', 'alpha',
                   'beta', 'gamma', '#indexed_reflections', 'd_min_indexed',
                   'rmsd_x', 'rmsd_y', 'rmsd_phi')]
    for i in range(len(cell_params)):
        cell = cell_params[i]
        rmsd = rmsds[i]
        row = (sweep_dir_cryst[i],)
        row += tuple(str(cell[k]) for k in range(6))
        row += (str(n_indexed[i]), str(d_min_indexed[i]))
        row += tuple(str(rmsd[k]) for k in range(3))
        table_data.append(row)

    with open('results_indexed.txt', 'wb') as f:
        print >> f, table_utils.format(
            table_data, has_header=True, justify='right')

    # One array per cell parameter, in the order a, b, c, alpha, beta, gamma.
    cell_a, cell_b, cell_c, cell_alpha, cell_beta, cell_gamma = [
        flex.double([params[k] for params in cell_params])
        for k in range(6)]

    from matplotlib import pyplot
    from matplotlib.backends.backend_pdf import PdfPages

    pyplot.rc('font', family='serif')
    pyplot.rc('font', serif='Times New Roman')

    red, blue = '#B2182B', '#2166AC'

    def save_pdf(figure, filename):
        # write a single figure to its own PDF file
        pdf = PdfPages(filename)
        pdf.savefig(figure)
        pdf.close()

    def lattice_count_plot(counts, xlabel, filename):
        # bar chart of integer counts with one slot per unit up to the maximum
        hist = flex.histogram(counts.as_double(), n_slots=flex.max(counts))
        hist.show()
        fig = pyplot.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.bar(range(int(hist.data_max())), hist.slots(),
               width=0.75*hist.slot_width(), align='center',
               color=blue, edgecolor=blue)
        ax.set_xlim(-0.5, hist.data_max()-0.5)
        ax.set_xticks(range(0, int(hist.data_max())))
        ax.set_xlabel(xlabel)
        ax.set_ylabel('Frequency')
        save_pdf(fig, filename)

    # ---- strong spot counts ----
    hist = flex.histogram(n_strong_spots_dmin_4.as_double(), n_slots=20)
    hist.show()
    fig = pyplot.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.bar(hist.slot_centers(), hist.slots(), width=0.75*hist.slot_width(),
           color=blue, edgecolor=blue)
    ax.set_xlabel('Spot count')
    ax.set_ylabel('Frequency')
    save_pdf(fig, "spot_count_histogram.pdf")

    # ---- lattice counts ----
    lattice_count_plot(n_indexed_lattices, 'Number of indexed lattices',
                       "n_indexed_lattices_histogram.pdf")

    if flex.max(n_integrated_lattices) > 0:
        lattice_count_plot(n_integrated_lattices,
                           'Number of integrated lattices',
                           "n_integrated_lattices_histogram.pdf")

    # ---- unit cell parameters, one panel each ----
    fig, axes = pyplot.subplots(nrows=2, ncols=3, squeeze=False)
    all_cell_arrays = (cell_a, cell_b, cell_c,
                       cell_alpha, cell_beta, cell_gamma)
    for panel, cell_param in enumerate(all_cell_arrays):
        ax = axes.flat[panel]
        flex.min_max_mean_double(cell_param).show()
        print(flex.median(cell_param))
        hist = flex.histogram(cell_param, n_slots=20)
        hist.show()
        ax.bar(hist.slot_centers(), hist.slots(),
               width=0.75*hist.slot_width(), color=blue, edgecolor=blue)
        ax.set_xlabel('Cell parameter')
        ax.set_ylabel('Frequency')
    pyplot.tight_layout()
    save_pdf(fig, "cell_parameters.pdf")
def build_up(self, objective_only=False):
    """Accumulate the observation and restraint terms for the current step.

    The parameter values are applied with ``prepare_for_step()`` and the
    accumulated equations are cleared with ``reset()`` first.

    * ``objective_only=True``: residuals and weights from the target are
      added with ``add_residuals``.
    * otherwise: the target splits its matches into blocks; residuals,
      Jacobian and weights of every block are added with ``add_equations``
      -- through ``easy_mp.parallel_map`` when ``self._nproc > 1``, else in
      a plain loop.

    Restraint terms, if the target returns any, are added last in the same
    way.  The weights are the variances of the observations; see 'diagonal
    weight matrix' at http://en.wikipedia.org/wiki/Non-linear_least_squares
    """
    target = self._target

    # set current parameter values
    self.prepare_for_step()

    # Reset the state to construction time, i.e. no equations accumulated
    self.reset()

    # ---- observation terms ----
    if objective_only:
        residuals, weights = target.compute_residuals()
        self.add_residuals(residuals, weights)
    else:
        blocks = target.split_matches_into_blocks(nproc=self._nproc)

        if self._nproc > 1:
            # ensure the jacobian is not tracked
            self._jacobian = None

            def compute_block(block):
                # package the three arrays of one block as a dict
                residuals, jacobian, weights = \
                    self._target.compute_residuals_and_gradients(block)
                return {"residuals": residuals,
                        "jacobian": jacobian,
                        "weights": weights}

            def absorb_block(result):
                self.add_equations(result["residuals"],
                                   result["jacobian"],
                                   result["weights"])
                # no longer need the result
                for key in ("residuals", "jacobian", "weights"):
                    result[key] = None

            easy_mp.parallel_map(
                func=compute_block,
                iterable=blocks,
                processes=self._nproc,
                callback=absorb_block,
                method="multiprocessing",
            )
        else:
            for block in blocks:
                # the Jacobian of the block is kept on the instance
                residuals, self._jacobian, weights = \
                    target.compute_residuals_and_gradients(block)
                self.add_equations(residuals, self._jacobian, weights)

    # ---- restraints terms ----
    restraints = target.compute_restraints_residuals_and_gradients()
    if restraints:
        if objective_only:
            self.add_residuals(restraints[0], restraints[2])
        else:
            self.add_equations(restraints[0], restraints[1], restraints[2])

    return
def build_hist(nproc=1):
    """Histogram the pixel counts of a set of images into ``overload.json``.

    The images are taken from ``sys.argv``: either a single ``.json``
    datablock file (the paths of its first imageset are used) or a list of
    image file names.  An optional leading ``nproc=N`` argument overrides
    ``nproc`` and is removed from ``sys.argv``.

    Process ``p`` handles images ``p, p + nproc, p + 2 * nproc, ...``; the
    per-process histograms are merged and written, together with the
    overload limit of the first image and its reciprocal as scale factor,
    to ``overload.json``.

    Args:
        nproc: number of processes to use (default 1).
    """
    from scitbx.array_family import flex
    from libtbx import easy_mp
    from collections import Counter

    # FIXME use proper optionparser here. This works for now
    if len(sys.argv) >= 2 and sys.argv[1].startswith('nproc='):
        nproc = int(sys.argv[1][6:])
        sys.argv = sys.argv[1:]
    if len(sys.argv) == 2 and sys.argv[1].endswith('.json'):
        from dxtbx import datablock
        db = datablock.DataBlockFactory.from_json_file(sys.argv[1])[0]
        image_list = db.extract_imagesets()[0].paths()
    else:
        image_list = sys.argv[1:]
    image_count = len(image_list)

    # Faster, yet still less than ideal and wasting a lot of resources.
    limit = get_overload(image_list[0])
    binfactor = 5  # register up to 500% counts
    histmax = (limit * binfactor) + 0.0
    histbins = int(limit * binfactor) + 1
    use_python_counter = histbins > 90000000  # empirically determined

    print("Processing %d images in %d processes using %s\n" % (
        image_count, nproc,
        "python Counter" if use_python_counter else "flex arrays"))

    def process_image(process):
        # Histogram every nproc-th image, starting at index ``process``.
        import sys
        last_update = start = timeit.default_timer()
        i = process
        if use_python_counter:
            local_hist = Counter()
        else:
            local_hist = flex.histogram(
                flex.double(), data_min=0.0, data_max=histmax,
                n_slots=histbins)

        while i < image_count:
            data = read_cbf_image(image_list[i])
            if not use_python_counter:
                data = flex.histogram(
                    data.as_double().as_1d(), data_min=0.0,
                    data_max=histmax, n_slots=histbins)
            local_hist.update(data)
            i = i + nproc
            # Only the first process reports progress, at most once every
            # three seconds.
            if process == 0:
                if timeit.default_timer() > (last_update + 3):
                    last_update = timeit.default_timer()
                    if sys.stdout.isatty():
                        # move the cursor up so the line is overwritten
                        sys.stdout.write('\033[A')
                    print('Processed %d%% (%d seconds remain)    ' % (
                        100 * i // image_count,
                        round((image_count - i) * (last_update - start)
                              / (i+1))))
        return local_hist

    results = easy_mp.parallel_map(
        func=process_image,
        iterable=range(nproc),
        processes=nproc,
        preserve_exception_message=True)

    print("Merging results")
    result_hist = None
    for hist in results:
        if result_hist is None:
            result_hist = hist
        else:
            result_hist.update(hist)

    if not use_python_counter:
        # reformat histogram into dictionary
        result = list(result_hist.slots())
        result_hist = {b: count for b, count in enumerate(result) if count > 0}

    results = {
        # 1.0 forces true division: with an integer ``limit`` and Python 2
        # integer division, ``1 / limit`` would be written as 0.
        'scale_factor': 1.0 / limit,
        'overload_limit': limit,
        'counts': result_hist,
    }

    print("Writing results to overload.json")
    with open('overload.json', 'w') as fh:
        json.dump(results, fh, indent=1, sort_keys=True)
def _scale_prepare(self): '''Prepare the data for scaling - this will reindex it the reflections to the correct pointgroup and setting, for instance, and move the reflection files to the scale directory.''' Citations.cite('xds') Citations.cite('ccp4') Citations.cite('pointless') # GATHER phase - get the reflection files together... note that # it is not necessary in here to keep the batch information as we # don't wish to rebatch the reflections prior to scaling. # FIXME need to think about what I will do about the radiation # damage analysis in here... self._sweep_information = { } # FIXME in here I want to record the batch number to # epoch mapping as per the CCP4 Scaler implementation. Journal.block( 'gathering', self.get_scaler_xcrystal().get_name(), 'XDS', {'working directory':self.get_working_directory()}) for epoch in self._scalr_integraters.keys(): intgr = self._scalr_integraters[epoch] pname, xname, dname = intgr.get_integrater_project_info() sname = intgr.get_integrater_sweep_name() self._sweep_information[epoch] = { 'pname':pname, 'xname':xname, 'dname':dname, 'integrater':intgr, 'corrected_intensities':intgr.get_integrater_corrected_intensities(), 'prepared_reflections':None, 'scaled_reflections':None, 'header':intgr.get_header(), 'batches':intgr.get_integrater_batches(), 'image_to_epoch':intgr.get_integrater_sweep( ).get_image_to_epoch(), 'image_to_dose':{}, 'batch_offset':0, 'sname':sname } Journal.entry({'adding data from':'%s/%s/%s' % \ (xname, dname, sname)}) # what are these used for? # pname / xname / dname - dataset identifiers # image to epoch / batch offset / batches - for RD analysis Debug.write('For EPOCH %s have:' % str(epoch)) Debug.write('ID = %s/%s/%s' % (pname, xname, dname)) Debug.write('SWEEP = %s' % intgr.get_integrater_sweep_name()) # next work through all of the reflection files and make sure that # they are XDS_ASCII format... 
epochs = self._sweep_information.keys() epochs.sort() self._first_epoch = min(epochs) self._scalr_pname = self._sweep_information[epochs[0]]['pname'] self._scalr_xname = self._sweep_information[epochs[0]]['xname'] for epoch in epochs: intgr = self._scalr_integraters[epoch] pname = self._sweep_information[epoch]['pname'] xname = self._sweep_information[epoch]['xname'] dname = self._sweep_information[epoch]['dname'] sname = self._sweep_information[epoch]['sname'] if self._scalr_pname != pname: raise RuntimeError, 'all data must have a common project name' xname = self._sweep_information[epoch]['xname'] if self._scalr_xname != xname: raise RuntimeError, \ 'all data for scaling must come from one crystal' xsh = XDSScalerHelper() xsh.set_working_directory(self.get_working_directory()) hklin = self._sweep_information[epoch]['corrected_intensities'] hklout = os.path.join(self.get_working_directory(), '%s_%s_%s_%s_CORRECTED.HKL' %( pname, xname, dname, sname)) sweep = intgr.get_integrater_sweep() if sweep.get_frames_to_process() is not None: offset = intgr.get_frame_offset() #print "offset: %d" %offset start, end = sweep.get_frames_to_process() start -= offset end -= offset #end += 1 ???? #print "limiting batches: %d-%d" %(start, end) xsh.limit_batches(hklin, hklout, start, end) self._sweep_information[epoch]['corrected_intensities'] = hklout # if there is more than one sweep then compare the lattices # and eliminate all but the lowest symmetry examples if # there are more than one... # ------------------------------------------------- # Ensure that the integration lattices are the same # ------------------------------------------------- need_to_return = False if len(self._sweep_information.keys()) > 1: lattices = [] # FIXME run this stuff in parallel as well... 
for epoch in self._sweep_information.keys(): intgr = self._sweep_information[epoch]['integrater'] hklin = self._sweep_information[epoch]['corrected_intensities'] refiner = intgr.get_integrater_refiner() if self._scalr_input_pointgroup: pointgroup = self._scalr_input_pointgroup reindex_op = 'h,k,l' ntr = False else: pointgroup, reindex_op, ntr = \ self._pointless_indexer_jiffy(hklin, refiner) Debug.write('X1698: %s: %s' % (pointgroup, reindex_op)) lattice = Syminfo.get_lattice(pointgroup) if not lattice in lattices: lattices.append(lattice) if ntr: # if we need to return, we should logically reset # any reindexing operator right? right here all # we are talking about is the correctness of # individual pointgroups?? Bug # 3373 reindex_op = 'h,k,l' # actually, should this not be done "by magic" # when a new pointgroup is assigned in the # pointless indexer jiffy above?! intgr.set_integrater_reindex_operator( reindex_op, compose = False) need_to_return = True # bug # 2433 - need to ensure that all of the lattice # conclusions were the same... 
if len(lattices) > 1: ordered_lattices = [] for l in lattices_in_order(): if l in lattices: ordered_lattices.append(l) correct_lattice = ordered_lattices[0] Debug.write('Correct lattice asserted to be %s' % \ correct_lattice) # transfer this information back to the indexers for epoch in self._sweep_information.keys(): integrater = self._sweep_information[ epoch]['integrater'] refiner = integrater.get_integrater_refiner() sname = integrater.get_integrater_sweep_name() if not refiner: continue state = refiner.set_refiner_asserted_lattice( correct_lattice) if state == refiner.LATTICE_CORRECT: Debug.write('Lattice %s ok for sweep %s' % \ (correct_lattice, sname)) elif state == refiner.LATTICE_IMPOSSIBLE: raise RuntimeError, 'Lattice %s impossible for %s' % \ (correct_lattice, sname) elif state == refiner.LATTICE_POSSIBLE: Debug.write('Lattice %s assigned for sweep %s' % \ (correct_lattice, sname)) need_to_return = True # if one or more of them was not in the lowest lattice, # need to return here to allow reprocessing if need_to_return: self.set_scaler_done(False) self.set_scaler_prepare_done(False) return # next if there is more than one sweep then generate # a merged reference reflection file to check that the # setting for all reflection files is the same... 
# if we get to here then all data was processed with the same # lattice # ---------------------------------------------------------- # next ensure that all sweeps are set in the correct setting # ---------------------------------------------------------- if self.get_scaler_reference_reflection_file(): self._reference = self.get_scaler_reference_reflection_file() Debug.write('Using HKLREF %s' % self._reference) md = self._factory.Mtzdump() md.set_hklin(self.get_scaler_reference_reflection_file()) md.dump() self._xds_spacegroup = Syminfo.spacegroup_name_to_number( md.get_spacegroup()) Debug.write('Spacegroup %d' % self._xds_spacegroup) elif PhilIndex.params.xia2.settings.scale.reference_reflection_file: self._reference = PhilIndex.params.xia2.settings.scale.reference_reflection_file Debug.write('Using HKLREF %s' % self._reference) md = self._factory.Mtzdump() md.set_hklin(PhilIndex.params.xia2.settings.scale.reference_reflection_file) md.dump() self._xds_spacegroup = Syminfo.spacegroup_name_to_number( md.get_spacegroup()) Debug.write('Spacegroup %d' % self._xds_spacegroup) params = PhilIndex.params use_brehm_diederichs = params.xia2.settings.use_brehm_diederichs if len(self._sweep_information.keys()) > 1 and use_brehm_diederichs: brehm_diederichs_files_in = [] for epoch in self._sweep_information.keys(): intgr = self._sweep_information[epoch]['integrater'] hklin = self._sweep_information[epoch]['corrected_intensities'] refiner = intgr.get_integrater_refiner() # in here need to consider what to do if the user has # assigned the pointgroup on the command line ... 
if not self._scalr_input_pointgroup: pointgroup, reindex_op, ntr = \ self._pointless_indexer_jiffy(hklin, refiner) if ntr: # Bug # 3373 Debug.write('Reindex to standard (PIJ): %s' % \ reindex_op) intgr.set_integrater_reindex_operator( reindex_op, compose = False) reindex_op = 'h,k,l' need_to_return = True else: # 27/FEB/08 to support user assignment of pointgroups Debug.write('Using input pointgroup: %s' % \ self._scalr_input_pointgroup) pointgroup = self._scalr_input_pointgroup reindex_op = 'h,k,l' intgr.set_integrater_reindex_operator(reindex_op) intgr.set_integrater_spacegroup_number( Syminfo.spacegroup_name_to_number(pointgroup)) self._sweep_information[epoch]['corrected_intensities'] \ = intgr.get_integrater_corrected_intensities() # convert the XDS_ASCII for this sweep to mtz - on the next # get this should be in the correct setting... dname = self._sweep_information[epoch]['dname'] sname = intgr.get_integrater_sweep_name() hklin = self._sweep_information[epoch]['corrected_intensities'] hklout = os.path.join(self.get_working_directory(), '%s_%s.mtz' % (dname, sname)) FileHandler.record_temporary_file(hklout) # now use pointless to make this conversion pointless = self._factory.Pointless() pointless.set_xdsin(hklin) pointless.set_hklout(hklout) pointless.xds_to_mtz() brehm_diederichs_files_in.append(hklout) # now run cctbx.brehm_diederichs to figure out the indexing hand for # each sweep from xia2.Wrappers.Cctbx.BrehmDiederichs import BrehmDiederichs brehm_diederichs = BrehmDiederichs() brehm_diederichs.set_working_directory(self.get_working_directory()) auto_logfiler(brehm_diederichs) brehm_diederichs.set_input_filenames(brehm_diederichs_files_in) # 1 or 3? 1 seems to work better? 
brehm_diederichs.set_asymmetric(1) brehm_diederichs.run() reindexing_dict = brehm_diederichs.get_reindexing_dict() for epoch in self._sweep_information.keys(): intgr = self._sweep_information[epoch]['integrater'] dname = self._sweep_information[epoch]['dname'] sname = intgr.get_integrater_sweep_name() hklin = self._sweep_information[epoch]['corrected_intensities'] hklout = os.path.join(self.get_working_directory(), '%s_%s.mtz' % (dname, sname)) # apply the reindexing operator intgr.set_integrater_reindex_operator(reindex_op) # and copy the reflection file to the local directory hklin = self._sweep_information[epoch]['corrected_intensities'] hklout = os.path.join(self.get_working_directory(), '%s_%s.HKL' % (dname, sname)) Debug.write('Copying %s to %s' % (hklin, hklout)) shutil.copyfile(hklin, hklout) # record just the local file name... self._sweep_information[epoch][ 'prepared_reflections'] = os.path.split(hklout)[-1] elif len(self._sweep_information.keys()) > 1 and \ not self._reference: # need to generate a reference reflection file - generate this # from the reflections in self._first_epoch # # FIXME this should really use the Brehm and Diederichs method # if you have lots of little sweeps... 
intgr = self._sweep_information[self._first_epoch]['integrater'] hklin = self._sweep_information[epoch]['corrected_intensities'] refiner = intgr.get_integrater_refiner() if self._scalr_input_pointgroup: Debug.write('Using input pointgroup: %s' % \ self._scalr_input_pointgroup) pointgroup = self._scalr_input_pointgroup ntr = False reindex_op = 'h,k,l' else: pointgroup, reindex_op, ntr = self._pointless_indexer_jiffy( hklin, refiner) Debug.write('X1698: %s: %s' % (pointgroup, reindex_op)) reference_reindex_op = intgr.get_integrater_reindex_operator() if ntr: # Bug # 3373 intgr.set_integrater_reindex_operator( reindex_op, compose = False) reindex_op = 'h,k,l' need_to_return = True self._xds_spacegroup = Syminfo.spacegroup_name_to_number(pointgroup) # next pass this reindexing operator back to the source # of the reflections intgr.set_integrater_reindex_operator(reindex_op) intgr.set_integrater_spacegroup_number( Syminfo.spacegroup_name_to_number(pointgroup)) self._sweep_information[epoch]['corrected_intensities'] \ = intgr.get_integrater_corrected_intensities() hklin = self._sweep_information[epoch]['corrected_intensities'] hklout = os.path.join(self.get_working_directory(), 'xds-pointgroup-reference-unsorted.mtz') FileHandler.record_temporary_file(hklout) # now use pointless to handle this conversion pointless = self._factory.Pointless() pointless.set_xdsin(hklin) pointless.set_hklout(hklout) pointless.xds_to_mtz() self._reference = hklout if self._reference: from xia2.Driver.DriverFactory import DriverFactory def run_one_sweep(args): sweep_information = args[0] pointless_indexer_jiffy = args[1] factory = args[2] job_type = args[3] if job_type: DriverFactory.set_driver_type(job_type) intgr = sweep_information['integrater'] hklin = sweep_information['corrected_intensities'] refiner = intgr.get_integrater_refiner() # in here need to consider what to do if the user has # assigned the pointgroup on the command line ... 
if not self._scalr_input_pointgroup: pointgroup, reindex_op, ntr = \ self._pointless_indexer_jiffy(hklin, refiner) if ntr: # Bug # 3373 Debug.write('Reindex to standard (PIJ): %s' % \ reindex_op) intgr.set_integrater_reindex_operator( reindex_op, compose = False) reindex_op = 'h,k,l' need_to_return = True else: # 27/FEB/08 to support user assignment of pointgroups Debug.write('Using input pointgroup: %s' % \ self._scalr_input_pointgroup) pointgroup = self._scalr_input_pointgroup reindex_op = 'h,k,l' intgr.set_integrater_reindex_operator(reindex_op) intgr.set_integrater_spacegroup_number( Syminfo.spacegroup_name_to_number(pointgroup)) sweep_information['corrected_intensities'] \ = intgr.get_integrater_corrected_intensities() # convert the XDS_ASCII for this sweep to mtz - on the next # get this should be in the correct setting... hklin = sweep_information['corrected_intensities'] # now use pointless to make this conversion # try with no conversion?! pointless = self._factory.Pointless() pointless.set_xdsin(hklin) hklout = os.path.join( self.get_working_directory(), '%d_xds-pointgroup-unsorted.mtz' %pointless.get_xpid()) FileHandler.record_temporary_file(hklout) pointless.set_hklout(hklout) pointless.xds_to_mtz() pointless = self._factory.Pointless() pointless.set_hklin(hklout) pointless.set_hklref(self._reference) pointless.decide_pointgroup() pointgroup = pointless.get_pointgroup() reindex_op = pointless.get_reindex_operator() # for debugging print out the reindexing operations and # what have you... Debug.write('Reindex to standard: %s' % reindex_op) # this should send back enough information that this # is in the correct pointgroup (from the call above) and # also in the correct setting, from the interaction # with the reference set... - though I guess that the # spacegroup number should not have changed, right? # set the reindex operation afterwards... though if the # spacegroup number is the same this should make no # difference, right?! 
intgr.set_integrater_spacegroup_number( Syminfo.spacegroup_name_to_number(pointgroup)) intgr.set_integrater_reindex_operator(reindex_op) sweep_information['corrected_intensities'] \ = intgr.get_integrater_corrected_intensities() # and copy the reflection file to the local directory dname = sweep_information['dname'] sname = intgr.get_integrater_sweep_name() hklin = sweep_information['corrected_intensities'] hklout = os.path.join(self.get_working_directory(), '%s_%s.HKL' % (dname, sname)) Debug.write('Copying %s to %s' % (hklin, hklout)) shutil.copyfile(hklin, hklout) # record just the local file name... sweep_information['prepared_reflections'] = os.path.split(hklout)[-1] return sweep_information from libtbx import easy_mp params = PhilIndex.get_python_object() mp_params = params.xia2.settings.multiprocessing njob = mp_params.njob if njob > 1: # cache drivertype drivertype = DriverFactory.get_driver_type() args = [ (self._sweep_information[epoch], self._pointless_indexer_jiffy, self._factory, mp_params.type) for epoch in self._sweep_information.keys()] results_list = easy_mp.parallel_map( run_one_sweep, args, params=None, processes=njob, method="threading", asynchronous=True, callback=None, preserve_order=True, preserve_exception_message=True) # restore drivertype DriverFactory.set_driver_type(drivertype) # results should be given back in the same order for i, epoch in enumerate(self._sweep_information.keys()): self._sweep_information[epoch] = results_list[i] else: for epoch in self._sweep_information.keys(): self._sweep_information[epoch] = run_one_sweep( (self._sweep_information[epoch], self._pointless_indexer_jiffy, self._factory, None)) else: # convert the XDS_ASCII for this sweep to mtz epoch = self._first_epoch intgr = self._sweep_information[epoch]['integrater'] refiner = intgr.get_integrater_refiner() sname = intgr.get_integrater_sweep_name() hklout = os.path.join(self.get_working_directory(), '%s-pointless.mtz' % sname) 
FileHandler.record_temporary_file(hklout) pointless = self._factory.Pointless() pointless.set_xdsin(self._sweep_information[epoch]['corrected_intensities']) pointless.set_hklout(hklout) pointless.xds_to_mtz() # run it through pointless interacting with the # Indexer which belongs to this sweep hklin = hklout if self._scalr_input_pointgroup: Debug.write('Using input pointgroup: %s' % \ self._scalr_input_pointgroup) pointgroup = self._scalr_input_pointgroup ntr = False reindex_op = 'h,k,l' else: pointgroup, reindex_op, ntr = self._pointless_indexer_jiffy( hklin, refiner) if ntr: # if we need to return, we should logically reset # any reindexing operator right? right here all # we are talking about is the correctness of # individual pointgroups?? Bug # 3373 reindex_op = 'h,k,l' intgr.set_integrater_reindex_operator( reindex_op, compose = False) need_to_return = True self._xds_spacegroup = Syminfo.spacegroup_name_to_number(pointgroup) # next pass this reindexing operator back to the source # of the reflections intgr.set_integrater_reindex_operator(reindex_op) intgr.set_integrater_spacegroup_number( Syminfo.spacegroup_name_to_number(pointgroup)) self._sweep_information[epoch]['corrected_intensities'] \ = intgr.get_integrater_corrected_intensities() hklin = self._sweep_information[epoch]['corrected_intensities'] dname = self._sweep_information[epoch]['dname'] hklout = os.path.join(self.get_working_directory(), '%s_%s.HKL' % (dname, sname)) # and copy the reflection file to the local # directory Debug.write('Copying %s to %s' % (hklin, hklout)) shutil.copyfile(hklin, hklout) # record just the local file name... 
self._sweep_information[epoch][ 'prepared_reflections'] = os.path.split(hklout)[-1] if need_to_return: self.set_scaler_done(False) self.set_scaler_prepare_done(False) return unit_cell_list = [] for epoch in self._sweep_information.keys(): integrater = self._sweep_information[epoch]['integrater'] cell = integrater.get_integrater_cell() n_ref = integrater.get_integrater_n_ref() Debug.write('Cell for %s: %.2f %.2f %.2f %.2f %.2f %.2f' % \ (integrater.get_integrater_sweep_name(), cell[0], cell[1], cell[2], cell[3], cell[4], cell[5])) Debug.write('=> %d reflections' % n_ref) unit_cell_list.append((cell, n_ref)) self._scalr_cell = compute_average_unit_cell(unit_cell_list) self._scalr_resolution_limits = { } Debug.write('Determined unit cell: %.2f %.2f %.2f %.2f %.2f %.2f' % \ tuple(self._scalr_cell)) if os.path.exists(os.path.join( self.get_working_directory(), 'REMOVE.HKL')): os.remove(os.path.join( self.get_working_directory(), 'REMOVE.HKL')) Debug.write('Deleting REMOVE.HKL at end of scale prepare.') return
def run_fast(args): islow = args[0] result_val = flex.int(388) for jfast in xrange(388): histo1 = histograms[islow*388+jfast,:] nphotons = fit_3_gaussian.test_fit(histo1,plot=False) print islow*388+jfast, "of %d, # photons= %d"%(len(histograms),nphotons) result_val[jfast]=nphotons return {islow:result_val} iterable = [(i,) for i in xrange(185)] results = easy_mp.parallel_map( func=run_fast, iterable=iterable, processes=nproc, method="multiprocessing", preserve_order=True ) for result in results: for key in result.keys(): values = result[key] for i in xrange(len(values)): inelastic[key,i] = values[i] # save fitting result: # some heuristics to output a proper filename if fname.find('_offline')>=0: filename = fname.replace('_offline','_fitted') else:
strongs.append(strong_filepath) print "Found %d images to index"%len(images) def index(item): image, strong = item base = os.path.splitext(os.path.basename(image))[0] datablock = os.path.join(params.output_dir, base + "_datablock.json") command = "dials.import %s output.datablock=%s"%(image, datablock) if params.reference_geometry is not None: command += " reference_geometry=%s"%params.reference_geometry easy_run.fully_buffered(command).raise_if_errors().show_stdout() command = "dials.index %s %s output.experiments=%s output.reflections=%s"% ( datablock, strong, os.path.join(params.output_dir, base + "_experiments.json"), os.path.join(params.output_dir, base + "_indexed.pickle")) if indexing_phil is not None: command += " %s"%indexing_phil easy_run.fully_buffered(command).show_stdout() easy_mp.parallel_map( func=index, iterable=zip(images, strongs), processes=params.mp.nproc, method=params.mp.method, preserve_order=False, preserve_exception_message=True) print "All done"
def run(self):
    '''Execute the script.

    Parses the command line, configures logging, loads the reference
    geometry, then processes every input either as pre-imported, per-frame
    datablocks (``dispatch.pre_import`` set, or exactly one path given) or
    by importing each file inside the worker. Work is distributed with MPI
    when ``mp.method == 'mpi'``, otherwise through ``easy_mp.parallel_map``.

    Returns None. If no paths are given, the parser help is printed and the
    method returns early. The per-file worker raises ``Abort`` when a file
    holds more than one imageset or more than one image.
    '''
    from dials.util import log
    from time import time
    from libtbx import easy_mp
    from collections import Counter
    import copy

    def make_tags(indices, basenames):
        # A basename is used as the tag on its own when it is unique;
        # otherwise a zero-padded index is appended to disambiguate it.
        # Counting once up front avoids a list.count() scan per item.
        counts = Counter(basenames)
        return ["%s_%05d" % (basename, i) if counts[basename] > 1
                else basename
                for i, basename in zip(indices, basenames)]

    # Parse the command line
    params, options, all_paths = self.parser.parse_args(
        show_diff_phil=False, return_unhandled=True)

    # Check we have some filenames
    if not all_paths:
        self.parser.print_help()
        return

    # Save the options
    self.options = options
    self.params = params

    st = time()

    # Configure logging
    log.config(
        params.verbosity,
        info='dials.process.log',
        debug='dials.process.debug.log')

    # Log the diff phil. Compare by value: identity comparison against a
    # string literal only works by accident of interning.
    diff_phil = self.parser.diff_phil.as_str()
    if diff_phil != '':
        logger.info('The following parameters have been modified:\n')
        logger.info(diff_phil)

    # NOTE(review): presumably this is what populates
    # self.reference_detector, which is read below - confirm.
    self.load_reference_geometry()
    from dials.command_line.dials_import import ManualGeometryUpdater
    update_geometry = ManualGeometryUpdater(params)

    # Import stuff
    logger.info("Loading files...")
    pre_import = params.dispatch.pre_import or len(all_paths) == 1
    if pre_import:
        # Handle still imagesets by breaking them apart into multiple
        # datablocks. Further handle single file still imagesets (like HDF5)
        # by tagging each frame using its index.
        datablocks = [do_import(path) for path in all_paths]

        if self.reference_detector is not None:
            from dxtbx.model import Detector
            for datablock in datablocks:
                for imageset in datablock.extract_imagesets():
                    for i in range(len(imageset)):
                        # A fresh Detector per frame, rebuilt from the
                        # reference detector's dictionary form.
                        imageset.set_detector(
                            Detector.from_dict(
                                self.reference_detector.to_dict()),
                            index=i)

        for datablock in datablocks:
            for imageset in datablock.extract_imagesets():
                update_geometry(imageset)

        indices = []
        basenames = []
        split_datablocks = []
        for datablock in datablocks:
            for imageset in datablock.extract_imagesets():
                paths = imageset.paths()
                for i in range(len(imageset)):
                    # One single-image datablock per frame
                    subset = imageset[i:i+1]
                    split_datablocks.append(
                        DataBlockFactory.from_imageset(subset)[0])
                    indices.append(i)
                    basenames.append(
                        os.path.splitext(os.path.basename(paths[i]))[0])
        tags = make_tags(indices, basenames)

        # Wrapper function
        def do_work(item):
            # Each task gets its own copy of the parameters
            Processor(copy.deepcopy(params)).process_datablock(
                item[0], item[1])

        iterable = zip(tags, split_datablocks)

    else:
        basenames = [os.path.splitext(os.path.basename(filename))[0]
                     for filename in all_paths]
        # Here the disambiguating index is the position on the command line
        tags = make_tags(range(len(basenames)), basenames)

        # Wrapper function
        def do_work(item):
            tag, filename = item

            datablock = do_import(filename)
            imagesets = datablock.extract_imagesets()
            if len(imagesets) == 0 or len(imagesets[0]) == 0:
                logger.info("Zero length imageset in file: %s"%filename)
                return
            if len(imagesets) > 1:
                raise Abort(
                    "Found more than one imageset in file: %s"%filename)
            if len(imagesets[0]) > 1:
                raise Abort(
                    "Found a multi-image file. Run again with pre_import=True")

            if self.reference_detector is not None:
                from dxtbx.model import Detector
                imagesets[0].set_detector(
                    Detector.from_dict(self.reference_detector.to_dict()))

            update_geometry(imagesets[0])

            Processor(copy.deepcopy(params)).process_datablock(tag, datablock)

        iterable = zip(tags, all_paths)

    # Process the data
    if params.mp.method == 'mpi':
        from mpi4py import MPI
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank() # each process in MPI has a unique id, 0-indexed
        size = comm.Get_size() # size: number of processes running in this job

        # Every item satisfies the test below for exactly one rank, so the
        # work is split across ranks without any communication.
        for i, item in enumerate(iterable):
            if (i+rank)%size == 0:
                do_work(item)
    else:
        easy_mp.parallel_map(
            func=do_work,
            iterable=iterable,
            processes=params.mp.nproc,
            method=params.mp.method,
            preserve_order=True,
            preserve_exception_message=True)

    # Total Time
    logger.info("")
    logger.info("Total Time Taken = %f seconds" % (time() - st))
def refined_settings_factory_from_refined_triclinic(
        params, experiments, reflections, i_setting=None,
        lepage_max_delta=5.0, nproc=1, refiner_verbosity=0):
    """Build the list of candidate Bravais settings and refine each one.

    Starting from the single crystal in ``experiments``, every setting
    produced by ``iotbx_converter`` for the crystal's unit cell (with
    ``lepage_max_delta``) is wrapped in a ``bravais_setting``, numbered,
    given an unrefined crystal model constrained to its lattice system, and
    passed to ``refine_subgroup`` through ``easy_mp.parallel_map`` using
    ``nproc`` processes. Each entry of the list is then replaced by the
    corresponding result, and the list is returned.

    ``i_setting`` is accepted but not used in this function.
    """
    crystals = experiments.crystals()
    assert len(crystals) == 1
    crystal = experiments.crystals()[0]

    # Work on a private copy of the reflections
    used_reflections = copy.deepcopy(reflections)
    UC = crystal.get_unit_cell()

    from rstbx.dps_core.lepage import iotbx_converter

    Lfat = refined_settings_list()
    for candidate in iotbx_converter(UC, lepage_max_delta):
        Lfat.append(bravais_setting(candidate))

    # The results of these two lookups are not used below; the calls are
    # kept so that the function's behaviour is unchanged.
    Lfat.supergroup()
    triclinic = Lfat.triclinic()
    used_reflections['miller_index']

    # assert no transformation between indexing and bravais list
    assert str(triclinic['cb_op_inp_best']) == "a,b,c"

    # Settings are numbered in descending order: Nset for the first entry
    # down to 1 for the last.
    Nset = len(Lfat)
    for j in xrange(Nset):
        Lfat[j].setting_number = Nset - j

    from cctbx.crystal_orientation import crystal_orientation
    from cctbx import sgtbx
    from scitbx import matrix

    for j in xrange(Nset):
        setting = Lfat[j]

        # First nine elements of the change-of-basis operator
        cb_op = setting['cb_op_inp_best'].c().as_double_array()[0:9]
        orient = crystal_orientation(crystal.get_A(), True)
        orient_best = orient.change_basis(matrix.sqr(cb_op).transpose())
        constrain_orient = orient_best.constrain(setting['system'])

        # Lowest-symmetry space group for this lattice, moved out of the
        # reference setting by the inverse of the operator into it.
        lattice = setting["bravais"]
        cb_op_best_ref = \
            setting['best_subsym'].change_of_basis_op_to_reference_setting()
        sg_number = \
            bravais_lattice_to_lowest_symmetry_spacegroup_number[lattice]
        space_group = sgtbx.space_group_info(number=sg_number).group()
        space_group = space_group.change_basis(cb_op_best_ref.inverse())

        # Store the lattice type recomputed from the transformed group
        setting["bravais"] = str(
            bravais_types.bravais_lattice(group=space_group))
        setting.unrefined_crystal = dials_crystal_from_orientation(
            constrain_orient, space_group)

    # One argument tuple per setting
    args = [
        (params, subgroup, used_reflections, experiments, refiner_verbosity)
        for subgroup in Lfat]

    results = easy_mp.parallel_map(
        func=refine_subgroup,
        iterable=args,
        processes=nproc,
        method="multiprocessing",
        preserve_order=True,
        asynchronous=True,
        preserve_exception_message=True)

    # preserve_order=True above, so results line up with Lfat by position
    for position, refined in enumerate(results):
        Lfat[position] = refined
    return Lfat
def run(self):
    '''Execute the script.

    Parses the command line, configures logging, loads the input files
    into datablocks (in parallel when more than one path is given), splits
    every imageset into single-image datablocks, and processes each of
    those through ``easy_mp.parallel_map``.

    Returns None. If no paths are given, the parser help is printed and the
    method returns early. Raises ``Abort`` when no datablocks are loaded;
    the per-file import helper raises ``Abort`` when a file yields zero or
    several datablocks.
    '''
    from dials.util import log
    from logging import info
    from time import time
    from libtbx.utils import Abort
    from libtbx import easy_mp
    from collections import Counter
    import os
    import copy
    from dxtbx.datablock import DataBlockFactory

    # Parse the command line
    params, options, all_paths = self.parser.parse_args(
        show_diff_phil=False, return_unhandled=True)

    # Check we have some filenames
    if not all_paths:
        self.parser.print_help()
        return

    # Save the options
    self.options = options
    self.params = params

    st = time()

    # Configure logging
    log.config(
        params.verbosity,
        info='dials.process.log',
        debug='dials.process.debug.log')

    # Log the diff phil. Compare by value: identity comparison against a
    # string literal only works by accident of interning.
    diff_phil = self.parser.diff_phil.as_str()
    if diff_phil != '':
        info('The following parameters have been modified:\n')
        info(diff_phil)

    # Import stuff
    info("Loading files...")
    if len(all_paths) == 1:
        datablocks = DataBlockFactory.from_filenames(all_paths)
    else:
        def do_import(filename):
            # Load one file, insisting on exactly one datablock from it
            info("Loading %s"%os.path.basename(filename))
            datablocks = DataBlockFactory.from_filenames([filename])
            if len(datablocks) == 0:
                raise Abort("Could not load %s"%filename)
            if len(datablocks) > 1:
                raise Abort(
                    "Got multiple datablocks from file %s"%filename)
            return datablocks[0]

        datablocks = easy_mp.parallel_map(
            func=do_import,
            iterable=all_paths,
            processes=params.mp.nproc,
            method=params.mp.method,
            preserve_order=True,
            preserve_exception_message=True)

    if len(datablocks) == 0:
        raise Abort('No datablocks specified')

    # Handle still imagesets by breaking them apart into multiple datablocks
    # Further handle single file still imagesets (like HDF5) by tagging each
    # frame using its index
    indices = []
    basenames = []
    split_datablocks = []
    for datablock in datablocks:
        for imageset in datablock.extract_imagesets():
            for i in range(len(imageset)):
                # One single-image datablock per frame
                subset = imageset[i:i+1]
                split_datablocks.append(
                    DataBlockFactory.from_imageset(subset)[0])
                indices.append(i)
                basenames.append(
                    os.path.splitext(
                        os.path.basename(subset.paths()[0]))[0])

    # A basename is used as the tag on its own when it is unique; otherwise
    # the frame index is appended. Counting once up front avoids a
    # list.count() scan per frame.
    counts = Counter(basenames)
    tags = ["%s_%d"%(basename, i) if counts[basename] > 1 else basename
            for i, basename in zip(indices, basenames)]

    # Wrapper function
    def do_work(item):
        # Each task gets its own copy of the parameters
        Processor(copy.deepcopy(params)).process_datablock(item[0], item[1])

    # Process the data
    easy_mp.parallel_map(
        func=do_work,
        iterable=zip(tags, split_datablocks),
        processes=params.mp.nproc,
        method=params.mp.method,
        preserve_order=True,
        preserve_exception_message=True)

    # Total Time
    info("")
    info("Total Time Taken = %f seconds" % (time() - st))
def run(args):
    """Sample random crystal orientations and report predicted spot counts.

    Imports a single sweep from ``args``, reads the unit cell and space
    group from the remaining phil arguments, then predicts reflections for
    ``params.n_samples`` orientations obtained from ``random_rotation()``.
    Prints basic statistics, a histogram and the raw counts; when
    ``params.plot`` is set, also writes the histogram to
    ``predicted_count_histogram.pdf``.
    """
    from libtbx.phil import command_line
    from dials.util.command_line import Importer
    from dials.array_family import flex

    print(args)

    # Exactly one datablock holding exactly one sweep is expected
    importer = Importer(args, check_format=False)
    assert len(importer.datablocks) == 1
    sweeps = importer.datablocks[0].extract_imagesets()
    assert len(sweeps) == 1
    sweep = sweeps[0]

    # Whatever the importer did not consume is interpreted as phil
    interpreter = command_line.argument_interpreter(
        master_params=master_phil_scope)
    working_phil = interpreter.process_and_fetch(
        args=importer.unhandled_arguments)
    working_phil.show()

    params = working_phil.extract()
    assert params.unit_cell is not None
    assert params.space_group is not None
    unit_cell = params.unit_cell
    space_group = params.space_group.group()

    import random
    from dxtbx.model.crystal import crystal_model
    from cctbx import crystal, miller
    from scitbx import matrix

    # Seed both random number generators from the same parameter
    flex.set_random_seed(params.random_seed)
    random.seed(params.random_seed)
    crystal_symmetry = crystal.symmetry(
        unit_cell=unit_cell, space_group=space_group)

    # the reciprocal matrix
    B = matrix.sqr(unit_cell.fractionalization_matrix()).transpose()

    # Initial empty array; rebound to the real counts further down
    n_predicted = flex.double()

    def predict_once(item):
        # Count the reflections predicted for one orientation matrix,
        # restricted to d-spacings above d_min when that is set.
        from dxtbx.model.experiment.experiment_list import Experiment
        U = item[0]
        direct_matrix = (U * B).inverse()
        cryst_model = crystal_model(
            direct_matrix[0:3],
            direct_matrix[3:6],
            direct_matrix[6:9],
            space_group=space_group)
        experiment = Experiment(
            imageset=sweep,
            beam=sweep.get_beam(),
            detector=sweep.get_detector(),
            goniometer=sweep.get_goniometer(),
            scan=sweep.get_scan(),
            crystal=cryst_model)
        predicted = flex.reflection_table.from_predictions(experiment)
        miller_set = miller.set(
            crystal_symmetry, predicted['miller_index'],
            anomalous_flag=True)
        if params.d_min is not None:
            keep = miller_set.d_spacings().data() > params.d_min
            predicted = predicted.select(keep)
        return len(predicted)

    from libtbx import easy_mp

    # One single-element tuple per sample
    rotations = [(random_rotation(),) for i in range(params.n_samples)]
    results = easy_mp.parallel_map(
        func=predict_once,
        iterable=rotations,
        processes=params.nproc,
        preserve_order=True,
        preserve_exception_message=True)
    n_predicted = flex.double(results)

    print("Basic statistics:")
    from scitbx.math import basic_statistics
    basic_statistics(n_predicted).show()

    print("Histogram:")
    hist = flex.histogram(n_predicted, n_slots=20)
    hist.show()

    print("Raw spot counts:")
    print(list(n_predicted))

    if not params.plot:
        return

    from matplotlib import pyplot
    from matplotlib.backends.backend_pdf import PdfPages

    pyplot.rc('font', family='serif')
    pyplot.rc('font', serif='Times New Roman')

    blue = '#2166AC'
    fig = pyplot.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.bar(
        hist.slot_centers(),
        hist.slots(),
        width=0.75*hist.slot_width(),
        color=blue,
        edgecolor=blue)
    ax.set_xlabel('Spot count')
    ax.set_ylabel('Frequency')

    pdf = PdfPages("predicted_count_histogram.pdf")
    pdf.savefig(fig)
    pdf.close()