Python parallel_map Examples, libtbx.easy_mp.parallel_map Python Examples

Example #1

0

Show file

File: tst_index.py Project: biochem-fan/dials

def run(args):
  """Run the indexing exercises in parallel, optionally restricted by ``args``.

  ``args`` is a sequence of strings. When it is non-empty, every entry is
  converted with ``int`` and used as a 1-based index into the exercise tuple,
  so only the selected exercises are run. Returns ``None``.
  """
  # Nothing can be exercised without the dials_regression module.
  if not libtbx.env.has_module("dials_regression"):
    print "Skipping exercise_index_3D_FFT_simple: dials_regression not present"
    return

  exercises = (exercise_1, exercise_2, exercise_3, exercise_4, exercise_5,
               exercise_6, exercise_7, exercise_8, exercise_9, exercise_10,
               exercise_11, exercise_12, exercise_13, exercise_14, exercise_15,
               exercise_16)
  if len(args):
    # Exercise numbers supplied by the caller are 1-based.
    args = [int(arg) for arg in args]
    for arg in args: assert arg > 0
    exercises = [exercises[arg-1] for arg in args]

  from libtbx import easy_mp

  # Never request more worker processes than there are exercises to run.
  nproc = easy_mp.get_processes(libtbx.Auto)
  nproc = min(nproc, len(exercises))

  def run_parallel(args):
    # Each work item is a 1-tuple wrapping the exercise callable.
    assert len(args) == 1
    exercise = args[0]
    exercise()

  easy_mp.parallel_map(
    func=run_parallel,
    iterable=[(e,) for e in exercises],
    processes=nproc)

Example #2

0

Show file

File: tst_easy_mp_state.py Project: yayahjb/cctbx_project

def exercise_multiprocessing(mp_nproc=1,
                             mp_threads=1,
                             mp_method="multiprocessing",
                             tasks=3):
    """Check that a shared state object is changed by a parallel map.

    A single ``state_object`` is created, ``tasks`` work items are mapped over
    a function that calls ``generate_state()`` on it, and the state read back
    afterwards must differ from the one read before.

    ``mp_nproc`` and ``mp_method`` are forwarded to ``easy_mp.parallel_map``;
    ``mp_threads`` is only reported in the banner line.
    """
    banner = "Running %s test with %d processes, %d threads, %d tasks"
    print(banner % (mp_method, mp_nproc, mp_threads, tasks))

    # One instance shared by every task; remember what it looked like at start.
    shared = state_object()
    state_before = shared.get_state()

    def change_stored_state(task):
        # Sleep a random fraction of (0.5 / tasks) seconds, then mutate.
        time.sleep(random.random() / 2 / tasks)
        shared.generate_state()

    easy_mp.parallel_map(
        iterable=range(tasks),
        func=change_stored_state,
        processes=mp_nproc,
        method=mp_method,
    )

    # The state must have been modified by at least one task.
    state_after = shared.get_state()
    assert state_before != state_after

Example #3

0

Show file

File: tst_easy_mp_state.py Project: cctbx/cctbx-playground

def exercise_multiprocessing(mp_nproc=1, mp_threads=1, mp_method="multiprocessing", tasks=3):
  """Check that a shared state object is changed by a parallel map.

  A single ``state_object`` is created, ``tasks`` work items are mapped over a
  function that calls ``generate_state()`` on it, and the state read back
  afterwards must differ from the initial one.

  ``mp_nproc`` and ``mp_method`` are forwarded to ``easy_mp.parallel_map``;
  ``mp_threads`` is only reported in the banner line.
  """
  # A parenthesised single-argument print produces the same output on
  # Python 2 and is also valid syntax on Python 3.
  print("Running %s test with %d processes, %d threads, %d tasks" %
        (mp_method, mp_nproc, mp_threads, tasks))

  # Create one shared instance of the state object and extract its initial state
  master_state_object = state_object()
  initial_state = master_state_object.get_state()

  # This is a function that changes the state on the object
  def change_stored_state(task):
    # Sleep a random fraction of (0.5 / tasks) seconds before mutating.
    time.sleep(random.random() / 2 / tasks)
    master_state_object.generate_state()

  # Call the state-changing function in parallel
  easy_mp.parallel_map(
    iterable=range(tasks),
    func=change_stored_state,
    processes=mp_nproc,
    method=mp_method)

  # Get the final state of the object
  final_state = master_state_object.get_state()

  # Did it change?
  assert initial_state != final_state

Example #4

0

Show file

File: postrefine.py Project: elliottslaughter/cctbx_project

def scale_frames(frames, frame_files, iparams):
    """Scale every frame against a common mean intensity.

    Unless ``iparams.flag_apply_b_by_frame`` is set, the mean intensity of each
    frame file is computed in parallel and the median of the non-``None``
    values is used as the reference; otherwise the reference is 0. Each frame
    is then scaled in parallel with that reference.

    Returns the list of non-``None`` scaling results, in the order produced by
    ``parallel_map``.
    """
    avg_mode = 'average'
    n_workers = iparams.n_processors

    if iparams.flag_apply_b_by_frame:
        mean_of_mean_I = 0
    else:
        # Pass 1: <I> of every frame, computed in parallel.
        mean_jobs = [(frame_file, iparams, avg_mode)
                     for frame_file in frame_files]
        mean_outcomes = parallel_map(iterable=mean_jobs,
                                     func=determine_mean_I_mproc,
                                     processes=n_workers)
        frames_mean_I = flex.double()
        for outcome in mean_outcomes:
            if outcome is None:
                continue
            mean_I, _txt = outcome
            if mean_I is not None:
                frames_mean_I.append(mean_I)
        mean_of_mean_I = np.median(frames_mean_I)

    # Pass 2: scale each frame with the reference computed above.
    scale_jobs = [(frame_no, frame_file, iparams, mean_of_mean_I, avg_mode)
                  for frame_no, frame_file in zip(frames, frame_files)]
    scale_outcomes = parallel_map(iterable=scale_jobs,
                                  func=scale_frame_by_mean_I_mproc,
                                  processes=n_workers)

    observations_merge_mean_set = []
    for outcome in scale_outcomes:
        if outcome is None:
            continue
        pres, _txt = outcome
        if pres is not None:
            observations_merge_mean_set.append(pres)
    return observations_merge_mean_set

Example #5

0

Show file

File: iota_threads.py Project: yayahjb/cctbx_project

    def run(self):
        """Map ``self.spf_wrapper`` over ``self.data_list`` and notify the parent.

        An ``IOTATermination`` raised by the map is caught, recorded in
        ``self.terminated`` and printed. Afterwards the parent is notified via
        wx: a ``SpotFinderTerminated`` event is posted when the run was
        terminated, and ``self.parent.onSpfAllDone`` is always scheduled with
        the data list.
        """
        try:
            parallel_map(iterable=self.data_list,
                         func=self.spf_wrapper,
                         callback=self.callback,
                         processes=self.n_proc)
        except IOTATermination as e:
            self.terminated = True
            print(e)

        # Signal that this batch is finished
        try:
            # NOTE(review): self.terminated is only assigned in the handler
            # above; presumably it is initialised elsewhere - confirm.
            if self.terminated:
                print('RUN TERMINATED!')
                evt = SpotFinderTerminated(tp_EVT_SPFTERM, -1)
                wx.PostEvent(self.parent, evt)

            wx.CallAfter(self.parent.onSpfAllDone, self.data_list)

            # info = self.data_list
            # evt = SpotFinderAllDone(tp_EVT_SPFALLDONE, -1, info=info)
            # wx.PostEvent(self.parent, evt)
            return
        except TypeError as e:
            # A TypeError from the notification calls is printed, not raised.
            print(e)
            return

Example #6

0

Show file

File: tst_index.py Project: hackerlank/dials

def run(args):
    """Run the indexing exercises in parallel, optionally restricted by ``args``.

    When ``args`` is non-empty each entry is converted with ``int`` and used
    as a 1-based index into the exercise tuple, selecting what to run.
    Returns ``None``.
    """
    if not libtbx.env.has_module("dials_regression"):
        print("Skipping exercise_index_3D_FFT_simple: dials_regression not present")
        return

    exercises = (
        exercise_1, exercise_2, exercise_3, exercise_4, exercise_5,
        exercise_6, exercise_7, exercise_8, exercise_9, exercise_10,
        exercise_11, exercise_12, exercise_13, exercise_14, exercise_15,
        exercise_16, exercise_17,
    )
    if len(args):
        # Caller-supplied exercise numbers are 1-based and must be positive.
        selected = [int(arg) for arg in args]
        for number in selected:
            assert number > 0
        exercises = [exercises[number - 1] for number in selected]

    from libtbx import easy_mp

    # Cap the worker count at the number of exercises.
    nproc = min(easy_mp.get_processes(libtbx.Auto), len(exercises))

    def run_parallel(args):
        # Work items are 1-tuples holding the exercise callable.
        assert len(args) == 1
        args[0]()

    work_items = [(exercise, ) for exercise in exercises]
    easy_mp.parallel_map(func=run_parallel,
                         iterable=work_items,
                         processes=nproc)

Example #7

0

Show file

  def process(self):
    '''
    Do all the processing tasks.

    Initialises the manager, runs every task from ``self.manager.tasks()``
    (through ``easy_mp.parallel_map`` when more than one process is used,
    otherwise serially in this process), finalises the manager and stores the
    elapsed time measured with ``time.time`` in ``self.manager.time.user_time``.

    :return: The processing results as ``(result1, result2, self.manager.time)``

    '''
    from time import time
    from libtbx import easy_mp
    import platform
    start_time = time()
    self.manager.initialize()
    mp_method = self.manager.params.mp.method
    # Never use more processes than there are tasks in the manager.
    mp_nproc = min(len(self.manager), self.manager.params.mp.nproc)
    mp_nthreads = self.manager.params.mp.nthreads
    if mp_nproc > 1 and platform.system() == "Windows": # platform.system() forks which is bad for MPI, so don't use it unless nproc > 1
      # Logger.warn is a deprecated alias; Logger.warning is the supported name.
      logger.warning("")
      logger.warning("*" * 80)
      logger.warning("Multiprocessing is not available on windows. Setting nproc = 1")
      logger.warning("*" * 80)
      logger.warning("")
      mp_nproc = 1
    assert mp_nproc > 0, "Invalid number of processors"
    job.nthreads = mp_nthreads
    logger.info(self.manager.summary())
    logger.info(' Using %s with %d parallel job(s) and %d thread(s) per job\n' % (
      mp_method, mp_nproc, mp_nthreads))
    if mp_nproc > 1:
      def process_output(result):
        # result is (task result, log messages captured while it ran).
        for message in result[1]:
          logger.log(message.levelno, message.msg)
        self.manager.accumulate(result[0])
        # Drop the bulky payload once it has been accumulated.
        result[0].reflections = None
        result[0].data = None
      def execute_task(task):
        # The unused, Python-2-only cStringIO import was removed from here.
        from dials.util import log
        import logging
        log.config_simple_cached()
        result = task()
        handlers = logging.getLogger('dials').handlers
        assert len(handlers) == 1, "Invalid number of logging handlers"
        # Return the cached messages so process_output can replay them.
        return result, handlers[0].messages()
      easy_mp.parallel_map(
        func=execute_task,
        iterable=list(self.manager.tasks()),
        processes=mp_nproc,
        callback=process_output,
        method=mp_method,
        preserve_order=True,
        preserve_exception_message=True)
    else:
      for task in self.manager.tasks():
        self.manager.accumulate(task())
    self.manager.finalize()
    end_time = time()
    self.manager.time.user_time = end_time - start_time
    result1, result2 = self.manager.result()
    return result1, result2, self.manager.time

Example #8

0

Show file

File: iota_threads.py Project: cctbx-xfel/cctbx_project

 def run(self):
     """Map ``self.spf_wrapper`` over ``self.data_list`` with ``parallel_map``.

     Any ``Exception`` raised by the map is caught and reported on stdout
     with a 'SPOTFINDING THREAD:' prefix; it is not re-raised.
     """
     try:
         parallel_map(iterable=self.data_list,
                      func=self.spf_wrapper,
                      callback=self.callback,
                      preserve_exception_message=True,
                      processes=None)
     # 'except X as e' and a single-argument print() are valid on Python 2.6+
     # and Python 3; the printed text is unchanged.
     except Exception as e:
         print('SPOTFINDING THREAD: %s' % (e,))

Example #9

0

Show file

File: iota_process.py Project: cctbx/cctbx-playground

  def run(self):
    """Map ``self.full_proc_wrapper`` over ``self.iterable``, then write 'finish.cfg'.

    After ``parallel_map`` returns, an empty file named 'finish.cfg' is
    created in ``self.init.tmp_base``.
    """
    parallel_map(iterable=self.iterable,
                 func = self.full_proc_wrapper,
                 #callback = self.callback,
                 processes=self.init.params.n_processors)

    # Presumably a completion marker watched by another component - confirm.
    end_filename = os.path.join(self.init.tmp_base, 'finish.cfg')
    with open(end_filename, 'w') as ef:
      ef.write('')

Example #10

0

Show file

 def run(self):
     """Map ``self.spf_wrapper`` over ``self.data_list`` with ``parallel_map``.

     An ``IOTATermination`` raised by the map is caught: ``self.terminated``
     is set to ``True`` and the exception is printed.
     """
     try:
         parallel_map(iterable=self.data_list,
                      func=self.spf_wrapper,
                      callback=self.callback,
                      processes=self.n_proc)
     # 'except X as e' / print(e) are valid on Python 2.6+ and Python 3 and
     # print exactly what the Python-2-only forms printed.
     except IOTATermination as e:
         self.terminated = True
         print(e)

Example #11

0

Show file

    def run(self):
        """Process ``self.iterable`` in parallel, then create 'finish.cfg'.

        ``self.full_proc_wrapper`` is mapped over the iterable using
        ``self.init.params.n_processors`` processes. Once the map returns, an
        empty 'finish.cfg' file is written into ``self.init.tmp_base``.
        """
        work_items = self.iterable
        worker = self.full_proc_wrapper
        n_workers = self.init.params.n_processors
        parallel_map(iterable=work_items, func=worker, processes=n_workers)

        marker_path = os.path.join(self.init.tmp_base, 'finish.cfg')
        with open(marker_path, 'w') as marker:
            marker.write('')

Example #12

0

Show file

File: iota_threads.py Project: renesugar/cctbx_project

 def run(self):
     """Map ``self.spf_wrapper`` over ``self.data_list`` with ``parallel_map``.

     The worker count is the CPU count minus five, but never less than one.
     An ``IOTATermination`` raised by the map is caught: ``self.terminated``
     is set to ``True`` and the exception is printed.
     """
     total_procs = multiprocessing.cpu_count()
     # Clamp to 1: on machines with five or fewer CPUs the subtraction alone
     # would request zero or a negative number of processes.
     n_workers = max(1, total_procs - 5)
     try:
         parallel_map(iterable=self.data_list,
                      func=self.spf_wrapper,
                      callback=self.callback,
                      processes=n_workers)
     # 'except X as e' / print(e) are valid on Python 2.6+ and Python 3.
     except IOTATermination as e:
         self.terminated = True
         print(e)

Example #13

0

Show file

 def run(self):
     """Process ``self.iterable`` in parallel and leave a marker file behind.

     On success an empty 'finish.cfg' is written into ``self.init.tmp_base``.
     If ``IOTATermination`` is raised, an empty '.aborted.tmp' is written into
     ``self.init.int_base`` and the exception is re-raised.

     Raises:
         IOTATermination: propagated after the abort marker has been written.
     """
     try:
         parallel_map(iterable=self.iterable,
                      func=self.full_proc_wrapper,
                      processes=self.init.params.n_processors)
         end_filename = os.path.join(self.init.tmp_base, 'finish.cfg')
         with open(end_filename, 'w') as ef:
             ef.write('')
     except IOTATermination:
         aborted_file = os.path.join(self.init.int_base, '.aborted.tmp')
         with open(aborted_file, 'w') as abtf:
             abtf.write('')
         # A bare raise re-raises the active exception with its original
         # traceback instead of restarting it from this line.
         raise

Example #14

0

Show file

File: mp.py Project: dials/dials

def multi_node_parallel_map(
    func,
    iterable,
    njobs=1,
    nproc=1,
    cluster_method=None,
    asynchronous=True,
    callback=None,
    preserve_order=True,
    preserve_exception_message=False):
  '''
  Run ``func`` over ``iterable`` using several cluster jobs, each of which
  uses several processors.

  The items are grouped with ``iterable_grouper(iterable, nproc)`` and the
  groups are mapped with ``libtbx.easy_mp.parallel_map`` using ``njobs``
  processes, the ``cluster_method`` method and a 'qsub -pe smp <nproc>'
  submission command. The result of ``parallel_map`` is not returned.

  '''
  from libtbx.easy_mp import parallel_map

  # Options handed both to the per-node function and to the outer map
  shared_options = dict(
    asynchronous=asynchronous,
    preserve_order=preserve_order,
    preserve_exception_message=preserve_exception_message)

  # The function to call on the cluster
  node_function = MultiNodeClusterFunction(
    func=func, nproc=nproc, **shared_options)

  # One group of work items per cluster job
  grouped_items = iterable_grouper(iterable, nproc)

  # Wrap the user callback, if there is one
  node_callback = None if callback is None else MultiNodeClusterCallback(callback)

  # Submission command requesting nproc slots
  submit_command = 'qsub -pe smp %d' % nproc

  # Do the parallel map on the cluster
  parallel_map(
    func=node_function,
    iterable=grouped_items,
    callback=node_callback,
    method=cluster_method,
    processes=njobs,
    qsub_command=submit_command,
    **shared_options)

Example #15

0

Show file

File: postrefine.py Project: elliottslaughter/cctbx_project

def postrefine_frames(i_iter, frames, frame_files, iparams, pres_set,
                      miller_array_ref, avg_mode):
    """Post-refine every frame in parallel against a reference miller array.

    Bijvoet mates are generated on ``miller_array_ref`` before use and a
    cycle header is printed. Each frame is refined together with its previous
    result taken from ``pres_set``.

    Returns a 3-tuple: the non-``None`` refinement results, the per-frame
    results including ``None`` placeholders, and the header text.
    """
    miller_array_ref = miller_array_ref.generate_bijvoet_mates()

    header_first = 'Post-refinement cycle ' + str(i_iter + 1) + ' (' + avg_mode + ')\n'
    txt_merge_postref = header_first + ' * R and CC show percent change.\n'
    print(txt_merge_postref)

    jobs = []
    for frame_no, frame_file, pres_in in zip(frames, frame_files, pres_set):
        jobs.append((frame_no, frame_file, iparams, miller_array_ref, pres_in,
                     avg_mode))
    outcomes = parallel_map(iterable=jobs,
                            func=postrefine_by_frame_mproc,
                            processes=iparams.n_processors)

    good_results = []
    all_results = []
    for outcome in outcomes:
        if outcome is None:
            # Keep a placeholder so the list stays aligned with the outcomes.
            all_results.append(None)
            continue
        pres, _txt = outcome
        all_results.append(pres)
        if pres is not None:
            good_results.append(pres)
    return good_results, all_results, txt_merge_postref

Example #16

0

Show file

File: iota_base.py Project: oliserand/cctbx_project

  def run_process(self, iterable):
    """Attach an experiment list to every entry, then process them in parallel.

    Each entry is indexed as ``entry[0]`` (experiment index), ``entry[1]``
    (path) and, for '.h5' paths, ``entry[2]`` (image index). Returns whatever
    ``parallel_map`` returns for ``self.import_and_process``.
    """
    # Experiment lists are built here because Python objects cannot be stored
    # in the INFO JSON file.
    prepared = []
    cached_imageset = None
    cached_crystal = None
    for entry in iterable:
      path = str(entry[1])

      if not path.endswith('.h5'):
        # Any other path (CBF) gets an experiment list built from the file.
        experiments = ExLF.from_filenames(filenames=[path])
        prepared.append([entry[0], entry[1], 0, experiments])
        continue

      exp_idx = entry[0]
      img_idx = entry[2]
      if cached_imageset is None:
        # Loaded once, from the first '.h5' path seen, and reused afterwards.
        loaded = ExLF.from_filenames(filenames=[path])
        cached_imageset = loaded.imagesets()[0]
        cached_crystal = loaded[0].crystal
      single_image = cached_imageset.partial_set(img_idx, img_idx + 1)
      single_experiment = ExLF.from_imageset_and_crystal(
        imageset=single_image, crystal=cached_crystal)
      prepared.append([exp_idx, path, img_idx, single_experiment])

    # Run a multiprocessing job
    return parallel_map(iterable=prepared,
                        func=self.import_and_process,
                        callback=self.callback,
                        processes=self.params.mp.n_processors)

Example #17

0

Show file

File: tst_easy_mp.py Project: zhuligs/cctbx_project

def check_if_stacktrace_is_propagated_properly(method, nproc):
    """Check that a worker's ZeroDivisionError keeps its message and stack trace.

    ``_may_divide_by_zero`` is mapped over ``[2, 1, 0]`` with the given
    ``method`` and ``nproc``. When a ``ZeroDivisionError`` reaches the caller,
    its message must contain "division by zero" and the failing function's
    name must appear either in the traceback or in the stack trace reported by
    ``libtbx.scheduling.stacktrace``.
    """
    # NOTE(review): exception_seen is set below but never checked in the code
    # visible here, so a run without any exception passes silently - confirm.
    exception_seen = False
    from libtbx.easy_mp import parallel_map
    import traceback

    try:
        results = parallel_map(func=_may_divide_by_zero,
                               iterable=[2, 1, 0],
                               method=method,
                               processes=nproc,
                               preserve_exception_message=True)
    except ZeroDivisionError, e:
        exception_seen = True
        exc_type, exc_value, exc_traceback = sys.exc_info()
        assert "division by zero" in str(
            exc_value.message), "Exception value mismatch: '%s'" % exc_value

        stack_contains_fail_function = False
        # Two options: Either the original stack is available directly
        for (filename, line, function,
             text) in traceback.extract_tb(exc_traceback):
            if function == _may_divide_by_zero.func_name:
                stack_contains_fail_function = True
        # or it should be preserved in the string representation of the exception
        from libtbx.scheduling import stacktrace
        ex, st = stacktrace.exc_info()
        if ex is not None and _may_divide_by_zero.func_name in "".join(st):
            stack_contains_fail_function = True
        if not stack_contains_fail_function:
            # Print diagnostics first; the assert below then always fails.
            print "Thrown exception: %s:" % str(e)
            traceback.print_tb(exc_traceback)
            print ""
            assert stack_contains_fail_function, "Stacktrace lost"

Example #18

0

Show file

File: candidate_cells.py Project: dials/cctbx

def print_results(candidates, params):
    """Group candidates by matching unit cell and print the ten best groups.

    ``i_first_matching`` is evaluated for every candidate in parallel, using
    ``params.multiprocessing.nproc`` processes; candidates that share the same
    value form one group. Each group is represented by its member with the
    lowest ``score``. The representatives are sorted by score and the first
    ten are printed as "<score>\\t<candidate>".
    """
    i_first_matching_partial = functools.partial(i_first_matching,
                                                 cand_list=candidates)
    i_first_matching_list = easy_mp.parallel_map(
        i_first_matching_partial,
        candidates,
        processes=params.multiprocessing.nproc)

    # Bucket the candidates in a single pass instead of rescanning the whole
    # candidate list once per unique group key.
    groups = {}
    for i_cand, cand in enumerate(candidates):
        groups.setdefault(i_first_matching_list[i_cand], []).append(cand)

    # Walk the keys through a set, as before, so that representatives with
    # equal scores keep the same relative order after the stable sort below.
    results = []
    for key in set(i_first_matching_list):
        results.append(min(groups[key], key=lambda m: m.score))
    results.sort(key=lambda r: r.score)
    for r in results[:10]:
        print("{:.4f}\t{}".format(r.score, r))

Example #19

0

Show file

File: tst_easy_mp.py Project: cctbx/cctbx-playground

def check_if_stacktrace_is_propagated_properly(method, nproc):
  """Check that a worker's ZeroDivisionError keeps its message and stack trace.

  ``_may_divide_by_zero`` is mapped over ``[2,1,0]`` with the given ``method``
  and ``nproc``. When a ``ZeroDivisionError`` reaches the caller, its message
  must contain "division by zero" and the failing function's name must appear
  either in the traceback or in the stack trace reported by
  ``libtbx.scheduling.stacktrace``.
  """
  # NOTE(review): exception_seen is set below but never checked in the code
  # visible here, so a run without any exception passes silently - confirm.
  exception_seen = False
  from libtbx.easy_mp import parallel_map
  import traceback

  try:
    results = parallel_map(
      func=_may_divide_by_zero,
      iterable=[2,1,0],
      method=method,
      processes=nproc,
      preserve_exception_message=True)
  except ZeroDivisionError, e:
    exception_seen = True
    exc_type, exc_value, exc_traceback = sys.exc_info()
    assert "division by zero" in str(exc_value.message), "Exception value mismatch: '%s'" % exc_value

    stack_contains_fail_function = False
    # Two options: Either the original stack is available directly
    for (filename, line, function, text) in traceback.extract_tb(exc_traceback):
      if function == _may_divide_by_zero.func_name:
        stack_contains_fail_function = True
    # or it should be preserved in the string representation of the exception
    from libtbx.scheduling import stacktrace
    ex, st = stacktrace.exc_info()
    if ex is not None and _may_divide_by_zero.func_name in "".join( st ):
      stack_contains_fail_function = True
    if not stack_contains_fail_function:
      # Print diagnostics first; the assert below then always fails.
      print "Thrown exception: %s:" % str(e)
      traceback.print_tb(exc_traceback)
      print ""
      assert stack_contains_fail_function, "Stacktrace lost"

Example #20

0

Show file

File: engine.py Project: dials/dials

    def compute_functional_gradients_and_curvatures(self):
        """Sum the target's functional, gradients and curvatures over all blocks.

        The matches are split into blocks by the target, each block is
        evaluated (in parallel when ``self._nproc > 1``), the blockwise
        results are summed and any restraints terms are added on top.

        Returns ``(f, g, c)`` with ``g`` and ``c`` converted to
        ``flex.double``.
        """
        self.prepare_for_step()

        # observation terms
        target = self._target
        blocks = target.split_matches_into_blocks(nproc=self._nproc)
        if self._nproc > 1:
            task_results = easy_mp.parallel_map(
                func=target.compute_functional_gradients_and_curvatures,
                iterable=blocks,
                processes=self._nproc,
                method="multiprocessing",
            )
        else:
            task_results = []
            for block in blocks:
                task_results.append(
                    target.compute_functional_gradients_and_curvatures(block))

        # reduce blockwise results: transpose, then sum each column
        functionals, gradient_blocks, curvature_blocks = zip(*task_results)
        f = sum(functionals)
        g = [sum(column) for column in zip(*gradient_blocks)]
        c = [sum(column) for column in zip(*curvature_blocks)]

        # restraints terms
        restraints = target.compute_restraints_functional_gradients_and_curvatures()
        if restraints:
            f += restraints[0]
            g = [own + extra for own, extra in zip(g, restraints[1])]
            c = [own + extra for own, extra in zip(c, restraints[2])]

        return f, flex.double(g), flex.double(c)

Example #21

0

Show file

File: easy_mp.py Project: cctbx/cctbx-playground

def run_parallel(
   method='multiprocessing',  # multiprocessing, only choice for now
   qsub_command='qsub',       # queue command, not supported yet
   nproc=1,                   # number of processors to use
   target_function=None,      # the method to run
   kw_list=None):             # list of kw dictionaries for target_function
  """Run ``target_function`` once per keyword dictionary in ``kw_list``.

  Jobs are run one after another in this process when ``nproc`` is 1 or
  there is at most one job; otherwise they are dispatched through
  ``libtbx.easy_mp.parallel_map``. Returns the list of results.

  ``kw_list=None`` (the default) is treated as "no jobs" and yields an empty
  list; it used to fail with a TypeError in ``len(None)``.
  """
  if kw_list is None:
    kw_list = []
  n = len(kw_list)  # number of jobs to run, one per kw dict

  if nproc == 1 or n <= 1:  # just run it for each case in list, no multiprocessing
    ra = run_anything(kw_list=kw_list, target_function=target_function)
    return [ra(i) for i in range(n)]

  # A pool_map based branch used to sit here behind 'elif 0:'. It could crash
  # (2015-10-13 TT), was never executed, and has been removed.
  from libtbx.easy_mp import parallel_map
  return parallel_map(
    func=run_anything(target_function=target_function, kw_list=kw_list),
    iterable=range(n),  # range works on Python 2 and 3, unlike xrange
    method=method,
    processes=nproc,
    callback=None,
    preserve_exception_message=True,  # 2016-08-17
    qsub_command=qsub_command,
    use_manager=True)  # Always use manager 2015-10-13 TT

Example #22

0

Show file

File: table_one.py Project: keitaroyam/cctbx_fork

 def __init__(self, params, out=sys.stdout):
     """Validate every structure in ``params.structure`` and add one column each.

     ``self.run_single_structure`` is mapped over the structure indices with
     ``easy_mp.parallel_map``; each result is turned into a column through
     ``result.validation.as_table1_column`` and added to the table.
     Progress text is written to ``out``.
     """
     iotbx.table_one.table.__init__(
         self,
         text_field_separation=params.output.text_field_separation,
         count_anomalous_pairs_separately=params.processing.count_anomalous_pairs_separately,
     )
     self.output_dir = os.getcwd()
     self.params = params
     self.output_files = []
     make_header("Running data analysis and validation", out=out)
     # The work items are indices into self.params.structure.
     results = easy_mp.parallel_map(
         iterable=range(len(self.params.structure)),
         func=self.run_single_structure,
         processes=params.multiprocessing.nproc,
         method=params.multiprocessing.technology,
         preserve_exception_message=True,
     )
     # Pair each structure with its result by position.
     for structure, result in zip(params.structure, results):
         print >> out, ""
         print >> out, "Collecting stats for structure %s" % structure.name
         column = result.validation.as_table1_column(
             label=structure.name,
             wavelength=structure.wavelength,
             re_compute_r_factors=params.processing.re_compute_r_factors,
             log=out,
         )
         self.add_column(column)
Example #23

0

Show file

def run_parallel(
   method='multiprocessing',  # multiprocessing, only choice for now
   qsub_command='qsub',       # queue command, not supported yet
   nproc=1,                   # number of processors to use
   target_function=None,      # the method to run
   kw_list=None):             # list of kw dictionaries for target_function
  """Run ``target_function`` once per keyword dictionary in ``kw_list``.

  Jobs are run one after another in this process when ``nproc`` is 1 or
  there is at most one job; otherwise they are dispatched through
  ``libtbx.easy_mp.parallel_map``. Returns the list of results.

  ``kw_list=None`` (the default) is treated as "no jobs" and yields an empty
  list; it used to fail with a TypeError in ``len(None)``.
  """
  if kw_list is None:
    kw_list = []
  n = len(kw_list)  # number of jobs to run, one per kw dict

  if nproc == 1 or n <= 1:  # just run it for each case in list, no multiprocessing
    ra = run_anything(kw_list=kw_list, target_function=target_function)
    return [ra(i) for i in range(n)]

  # A pool_map based branch used to sit here behind 'elif 0:'. It could crash
  # (2015-10-13 TT), was never executed, and has been removed.
  from libtbx.easy_mp import parallel_map
  return parallel_map(
    func=run_anything(target_function=target_function, kw_list=kw_list),
    iterable=range(n),  # range works on Python 2 and 3, unlike xrange
    method=method,
    processes=nproc,
    callback=None,
    preserve_exception_message=True,  # 2016-08-17
    qsub_command=qsub_command,
    use_manager=True)  # Always use manager 2015-10-13 TT

Example #24

0

Show file

File: obsolete_predictors.py Project: dials/dials_scratch

    def step_over_images(self):
        """Search every image block and store the flattened predictions.

        The scan's images are split into blocks with ``self._make_blocks``,
        ``self._search_on_image_range`` is mapped over them in order, and the
        per-block lists are concatenated into ``self._reflections``.
        """
        from libtbx import easy_mp

        n_images = self._scan.get_num_images()

        # One process is requested, or fewer if the scan holds fewer images.
        nproc = min(1, n_images)

        image_blocks = self._make_blocks(n_images, nproc)
        per_block = easy_mp.parallel_map(
            func=self._search_on_image_range,
            iterable=image_blocks,
            processes=nproc,
            method="multiprocessing",
            preserve_order=True,
        )

        # Concatenate the per-block lists, keeping block order.
        flattened = []
        for block_reflections in per_block:
            flattened.extend(block_reflections)
        self._reflections = flattened

Example #25

0

Show file

File: run_all_xds_simple.py Project: nsls-ii-mx/yamtbx

def run(params):
    """Find XDS working directories below ``params.topdir`` and process them.

    A directory qualifies when it contains 'XDS.INP'. Directories that also
    contain 'decision.log' are skipped when ``params.dont_overwrite`` is set.
    With ``params.multiproc`` the directories are dispatched through
    ``easy_mp.parallel_map``; otherwise they are processed one by one.

    ``params.topdir`` is replaced by its absolute path, and with
    ``params.multiproc`` ``params.nproc`` is overwritten with the per-job
    processor count.
    """
    params.topdir = os.path.abspath(params.topdir)

    # Single-argument print() calls give the same output on Python 2 and are
    # also valid on Python 3.
    xds_dirs = []
    print("Found xds directories:")
    for root, dirnames, filenames in os.walk(params.topdir, followlinks=True):
        if "XDS.INP" not in filenames:
            continue
        if "decision.log" in filenames and params.dont_overwrite:
            print("Already done - skip: %s" %
                  os.path.relpath(root, params.topdir))
            continue
        print(" %s" % os.path.relpath(root, params.topdir))
        xds_dirs.append(root)

    print("")
    print("Start running..")

    from yamtbx.dataproc.auto.command_line import run_all_xds_simple

    if not xds_dirs:
        # Nothing to process. This also prevents the ZeroDivisionError that
        # 'npar // len(xds_dirs)' raised when no directory was found.
        return

    if params.multiproc:
        if params.nproc is None:
            npar = util.get_number_of_processors()
        else:
            npar = params.nproc

        # Override nproc: share spare processors between the jobs
        if len(xds_dirs) < npar:
            params.nproc = npar // len(xds_dirs)
        else:
            params.nproc = 1
        print("nproc= %s" % params.nproc)

        # With SGE, one job per directory
        if params.parmethod == "sge":
            npar = len(xds_dirs)

        fun = run_all_xds_simple.xds_runmanager(params)
        easy_mp.parallel_map(func=fun,
                             iterable=[os.path.abspath(d) for d in xds_dirs],
                             processes=npar,
                             method=params.parmethod,
                             preserve_exception_message=True)
    else:
        for root in xds_dirs:
            run_xds_sequence(root, params)

Example #26

0

Show file

File: finder.py Project: biochem-fan/dials

def batch_parallel_map(func=None, iterable=None, processes=None, callback=None, method=None, chunksize=1):
    """
    A function to run jobs in batches in each process.

    ``func``, ``iterable`` and ``callback`` are wrapped with ``batch_func``,
    ``batch_iterable(iterable, chunksize)`` and ``batch_callback`` before being
    handed to ``easy_mp.parallel_map``. The result of the map is not returned.
    """
    from libtbx import easy_mp

    # Call the batches in parallel
    # NOTE(review): callback is wrapped even when it is None; presumably
    # batch_callback copes with that - confirm.
    easy_mp.parallel_map(
        func=batch_func(func),
        iterable=batch_iterable(iterable, chunksize),
        processes=processes,
        callback=batch_callback(callback),
        method=method,
        preserve_order=True,
        preserve_exception_message=True,
    )

Example #27

0

Show file

File: mp.py Project: hackerlank/dials

def multi_node_parallel_map(func,
                            iterable,
                            njobs=1,
                            nproc=1,
                            cluster_method=None,
                            asynchronous=True,
                            callback=None,
                            preserve_order=True,
                            preserve_exception_message=False):
    '''
    A wrapper function to call a function using multiple cluster nodes and
    with multiple processors on each node.

    The items are grouped with ``iterable_grouper(iterable, nproc)`` and the
    groups are mapped with ``libtbx.easy_mp.parallel_map`` using ``njobs``
    processes and the ``cluster_method`` method. The result of
    ``parallel_map`` is not returned.

    '''
    from libtbx.easy_mp import parallel_map

    # The function to call on the cluster
    cluster_func = MultiNodeClusterFunction(
        func=func,
        nproc=nproc,
        asynchronous=asynchronous,
        preserve_order=preserve_order,
        preserve_exception_message=preserve_exception_message)

    # Create the cluster iterable
    cluster_iterable = iterable_grouper(iterable, nproc)

    # Create the cluster callback
    if callback is not None:
        cluster_callback = MultiNodeClusterCallback(callback)
    else:
        cluster_callback = None

    # Set the command (requests nproc slots through 'qsub -pe smp')
    qsub_command = 'qsub -pe smp %d' % nproc

    # Do the parallel map on the cluster
    parallel_map(func=cluster_func,
                 iterable=cluster_iterable,
                 callback=cluster_callback,
                 method=cluster_method,
                 processes=njobs,
                 qsub_command=qsub_command,
                 asynchronous=asynchronous,
                 preserve_order=preserve_order,
                 preserve_exception_message=preserve_exception_message)

Example #28

0

Show file

def run():
    """Read reflections and known-good values, then call ``call_gsas``.

    ``REFLS`` is read as comma-separated ``d, weight`` float pairs and
    ``KNOWN_GOOD`` as one float per line.
    """
    with open(REFLS) as f:
        refls, weights = [], []
        for line in f.readlines():
            # Each line must hold exactly two comma-separated floats.
            d, w = [float(x) for x in line.strip().split(',')]
            refls.append(d)
            weights.append(w)
    with open(KNOWN_GOOD) as f:
        known_good = [float(line.strip()) for line in f.readlines()]

    call_gsas((refls, weights, known_good, None))
    # NOTE(review): quit() ends the program here, so everything below is
    # unreachable. It looks like a debugging leftover - confirm before removing.
    quit()
    #  import profile
    #  profile.runctx('call_gsas((refls, weights, known_good, None))', globals(), locals(), filename='call_gsas.prof')
    #  quit()

    # First round: NPROC identical jobs without a score threshold.
    current_cells = easy_mp.parallel_map(call_gsas,
                                         [(refls, weights, known_good, None)
                                          for _ in range(NPROC)],
                                         processes=NPROC)

    cell_man = Candidate_cell_manager()

    # Flatten the per-job lists and store every cell.
    current_cells_flat = []
    for l in current_cells:
        current_cells_flat.extend(l)
    for gcell in current_cells_flat:
        cell_man.store_cell(gcell)
    cell_man.maintain(force=True)
    min_score = cell_man.min_score

    # Second round: three times as many jobs, passing min_score from round one.
    current_cells = easy_mp.parallel_map(
        call_gsas,
        [(refls, weights, known_good, min_score) for _ in range(NPROC * 3)],
        processes=NPROC)

    current_cells_flat = []
    for l in current_cells:
        current_cells_flat.extend(l)
    for gcell in current_cells_flat:
        cell_man.store_cell(gcell)
    cell_man.maintain(force=True)

    # Pickle the cell manager to the path given as first command-line argument.
    with open(sys.argv[1], 'wb') as f:
        pickle.dump(cell_man, f)

Example #29

0

Show file

 def run(self):
     """Map ``self.full_proc_wrapper`` over ``self.iterable`` with ``parallel_map``.

     An ``IOTATermination`` raised by the map is caught: ``self.aborted`` is
     set to ``True``, the exception is printed and the method returns.
     """
     try:
         img_objects = parallel_map(iterable=self.iterable,
                                    func=self.full_proc_wrapper,
                                    processes=self.init.params.n_processors)
     # 'except X as e' / print(e) are valid on Python 2.6+ and Python 3 and
     # print exactly what the Python-2-only forms printed.
     except IOTATermination as e:
         self.aborted = True
         print(e)
         return

Example #30

0

Show file

File: run_all_xds_simple.py Project: harumome/kamo

def run(params):
    """Run the XDS sequence in every XDS directory found below ``params.topdir``.

    A directory qualifies when it contains ``XDS.INP``.  Directories that
    already hold ``decision.log`` are skipped when ``params.dont_overwrite``
    is set.

    With ``params.multiproc`` the directories are processed through
    ``easy_mp.parallel_map``; ``params.nproc`` is overwritten with the number
    of processors each job may use.  Otherwise ``xds_sequence`` is called on
    each directory in turn.

    Returns None.  When no directory qualifies the function returns right
    after the progress messages (previously the multiproc branch raised
    ZeroDivisionError in that case).
    """
    xds_dirs = []
    print("Found xds directories:")
    for root, dirnames, filenames in os.walk(params.topdir, followlinks=True):
        if "XDS.INP" not in filenames:
            continue
        rel_root = os.path.relpath(root, params.topdir)
        if "decision.log" in filenames and params.dont_overwrite:
            print("Already done - skip: %s" % rel_root)
            continue
        # Same output as the Python 2 statement: print "", rel_root
        print(" %s" % rel_root)
        xds_dirs.append(root)

    print("")
    print("Start running..")

    if not xds_dirs:
        # Nothing to do; this also guards the division by len(xds_dirs) below.
        return

    from yamtbx.dataproc.auto.command_line import run_all_xds_simple

    if params.multiproc:
        npar = util.get_number_of_processors() if params.nproc is None else params.nproc

        # Override nproc: spread spare processors over the jobs when there
        # are fewer directories than processors.
        if len(xds_dirs) < npar:
            params.nproc = npar // len(xds_dirs)
        else:
            params.nproc = 1
        print("nproc= %s" % params.nproc)

        # With the "sge" method one job per directory is requested.
        if params.parmethod == "sge":
            npar = len(xds_dirs)

        fun = run_all_xds_simple.xds_runmanager(params)
        easy_mp.parallel_map(func=fun,
                             iterable=[os.path.abspath(d) for d in xds_dirs],
                             processes=npar,
                             method=params.parmethod,
                             preserve_exception_message=True)
    else:
        for root in xds_dirs:
            xds_sequence(root, params)

Example #31

0

Show file

File: iota_threads.py Project: cctbx-xfel/cctbx_project

 def run(self):
     """Process ``self.iterable`` in parallel when multiprocessing is selected.

     Only acts when ``self.init.params.mp_method`` is ``'multiprocessing'``.
     Any exception raised by the parallel run is printed and the method
     returns.
     """
     if self.init.params.mp_method == 'multiprocessing':
         try:
             img_objects = parallel_map(
                 iterable=self.iterable,
                 func=self.full_proc_wrapper,
                 processes=self.init.params.n_processors)
         except Exception as e:
             # Portable spelling of the Python 2-only "except Exception, e"
             # / "print e"; valid on Python 2.6+ and Python 3.
             print(e)
             return

Example #32

0

Show file

def run(args):
    """Build one job per sufficiently long sweep and run ``run_once`` on each.

    ``args`` are the command-line arguments; they are interpreted against
    ``master_phil_scope``.  The file name templates in ``params.template``
    are globbed, the matching files loaded as datablocks, and every
    ``ImageSweep`` with at least ``params.min_sweep_length`` images becomes a
    job ``(paths, sweep_id, params)``.  Jobs are sorted by their first file
    name and dispatched through ``easy_mp.parallel_map``.
    """
    cmd_line = command_line.argument_interpreter(
        master_params=master_phil_scope)
    working_phil = cmd_line.process_and_fetch(args=args)
    working_phil.show()
    params = working_phil.extract()
    if params.find_spots_phil is not None:
        params.find_spots_phil = os.path.abspath(params.find_spots_phil)
        assert os.path.isfile(params.find_spots_phil)
    if params.index_phil is not None:
        params.index_phil = os.path.abspath(params.index_phil)
        assert os.path.isfile(params.index_phil)

    templates = params.template
    print(templates)

    filenames = []
    for t in templates:
        print(t)
        filenames.extend(glob.glob(t))
    print(filenames)
    from dxtbx.imageset import ImageSetFactory, ImageSweep
    from dxtbx.datablock import DataBlockFactory

    datablocks = DataBlockFactory.from_args(filenames, verbose=True)

    # Running 1-based sweep id.  It must not share a name with a loop
    # variable: the previous "for i, datablock in enumerate(...)" reset the
    # counter at every datablock, so ids were repeated across datablocks.
    jobs = []
    sweep_id = 0
    for datablock in datablocks:
        for imageset in datablock.extract_sweeps():
            if (isinstance(imageset, ImageSweep)
                    and len(imageset) >= params.min_sweep_length):
                sweep_id += 1
                print(imageset)
                print(imageset.get_template())
                jobs.append((imageset.paths(), sweep_id, params))

    # sort based on the first filename of each imageset
    jobs.sort(key=lambda job: job[0][0])

    nproc = params.nproc
    results = easy_mp.parallel_map(
        func=run_once,
        iterable=jobs,
        processes=nproc,
        method=params.technology,
        qsub_command=params.qsub_command,
        preserve_order=True,
        asynchronous=False,
        preserve_exception_message=True,
    )

Example #33

0

Show file

    def integrate(self):
        """Do all the integration tasks.

        The tasks come from ``self._manager.tasks()``.  With more than one
        process they are run through ``easy_mp.parallel_map``; each task's
        standard output is captured in the worker and printed by the callback
        that also accumulates the result.  Otherwise the tasks are run one
        after another in this process.  Timing information is printed at the
        end.

        Returns
          The integration results

        """
        from time import time
        from libtbx import easy_mp

        start_time = time()
        num_proc = len(self._manager)
        if self._max_procs > 0:
            num_proc = min(num_proc, self._max_procs)
        if num_proc > 1:

            def process_output(result):
                # result is the (task result, captured stdout) pair
                self._manager.accumulate(result[0])
                print(result[1])

            def execute_task(task):
                import sys
                try:
                    from cStringIO import StringIO
                except ImportError:  # no cStringIO module on Python 3
                    from io import StringIO

                # Capture what the task prints, and always put the original
                # stream back so stdout does not stay redirected afterwards.
                saved_stdout = sys.stdout
                captured = StringIO()
                sys.stdout = captured
                try:
                    result = task()
                finally:
                    sys.stdout = saved_stdout
                return result, captured.getvalue()

            task_results = easy_mp.parallel_map(
                func=execute_task,
                iterable=list(self._manager.tasks()),
                processes=num_proc,
                callback=process_output,
                method=self._mp_method,
                preserve_order=True,
                preserve_exception_message=True,
            )
            task_results, output = zip(*task_results)
        else:
            task_results = [task() for task in self._manager.tasks()]
            for result in task_results:
                self._manager.accumulate(result)
        assert self._manager.finished()
        end_time = time()
        read_time = self._manager.read_time
        extract_time = self._manager.extract_time
        process_time = self._manager.process_time
        total_time = end_time - start_time
        print("Time taken: reading images: %.2f seconds" % read_time)
        print("Time taken: extracting pixels: %.2f seconds" % extract_time)
        print("Time taken: processing data: %.2f seconds" % process_time)
        print("Time taken: total: %.2f seconds" % total_time)
        return self._manager.result()

Example #34

0

Show file

File: wrappers.py Project: keitaroyam/cctbx_fork

def spotfinder_factory(absrundir,frames,phil_params):
  """Build a ``heuristics_base`` spotfinder and register every frame with it.

  The parameter dictionary handed to ``heuristics_base`` is filled from the
  first image in ``frames.images`` and the first entry of
  ``frames.filenames.FN``.  Frame registration is mapped over
  ``frames.frames()`` with ``easy_mp.parallel_map`` and the ``images`` of
  every returned spotfinder are merged into the one that is returned.

  ``absrundir`` is not used in this function.
  """

  local_frames=frames.frames()

  A = frames.images[0]
  #A.readHeader()--deprecate this because it squashes any overrides
  #                from dataset_preferences processed in imagefiles.py
  pd = {'directory':frames.filenames.FN[0].cwd,
        'template': frames.filenames.FN[0].template,
        'identifier':frames.filenames.FN[0].fileroot,
        'vendortype':A.vendortype,
        'binning':'%d'%A.bin,
        'distance':'%f'%A.distance,
        'wavelength':'%f'%A.wavelength,
        'deltaphi':'%f'%A.deltaphi,
        }

  #temp values for getting coordinate convention
  pd['pixel_size']='%f'%A.pixel_size
  pd['size1']='%f'%A.size1
  pd['size2']='%f'%A.size2
  pd['ybeam'] = '%f'%A.beamy
  pd['xbeam'] = '%f'%A.beamx
  try:
    pd['twotheta'] = '%f'%A.twotheta
  except Exception:
    # no usable twotheta on the image: fall back to zero
    pd['twotheta'] = '0.0'

  from spotfinder.applications.practical_heuristics import heuristics_base
  Spotfinder = heuristics_base(pd,phil_params)

  from libtbx import easy_mp

  def run_spotfinder(args):
    assert len(args) == 2
    framenumber, frames = args
    # Decide with a real test rather than "assert ... has_key(...)": asserts
    # are stripped under -O (frames would then never be registered) and
    # dict.has_key does not exist on Python 3.  As before, any failure of the
    # lookup itself is treated as "not registered yet".
    try:
      registered = framenumber in Spotfinder.images
    except Exception:
      registered = False
    if not registered:
      Spotfinder.register_frames(framenumber,frames)
      if phil_params.spotfinder_verbose: Spotfinder.show()
    return Spotfinder

  iterable = [(framenumber, frames) for framenumber in local_frames]
  results = easy_mp.parallel_map(
    func=run_spotfinder,
    iterable=iterable,
    processes=phil_params.distl.nproc,
    method="multiprocessing",
    preserve_order=True
  )
  for result in results:
    Spotfinder.images.update(result.images)
  return Spotfinder

Example #35

0

Show file

def crystals_refiner(params, experiments, reflections):
    """Refine every experiment on its own and return ``experiments``.

    For each ``(index, experiment)`` pair the reflections whose ``'id'``
    equals the index are selected, re-labelled with id 0 and refined together
    with a one-element ``ExperimentList``.  A failure while building or
    running the refiner is printed and that experiment is left as it was.
    The work is dispatched with ``easy_mp.parallel_map`` using
    ``params.mp.nproc`` processes and ``params.mp.method``.
    """

    def do_work(item):
        exp_index, experiment = item
        print("Refining crystal", exp_index)

        # reflections belonging to this experiment only, renumbered to id 0
        subset = reflections.select(reflections['id'] == exp_index)
        subset['id'] = flex.int(len(subset), 0)

        # (A former minimum-number-of-reflections check was removed because
        # reflections.minimum_number_of_reflections no longer exists.)

        # one-element experiment list for the refiner
        single = ExperimentList()
        single.append(experiment)

        try:
            refiner = RefinerFactory.from_parameters_data_experiments(
                params, subset, single)
            refiner.run()
        except Exception as e:
            print("Error,", str(e))
            return

        # swap in the refined experiment
        experiments[exp_index] = refiner.get_experiments()[0]

    print("Beginning crystal refinement with %d processor(s)" %
          params.mp.nproc)
    easy_mp.parallel_map(
        func=do_work,
        iterable=enumerate(experiments),
        processes=params.mp.nproc,
        method=params.mp.method,
        asynchronous=True,
        preserve_exception_message=True,
    )
    return experiments

Example #36

0

Show file

File: test_reg_00_base.py Project: zhaoyage/qrefine

 def run(self):
     """Run ``self.func`` over ``self.pdbs`` in parallel and check the results.

     One process per entry of ``self.pdbs`` is requested with the
     'multiprocessing' method ('pbs' and a fixed ``processes=4`` were the
     alternatives left commented out).
     """
     map_options = dict(
         func=self.func,
         iterable=self.pdbs,
         method='multiprocessing',
         preserve_exception_message=True,
         processes=len(self.pdbs),
         qsub_command=qsub_command,
         use_manager=True)
     outcomes = parallel_map(**map_options)
     # NOTE(review): check_assertions receives the complete result list once
     # per result, not the individual result -- confirm this is intended.
     for _ in outcomes:
         self.check_assertions(outcomes)

Example #37

0

Show file

File: iota_run.py Project: hainm/cctbx_project

 def run_process(self):
   ''' Run indexing / integration of imported images.

   Each image object is wrapped as [1-based position, image count + 1, object]
   and mapped through self.proc_wrapper in parallel; self.img_objects is
   replaced by the returned list.
   '''
   n_images = len(self.img_objects)
   cmd.Command.start("Processing {} images".format(n_images))

   jobs = []
   for position, img_object in enumerate(self.img_objects, 1):
     jobs.append([position, n_images + 1, img_object])
   self.img_list = jobs

   self.prog_count = 0
   self.gs_prog = cmd.ProgressBar(title='PROCESSING')

   self.img_objects = parallel_map(
     iterable=self.img_list,
     func=self.proc_wrapper,
     callback=self.callback,
     processes=self.init.params.n_processors)

   done_msg = "Processing {} images -- DONE ".format(len(self.img_objects))
   cmd.Command.end(done_msg)

Example #38

0

Show file

File: mp.py Project: hackerlank/dials

    def __call__(self, iterable):
        '''
        Map the stored function over ``iterable`` with libtbx's
        ``parallel_map``, using the "multiprocessing" method and the options
        stored on this object, and return its result.

        '''
        from libtbx.easy_mp import parallel_map

        options = dict(
            func=self.func,
            iterable=iterable,
            processes=self.nproc,
            method="multiprocessing",
            asynchronous=self.asynchronous,
            preserve_order=self.preserve_order,
            preserve_exception_message=self.preserve_exception_message,
        )
        return parallel_map(**options)

Example #39

0

Show file

File: mp.py Project: dials/dials

  def __call__(self, iterable):
    '''
    Map the stored function over ``iterable`` with libtbx's ``parallel_map``,
    using the "multiprocessing" method and the options stored on this object,
    and return its result.

    '''
    from libtbx.easy_mp import parallel_map

    options = dict(
      func=self.func,
      iterable=iterable,
      processes=self.nproc,
      method="multiprocessing",
      asynchronous=self.asynchronous,
      preserve_order=self.preserve_order,
      preserve_exception_message=self.preserve_exception_message)
    return parallel_map(**options)

Example #40

0

Show file

def run_all(qm_engine,
            fragments_extracted,
            indices,
            method='multiprocessing',
            processes=1,
            qsub_command=None,
            callback=None):
    """Map a ``qm_energy_manager`` over ``indices`` in parallel.

    The manager is built from ``qm_engine`` and ``fragments_extracted`` and
    used as the function given to ``parallel_map``; ``method``, ``processes``,
    ``callback`` and ``qsub_command`` are forwarded unchanged.  Returns
    whatever ``parallel_map`` returns.
    """
    from libtbx.easy_mp import parallel_map

    worker = qm_energy_manager(qm_engine, fragments_extracted)
    map_options = dict(
        func=worker,
        iterable=indices,
        method=method,
        processes=processes,
        callback=callback,
        qsub_command=qsub_command,
    )
    return parallel_map(**map_options)

Example #41

0

Show file

File: interface.py Project: dials/dials_scratch

  def integrate(self):
    ''' Do all the integration tasks.

    The tasks come from self._manager.tasks(). With more than one process
    they are run through easy_mp.parallel_map; each task's standard output is
    captured in the worker and printed by the callback that also accumulates
    the result. Otherwise the tasks are run one after another in this
    process. Timing information is printed at the end.

    Returns
      The integration results

    '''
    from time import time
    from libtbx import easy_mp
    start_time = time()
    num_proc = len(self._manager)
    if self._max_procs > 0:
      num_proc = min(num_proc, self._max_procs)
    if num_proc > 1:
      def process_output(result):
        # result is the (task result, captured stdout) pair
        self._manager.accumulate(result[0])
        print(result[1])
      def execute_task(task):
        import sys
        try:
          from cStringIO import StringIO
        except ImportError: # no cStringIO module on Python 3
          from io import StringIO
        # Capture what the task prints, and always put the original stream
        # back so stdout does not stay redirected afterwards.
        saved_stdout = sys.stdout
        captured = StringIO()
        sys.stdout = captured
        try:
          result = task()
        finally:
          sys.stdout = saved_stdout
        return result, captured.getvalue()
      task_results = easy_mp.parallel_map(
        func=execute_task,
        iterable=list(self._manager.tasks()),
        processes=num_proc,
        callback=process_output,
        method=self._mp_method,
        preserve_order=True,
        preserve_exception_message=True)
      task_results, output = zip(*task_results)
    else:
      task_results = [task() for task in self._manager.tasks()]
      for result in task_results:
        self._manager.accumulate(result)
    assert(self._manager.finished())
    end_time = time()
    read_time = self._manager.read_time
    extract_time = self._manager.extract_time
    process_time = self._manager.process_time
    total_time = end_time - start_time
    # Single-argument print(...) gives identical output on Python 2 and 3.
    print("Time taken: reading images: %.2f seconds" % read_time)
    print("Time taken: extracting pixels: %.2f seconds" % extract_time)
    print("Time taken: processing data: %.2f seconds" % process_time)
    print("Time taken: total: %.2f seconds" % total_time)
    return self._manager.result()

Example #42

0

Show file

File: iota_threads.py Project: elliottslaughter/cctbx_project

    def run(self):
        """Process ``self.iterable`` in parallel, then post an AllDone event.

        If the parallel run raises ``IOTATermination`` the object is flagged
        as aborted, the exception is printed and no event is posted.
        Posting the event to ``self.parent`` is best effort: any failure
        there is ignored.
        """
        try:
            img_objects = parallel_map(iterable=self.iterable,
                                       func=self.full_proc_wrapper,
                                       processes=self.init.params.n_processors)
        except IOTATermination as e:
            self.aborted = True
            # print(e) works on Python 2 and 3; "print e" is a SyntaxError
            # on Python 3.
            print(e)
            return

        # Send "all done" event to GUI
        try:
            evt = AllDone(tp_EVT_ALLDONE, -1, img_objects=img_objects)
            wx.PostEvent(self.parent, evt)
        except Exception:
            # Deliberately best effort: a failed notification is ignored.
            pass

Example #43

0

Show file

File: batch_analysis.py Project: hackerlank/dials

def work_all(filenames, args, nproc):
    """Run ``work`` on every file in parallel and return the results.

    Each job is the pair ``(filename, args)``.  Jobs are mapped with
    ``easy_mp.parallel_map`` over ``nproc`` processes using the
    "multiprocessing" method, with result order preserved.
    """
    from libtbx import easy_mp

    jobs = [(filename, args) for filename in filenames]

    return easy_mp.parallel_map(
        func=work,
        iterable=jobs,
        processes=nproc,
        method="multiprocessing",
        preserve_order=True,
        asynchronous=True,
        preserve_exception_message=True,
    )

Example #44

0

Show file

File: batch_analysis.py Project: dials/dials

def work_all(filenames, args, nproc):
  """Run ``work`` on every file in parallel and return the results.

  Each job is the pair ``(filename, args)``.  Jobs are mapped with
  ``easy_mp.parallel_map`` over ``nproc`` processes using the
  "multiprocessing" method, with result order preserved.
  """
  from libtbx import easy_mp

  jobs = [(filename, args) for filename in filenames]

  return easy_mp.parallel_map(
    func=work,
    iterable=jobs,
    processes=nproc,
    method="multiprocessing",
    preserve_order=True,
    asynchronous=True,
    preserve_exception_message=True)

Example #45

0

Show file

File: iota_run.py Project: cctbx/cctbx-playground

  def run_import(self):
    ''' Import images or image objects.

    When the select_only flag is on, the existing image objects
    (self.init.gs_img_objects) are used; otherwise the input list
    (self.init.input_list). Each entry is wrapped as
    [1-based position, entry count + 1, entry] and mapped through
    self.proc_wrapper in parallel; the result is stored in self.img_objects.
    '''
    if self.init.params.cctbx.selection.select_only.flag_on:
      entries = self.init.gs_img_objects
      msg = "Reading {} image objects".format(len(entries))
      title = 'READING IMAGE OBJECTS'
    else:
      entries = self.init.input_list
      msg = "Importing {} images".format(len(entries))
      title = 'IMPORTING IMAGES'

    upper = len(entries) + 1
    self.img_list = [[position, upper, entry]
                     for position, entry in enumerate(entries, 1)]

    cmd.Command.start(msg)
    self.prog_count = 0
    self.gs_prog = cmd.ProgressBar(title=title)
    self.img_objects = parallel_map(
      iterable=self.img_list,
      func=self.proc_wrapper,
      callback=self.callback,
      processes=self.init.params.n_processors)

Example #46

0

Show file

File: mp.py Project: dials/dials

def batch_parallel_map(func=None,
                       iterable=None,
                       processes=None,
                       callback=None,
                       method=None,
                       chunksize=1):
  '''
  A function to run jobs in batches in each process

  The function, iterable and callback are wrapped in BatchFunc,
  BatchIterable (with the given chunksize) and BatchCallback before being
  handed to easy_mp.parallel_map, whose result is returned.

  '''
  from libtbx import easy_mp

  batched_func = BatchFunc(func)
  batched_iterable = BatchIterable(iterable, chunksize)
  batched_callback = BatchCallback(callback)

  # Call the batches in parallel
  return easy_mp.parallel_map(
    func=batched_func,
    iterable=batched_iterable,
    processes=processes,
    callback=batched_callback,
    method=method,
    preserve_order=True,
    preserve_exception_message=True)

Example #47

0

Show file

File: obsolete_predictors.py Project: dials/dials_scratch

  def step_over_images(self):
    """Search every block of images and collect the predictions.

    The image count comes from the scan; the blocks from
    self._make_blocks. The per-block results of
    self._search_on_image_range are concatenated into self._reflections.
    The number of processes is fixed at 1 (or the image count if that is
    smaller)."""

    from libtbx import easy_mp
    #from dials.util import mp
    n_images = self._scan.get_num_images()

    # Never ask for more processes than there are images
    nproc = min(1, n_images)

    blocks = self._make_blocks(n_images, nproc)

    per_block = easy_mp.parallel_map(
        func=self._search_on_image_range,
        iterable=blocks,
        processes=nproc,
        method="multiprocessing",
        preserve_order=True)

    # flatten the list of per-block lists
    flattened = []
    for block_result in per_block:
      flattened.extend(block_result)
    self._reflections = flattened

Example #48

0

Show file

File: xia2_main.py Project: xia2/xia2

def xia2_main(stop_after=None):
  '''Actually process something...

  Processes every sweep of every wavelength of every crystal described by
  the command-line xinfo. If a 'xia2.json' file exists, the project is
  loaded from it and crystals, wavelengths and sweeps from the command line
  that it does not contain yet are merged in.

  With more than one job (multiprocessing njob > 1) the sweeps are handed to
  process_one_sweep through easy_mp.parallel_map and the returned
  indexers/refiners/integraters are copied back onto the sweeps; otherwise
  each sweep is processed in this process. Sweeps that fail are removed from
  their wavelength and sample (in serial mode only when failover is set,
  otherwise the exception propagates).

  stop_after: when equal to 'index', serial processing stops after indexing;
  the value is also forwarded to the parallel jobs.
  '''

  Citations.cite('xia2')

  # print versions of related software
  from dials.util.version import dials_version
  Chatter.write(dials_version())

  start_time = time.time()

  CommandLine = get_command_line()
  start_dir = Flags.get_starting_directory()

  # check that something useful has been assigned for processing...
  xtals = CommandLine.get_xinfo().get_crystals()

  no_images = True

  for name in xtals.keys():
    xtal = xtals[name]

    if not xtal.get_all_image_names():

      Chatter.write('-----------------------------------' + \
                    '-' * len(name))
      Chatter.write('| No images assigned for crystal %s |' % name)
      Chatter.write('-----------------------------------' + '-' \
                    * len(name))
    else:
      no_images = False

  args = []

  from xia2.Handlers.Phil import PhilIndex
  params = PhilIndex.get_python_object()
  mp_params = params.xia2.settings.multiprocessing
  njob = mp_params.njob

  from libtbx import group_args

  xinfo = CommandLine.get_xinfo()

  if os.path.exists('xia2.json'):
    # Resume from the stored project and merge in anything new from the
    # command line.
    from xia2.Schema.XProject import XProject
    xinfo_new = xinfo
    xinfo = XProject.from_json(filename='xia2.json')

    crystals = xinfo.get_crystals()
    crystals_new = xinfo_new.get_crystals()
    for crystal_id in crystals_new.keys():
      if crystal_id not in crystals:
        crystals[crystal_id] = crystals_new[crystal_id]
        continue
      crystals[crystal_id]._scaler = None # reset scaler
      for wavelength_id in crystals_new[crystal_id].get_wavelength_names():
        wavelength_new = crystals_new[crystal_id].get_xwavelength(wavelength_id)
        if wavelength_id not in crystals[crystal_id].get_wavelength_names():
          # Add the new wavelength object itself; it used to be passed to
          # get_xwavelength() as if it were a wavelength name.
          crystals[crystal_id].add_wavelength(wavelength_new)
          continue
        wavelength = crystals[crystal_id].get_xwavelength(wavelength_id)
        sweeps_new = wavelength_new.get_sweeps()
        sweeps = wavelength.get_sweeps()
        sweep_names = [s.get_name() for s in sweeps]
        sweep_keys = [
          (s.get_directory(), s.get_template(), s.get_image_range())
          for s in sweeps]
        for sweep in sweeps_new:
          if ((sweep.get_directory(), sweep.get_template(),
               sweep.get_image_range()) not in sweep_keys):
            if sweep.get_name() in sweep_names:
              # Name clash: pick the first unused SWEEPn name. The test
              # used the misspelt 'SWEEEP%i', which never matched, so the
              # sweep was always renamed to SWEEP1.
              i = 1
              while 'SWEEP%i' %i in sweep_names:
                i += 1
              sweep._name = 'SWEEP%i' %i
              # NOTE(review): this break leaves the loop without adding the
              # renamed sweep or looking at the remaining new sweeps --
              # confirm that this is intended.
              break
            wavelength.add_sweep(
              name=sweep.get_name(),
              directory=sweep.get_directory(),
              image=sweep.get_image(),
              beam=sweep.get_beam_centre(),
              reversephi=sweep.get_reversephi(),
              distance=sweep.get_distance(),
              gain=sweep.get_gain(),
              dmin=sweep.get_resolution_high(),
              dmax=sweep.get_resolution_low(),
              polarization=sweep.get_polarization(),
              frames_to_process=sweep.get_frames_to_process(),
              user_lattice=sweep.get_user_lattice(),
              user_cell=sweep.get_user_cell(),
              epoch=sweep._epoch,
              ice=sweep._ice,
              excluded_regions=sweep._excluded_regions,
            )
            sweep_names.append(sweep.get_name())

  crystals = xinfo.get_crystals()

  failover = params.xia2.settings.failover

  if njob > 1:
    # One job description per sweep, in crystal / wavelength / sweep order.
    driver_type = mp_params.type
    command_line_args = CommandLine.get_argv()[1:]
    for crystal_id in crystals.keys():
      for wavelength_id in crystals[crystal_id].get_wavelength_names():
        wavelength = crystals[crystal_id].get_xwavelength(wavelength_id)
        sweeps = wavelength.get_sweeps()
        for sweep in sweeps:
          sweep._get_indexer()
          sweep._get_refiner()
          sweep._get_integrater()
          args.append((
            group_args(
              driver_type=driver_type,
              stop_after=stop_after,
              failover=failover,
              command_line_args=command_line_args,
              nproc=mp_params.nproc,
              crystal_id=crystal_id,
              wavelength_id=wavelength_id,
              sweep_id=sweep.get_name(),
              ),))

    from xia2.Driver.DriverFactory import DriverFactory
    default_driver_type = DriverFactory.get_driver_type()

    # run every nth job on the current computer (no need to submit to qsub)
    for i_job, arg in enumerate(args):
      if (i_job % njob) == 0:
        arg[0].driver_type = default_driver_type

    # method is worked out here, but the parallel_map call below is
    # hard-wired to "multiprocessing" (see the commented-out keyword).
    if mp_params.type == "qsub":
      method = "sge"
    else:
      method = "multiprocessing"
    nproc = mp_params.nproc
    qsub_command = mp_params.qsub_command
    if not qsub_command:
      qsub_command = 'qsub'
    qsub_command = '%s -V -cwd -pe smp %d' %(qsub_command, nproc)

    from libtbx import easy_mp
    results = easy_mp.parallel_map(
      process_one_sweep, args, processes=njob,
      #method=method,
      method="multiprocessing",
      qsub_command=qsub_command,
      preserve_order=True,
      preserve_exception_message=True)

    # Hack to update sweep with the serialized indexers/refiners/integraters
    # (results are consumed in the same order in which args was built)
    i_sweep = 0
    for crystal_id in crystals.keys():
      for wavelength_id in crystals[crystal_id].get_wavelength_names():
        wavelength = crystals[crystal_id].get_xwavelength(wavelength_id)
        remove_sweeps = []
        sweeps = wavelength.get_sweeps()
        for sweep in sweeps:
          success, output, xsweep_dict = results[i_sweep]
          assert xsweep_dict is not None
          if output is not None:
            Chatter.write(output)
          if not success:
            Chatter.write('Sweep failed: removing %s' %sweep.get_name())
            remove_sweeps.append(sweep)
          else:
            Chatter.write('Loading sweep: %s' % sweep.get_name())
            from xia2.Schema.XSweep import XSweep
            new_sweep = XSweep.from_dict(xsweep_dict)
            sweep._indexer = new_sweep._indexer
            sweep._refiner = new_sweep._refiner
            sweep._integrater = new_sweep._integrater
          i_sweep += 1
        # failed sweeps are removed only after the loop over sweeps is done
        for sweep in remove_sweeps:
          wavelength.remove_sweep(sweep)
          sample = sweep.get_xsample()
          sample.remove_sweep(sweep)

  else:
    for crystal_id in crystals.keys():
      for wavelength_id in crystals[crystal_id].get_wavelength_names():
        wavelength = crystals[crystal_id].get_xwavelength(wavelength_id)
        remove_sweeps = []
        sweeps = wavelength.get_sweeps()
        for sweep in sweeps:
          try:
            if stop_after == 'index':
              sweep.get_indexer_cell()
            else:
              sweep.get_integrater_intensities()
            sweep.serialize()
          except Exception as e:
            # "as e" is valid on Python 2.6+ and 3, unlike "except X, e"
            if failover:
              Chatter.write('Processing sweep %s failed: %s' % \
                            (sweep.get_name(), str(e)))
              remove_sweeps.append(sweep)
            else:
              raise
        for sweep in remove_sweeps:
          wavelength.remove_sweep(sweep)
          sample = sweep.get_xsample()
          sample.remove_sweep(sweep)

Example #49

0

Show file

File: collate_results.py Project: dials/dials_scratch

def run(args):
  """Collate the per-sweep results of ``run_once`` into tables and plots.

  ``run_once`` is mapped over ``args`` in parallel; results that are None are
  skipped.  Two text tables are written ('results.txt' and
  'results_indexed.txt') and histograms are saved as PDF files:
  'spot_count_histogram.pdf', 'n_indexed_lattices_histogram.pdf',
  'n_integrated_lattices_histogram.pdf' (only when the largest integrated
  lattice count is above zero) and 'cell_parameters.pdf'.
  """
  # accumulators with one entry per sweep
  sweep_directories = []
  templates = []
  n_strong_spots = flex.int()
  n_strong_spots_dmin_4 = flex.int()
  d_strong_spots_99th_percentile = flex.double()
  d_strong_spots_95th_percentile = flex.double()
  d_strong_spots_50th_percentile = flex.double()
  n_unindexed_spots = flex.int()
  n_indexed_lattices = flex.int()
  n_integrated_lattices = flex.int()
  # accumulators filled with extend(), i.e. possibly several entries per sweep
  sweep_dir_cryst = flex.std_string()

  # NOTE(review): orig_dir is not used in this function
  orig_dir = os.path.abspath(os.curdir)

  rmsds = flex.vec3_double()
  cell_params = flex.sym_mat3_double()
  n_indexed = flex.double()
  d_min_indexed = flex.double()
  # NOTE(review): rmsds is created a second time here; the first array above
  # is simply replaced
  rmsds = flex.vec3_double()

  nproc = easy_mp.get_processes(libtbx.Auto)
  #nproc = 1
  results = easy_mp.parallel_map(
    func=run_once,
    iterable=args,
    processes=nproc,
    method="multiprocessing",
    preserve_order=True,
    asynchronous=True,
    preserve_exception_message=True,
  )

  # gather the fields of every usable result into the accumulators
  for result in results:
    if result is None: continue
    sweep_directories.append(result.sweep_dir)
    templates.append(result.template)
    n_strong_spots.append(result.n_strong_spots)
    n_strong_spots_dmin_4.append(result.n_strong_spots_dmin_4)
    n_unindexed_spots.append(result.n_unindexed_spots)
    n_indexed_lattices.append(result.n_indexed_lattices)
    n_integrated_lattices.append(result.n_integrated_lattices)
    d_strong_spots_50th_percentile.append(result.d_strong_spots_50th_percentile)
    d_strong_spots_95th_percentile.append(result.d_strong_spots_95th_percentile)
    d_strong_spots_99th_percentile.append(result.d_strong_spots_99th_percentile)
    cell_params.extend(result.cell_params)
    n_indexed.extend(result.n_indexed)
    d_min_indexed.extend(result.d_min_indexed)
    rmsds.extend(result.rmsds)
    sweep_dir_cryst.extend(result.sweep_dir_cryst)

  # table 1: one row per sweep (header row first)
  table_data = [('sweep_dir', 'template', '#strong_spots', '#unindexed_spots', '#lattices',
                 'd_spacing_50th_percentile', 'd_spacing_95th_percentile',
                 'd_spacing_99th_percentile',)]
  for i in range(len(sweep_directories)):
    table_data.append((sweep_directories[i],
                       templates[i],
                       str(n_strong_spots[i]),
                       str(n_unindexed_spots[i]),
                       str(n_indexed_lattices[i]),
                       str(d_strong_spots_50th_percentile[i]),
                       str(d_strong_spots_95th_percentile[i]),
                       str(d_strong_spots_99th_percentile[i]),
                       ))

  # Python 2 "print >> file" statement
  with open('results.txt', 'wb') as f:
    print >> f, table_utils.format(
      table_data, has_header=True, justify='right')

  # table 2: one row per entry of cell_params (header row first)
  table_data = [('sweep_dir', 'cell_a', 'cell_b', 'cell_c', 'alpha', 'beta', 'gamma',
                 '#indexed_reflections', 'd_min_indexed',
                 'rmsd_x', 'rmsd_y', 'rmsd_phi')]
  for i in range(len(cell_params)):
    table_data.append((sweep_dir_cryst[i],
                       str(cell_params[i][0]),
                       str(cell_params[i][1]),
                       str(cell_params[i][2]),
                       str(cell_params[i][3]),
                       str(cell_params[i][4]),
                       str(cell_params[i][5]),
                       str(n_indexed[i]),
                       str(d_min_indexed[i]),
                       str(rmsds[i][0]),
                       str(rmsds[i][1]),
                       str(rmsds[i][2]),
                       ))

  with open('results_indexed.txt', 'wb') as f:
    print >> f, table_utils.format(
      table_data, has_header=True, justify='right')

  # split the six components of every cell_params entry into separate arrays
  cell_a = flex.double([params[0] for params in cell_params])
  cell_b = flex.double([params[1] for params in cell_params])
  cell_c = flex.double([params[2] for params in cell_params])
  cell_alpha = flex.double([params[3] for params in cell_params])
  cell_beta = flex.double([params[4] for params in cell_params])
  cell_gamma = flex.double([params[5] for params in cell_params])

  from matplotlib import pyplot
  from matplotlib.backends.backend_pdf import PdfPages

  pyplot.rc('font', family='serif')
  pyplot.rc('font', serif='Times New Roman')

  # only blue is used below
  red, blue = '#B2182B', '#2166AC'
  # histogram of n_strong_spots_dmin_4
  hist = flex.histogram(n_strong_spots_dmin_4.as_double(), n_slots=20)
  hist.show()
  fig = pyplot.figure()
  ax = fig.add_subplot(1,1,1)
  ax.bar(hist.slot_centers(), hist.slots(), width=0.75*hist.slot_width(),
         color=blue, edgecolor=blue)
  ax.set_xlabel('Spot count')
  ax.set_ylabel('Frequency')
  pdf = PdfPages("spot_count_histogram.pdf")
  pdf.savefig(fig)
  pdf.close()
  #pyplot.show()

  # histogram of the number of indexed lattices; the slot count is the
  # largest observed value
  hist = flex.histogram(n_indexed_lattices.as_double(),
                        n_slots=flex.max(n_indexed_lattices))
  hist.show()
  fig = pyplot.figure()
  ax = fig.add_subplot(1,1,1)
  ax.bar(range(int(hist.data_max())), hist.slots(),
         width=0.75*hist.slot_width(), align='center',
         color=blue, edgecolor=blue)
  ax.set_xlim(-0.5, hist.data_max()-0.5)
  ax.set_xticks(range(0,int(hist.data_max())))
  ax.set_xlabel('Number of indexed lattices')
  ax.set_ylabel('Frequency')
  pdf = PdfPages("n_indexed_lattices_histogram.pdf")
  pdf.savefig(fig)
  pdf.close()
  #pyplot.show()

  # same plot for integrated lattices, skipped when the maximum is zero
  if flex.max(n_integrated_lattices) > 0:
    hist = flex.histogram(n_integrated_lattices.as_double(),
                          n_slots=flex.max(n_integrated_lattices))
    hist.show()
    fig = pyplot.figure()
    ax = fig.add_subplot(1,1,1)
    ax.bar(range(int(hist.data_max())), hist.slots(),
           width=0.75*hist.slot_width(),
           align='center', color=blue, edgecolor=blue)
    ax.set_xlim(-0.5, hist.data_max()-0.5)
    ax.set_xticks(range(0,int(hist.data_max())))
    ax.set_xlabel('Number of integrated lattices')
    ax.set_ylabel('Frequency')
    pdf = PdfPages("n_integrated_lattices_histogram.pdf")
    pdf.savefig(fig)
    pdf.close()
    #pyplot.show()

  # 2 x 3 grid with one histogram per cell parameter
  fig, axes = pyplot.subplots(nrows=2, ncols=3, squeeze=False)
  for i, cell_param in enumerate(
    (cell_a, cell_b, cell_c, cell_alpha, cell_beta, cell_gamma)):
    ax = axes.flat[i]
    flex.min_max_mean_double(cell_param).show()
    print flex.median(cell_param)
    hist = flex.histogram(cell_param, n_slots=20)
    hist.show()
    ax.bar(hist.slot_centers(), hist.slots(), width=0.75*hist.slot_width(),
           color=blue, edgecolor=blue)
    ax.set_xlabel('Cell parameter')
    ax.set_ylabel('Frequency')
  pyplot.tight_layout()
  pdf = PdfPages("cell_parameters.pdf")
  pdf.savefig(fig)
  pdf.close()

Example #50

0

Show file

File: engine.py Project: dials/dials

    def build_up(self, objective_only=False):
        """Accumulate the observation and restraint terms for the current step.

        The residuals come from the target object; the weights are the
        variances of the observations (see the 'diagonal weight matrix'
        section of http://en.wikipedia.org/wiki/Non-linear_least_squares).

        With ``objective_only`` only residuals and weights are added.
        Otherwise the matches are split into blocks and residuals, Jacobian
        and weights are added per block, in parallel when ``self._nproc`` is
        greater than one.
        """
        # set current parameter values
        self.prepare_for_step()

        # Reset the state to construction time, i.e. no equations accumulated
        self.reset()

        target = self._target

        # observation terms
        if objective_only:
            resid, wts = target.compute_residuals()
            self.add_residuals(resid, wts)
        else:
            blocks = target.split_matches_into_blocks(nproc=self._nproc)

            if self._nproc > 1:

                # ensure the jacobian is not tracked
                self._jacobian = None

                def compute_block(block):
                    # runs through parallel_map: evaluate one block
                    resid, jac, wts = self._target.compute_residuals_and_gradients(block)
                    return {"residuals": resid, "jacobian": jac, "weights": wts}

                def absorb_block(result):
                    # callback: add the block's equations, then drop the
                    # references to its arrays as they are no longer needed
                    self.add_equations(result["residuals"], result["jacobian"], result["weights"])
                    for key in ("residuals", "jacobian", "weights"):
                        result[key] = None

                easy_mp.parallel_map(
                    func=compute_block,
                    iterable=blocks,
                    processes=self._nproc,
                    callback=absorb_block,
                    method="multiprocessing",
                    # preserve_exception_message=True
                )

            else:
                for block in blocks:
                    resid, self._jacobian, wts = target.compute_residuals_and_gradients(block)
                    self.add_equations(resid, self._jacobian, wts)

        # restraints terms
        restraints = self._target.compute_restraints_residuals_and_gradients()
        if not restraints:
            return
        if objective_only:
            self.add_residuals(restraints[0], restraints[2])
        else:
            self.add_equations(restraints[0], restraints[1], restraints[2])

Example #51

0

Show file

File: overload.py Project: xia2/xia2

def build_hist(nproc=1):
  from scitbx.array_family import flex
  from libtbx import easy_mp
  from collections import Counter

  # FIXME use proper optionparser here. This works for now
  if len(sys.argv) >= 2 and sys.argv[1].startswith('nproc='):
    nproc=int(sys.argv[1][6:])
    sys.argv = sys.argv[1:]
  if len(sys.argv) == 2 and sys.argv[1].endswith('.json'):
    from dxtbx import datablock
    db = datablock.DataBlockFactory.from_json_file(sys.argv[1])[0]
    image_list = db.extract_imagesets()[0].paths()
  else:
    image_list = sys.argv[1:]
  image_count = len(image_list)

  # Faster, yet still less than ideal and wasting a lot of resources.
  limit = get_overload(image_list[0])
  binfactor = 5 # register up to 500% counts
  histmax = (limit * binfactor) + 0.0
  histbins = int(limit * binfactor) + 1
  use_python_counter = histbins > 90000000 # empirically determined

  print "Processing %d images in %d processes using %s\n" % (image_count, nproc, \
    "python Counter" if use_python_counter else "flex arrays")

  def process_image(process):
    import sys
    last_update = start = timeit.default_timer()

    i = process
    if use_python_counter:
      local_hist = Counter()
    else:
      local_hist = flex.histogram(flex.double(), data_min=0.0, data_max=histmax, n_slots=histbins)

    max_images = image_count // nproc
    if process >= image_count % nproc:
      max_images += 1
    while i < image_count:
      data = read_cbf_image(image_list[i])
      if not use_python_counter:
        data = flex.histogram(data.as_double().as_1d(), data_min=0.0, data_max=histmax, n_slots=histbins)
      local_hist.update(data)
      i = i + nproc
      if process == 0:
        if timeit.default_timer() > (last_update + 3):
          last_update = timeit.default_timer()
          if sys.stdout.isatty():
            sys.stdout.write('\033[A')
          print 'Processed %d%% (%d seconds remain)    ' % (100 * i // image_count, round((image_count - i) * (last_update - start) / (i+1)))
    return local_hist

  results = easy_mp.parallel_map(
    func=process_image,
    iterable=range(nproc),
    processes=nproc,
    preserve_exception_message=True)

  print "Merging results"
  result_hist = None
  for hist in results:
    if result_hist is None:
      result_hist = hist
    else:
      result_hist.update(hist)

  if not use_python_counter:
    # reformat histogram into dictionary
    result = list(result_hist.slots())
    result_hist = { b: count for b, count in enumerate(result) if count > 0 }

  results = { 'scale_factor': 1 / limit,
              'overload_limit': limit,
              'counts': result_hist }

  print "Writing results to overload.json"
  with open('overload.json', 'w') as fh:
    json.dump(results, fh, indent=1, sort_keys=True)

Example #52

0

Show file

File: XDSScalerA.py Project: xia2/xia2

  def _scale_prepare(self):
    '''Prepare the data for scaling.

    Gathers the integrated reflection files for every sweep, checks that
    all sweeps share one project name, crystal name and lattice, works out
    a consistent pointgroup and setting (against a reference reflection
    file when there is one) and copies the resulting reflection files into
    the working directory.

    Whenever a step changes the lattice or the reindexing operator of an
    integrater, the scaler is flagged as not done / not prepared and the
    method returns early so that the affected sweeps can be reprocessed.'''

    Citations.cite('xds')
    Citations.cite('ccp4')
    Citations.cite('pointless')

    # GATHER phase - get the reflection files together... note that
    # it is not necessary in here to keep the batch information as we
    # don't wish to rebatch the reflections prior to scaling.
    # FIXME need to think about what I will do about the radiation
    # damage analysis in here...

    self._sweep_information = { }

    # FIXME in here I want to record the batch number to
    # epoch mapping as per the CCP4 Scaler implementation.

    Journal.block(
        'gathering', self.get_scaler_xcrystal().get_name(), 'XDS',
        {'working directory':self.get_working_directory()})

    for epoch in self._scalr_integraters.keys():
      intgr = self._scalr_integraters[epoch]
      pname, xname, dname = intgr.get_integrater_project_info()
      sname = intgr.get_integrater_sweep_name()
      self._sweep_information[epoch] = {
          'pname':pname,
          'xname':xname,
          'dname':dname,
          'integrater':intgr,
          'corrected_intensities':intgr.get_integrater_corrected_intensities(),
          'prepared_reflections':None,
          'scaled_reflections':None,
          'header':intgr.get_header(),
          'batches':intgr.get_integrater_batches(),
          'image_to_epoch':intgr.get_integrater_sweep(
          ).get_image_to_epoch(),
          'image_to_dose':{},
          'batch_offset':0,
          'sname':sname
          }

      Journal.entry({'adding data from':'%s/%s/%s' % \
                     (xname, dname, sname)})

      # what are these used for?
      # pname / xname / dname - dataset identifiers
      # image to epoch / batch offset / batches - for RD analysis

      Debug.write('For EPOCH %s have:' % str(epoch))
      Debug.write('ID = %s/%s/%s' % (pname, xname, dname))
      Debug.write('SWEEP = %s' % intgr.get_integrater_sweep_name())

    # next work through all of the reflection files and make sure that
    # they are XDS_ASCII format...

    epochs = self._sweep_information.keys()
    epochs.sort()

    self._first_epoch = min(epochs)

    self._scalr_pname = self._sweep_information[epochs[0]]['pname']
    self._scalr_xname = self._sweep_information[epochs[0]]['xname']

    for epoch in epochs:
      intgr = self._scalr_integraters[epoch]
      pname = self._sweep_information[epoch]['pname']
      xname = self._sweep_information[epoch]['xname']
      dname = self._sweep_information[epoch]['dname']
      sname = self._sweep_information[epoch]['sname']
      if self._scalr_pname != pname:
        raise RuntimeError('all data must have a common project name')
      if self._scalr_xname != xname:
        raise RuntimeError(
              'all data for scaling must come from one crystal')

      xsh = XDSScalerHelper()
      xsh.set_working_directory(self.get_working_directory())
      hklin = self._sweep_information[epoch]['corrected_intensities']
      hklout = os.path.join(self.get_working_directory(),
                            '%s_%s_%s_%s_CORRECTED.HKL' %(
                              pname, xname, dname, sname))
      sweep = intgr.get_integrater_sweep()
      if sweep.get_frames_to_process() is not None:
        offset = intgr.get_frame_offset()
        #print "offset: %d" %offset
        start, end = sweep.get_frames_to_process()
        start -= offset
        end -= offset
        #end += 1 ????
        #print "limiting batches: %d-%d" %(start, end)
        xsh.limit_batches(hklin, hklout, start, end)
        self._sweep_information[epoch]['corrected_intensities'] = hklout

    # if there is more than one sweep then compare the lattices
    # and eliminate all but the lowest symmetry examples if
    # there are more than one...

    # -------------------------------------------------
    # Ensure that the integration lattices are the same
    # -------------------------------------------------

    need_to_return = False

    if len(self._sweep_information.keys()) > 1:

      lattices = []

      # FIXME run this stuff in parallel as well...

      for epoch in self._sweep_information.keys():

        intgr = self._sweep_information[epoch]['integrater']
        hklin = self._sweep_information[epoch]['corrected_intensities']
        refiner = intgr.get_integrater_refiner()

        if self._scalr_input_pointgroup:
          pointgroup = self._scalr_input_pointgroup
          reindex_op = 'h,k,l'
          ntr = False

        else:

          pointgroup, reindex_op, ntr = \
                      self._pointless_indexer_jiffy(hklin, refiner)

          Debug.write('X1698: %s: %s' % (pointgroup, reindex_op))

        lattice = Syminfo.get_lattice(pointgroup)

        if not lattice in lattices:
          lattices.append(lattice)

        if ntr:

          # if we need to return, we should logically reset
          # any reindexing operator right? right here all
          # we are talking about is the correctness of
          # individual pointgroups?? Bug # 3373

          reindex_op = 'h,k,l'
          # actually, should this not be done "by magic"
          # when a new pointgroup is assigned in the
          # pointless indexer jiffy above?!

          intgr.set_integrater_reindex_operator(
              reindex_op, compose = False)

          need_to_return = True

      # bug # 2433 - need to ensure that all of the lattice
      # conclusions were the same...

      if len(lattices) > 1:
        ordered_lattices = []
        for l in lattices_in_order():
          if l in lattices:
            ordered_lattices.append(l)

        correct_lattice = ordered_lattices[0]
        Debug.write('Correct lattice asserted to be %s' % \
                    correct_lattice)

        # transfer this information back to the indexers
        for epoch in self._sweep_information.keys():
          integrater = self._sweep_information[
              epoch]['integrater']
          refiner = integrater.get_integrater_refiner()
          sname = integrater.get_integrater_sweep_name()

          if not refiner:
            continue

          state = refiner.set_refiner_asserted_lattice(
              correct_lattice)
          if state == refiner.LATTICE_CORRECT:
            Debug.write('Lattice %s ok for sweep %s' % \
                        (correct_lattice, sname))
          elif state == refiner.LATTICE_IMPOSSIBLE:
            raise RuntimeError('Lattice %s impossible for %s' % \
                  (correct_lattice, sname))
          elif state == refiner.LATTICE_POSSIBLE:
            Debug.write('Lattice %s assigned for sweep %s' % \
                        (correct_lattice, sname))
            need_to_return = True

    # if one or more of them was not in the lowest lattice,
    # need to return here to allow reprocessing

    if need_to_return:
      self.set_scaler_done(False)
      self.set_scaler_prepare_done(False)
      return

    # next if there is more than one sweep then generate
    # a merged reference reflection file to check that the
    # setting for all reflection files is the same...

    # if we get to here then all data was processed with the same
    # lattice

    # ----------------------------------------------------------
    # next ensure that all sweeps are set in the correct setting
    # ----------------------------------------------------------

    if self.get_scaler_reference_reflection_file():
      self._reference = self.get_scaler_reference_reflection_file()
      Debug.write('Using HKLREF %s' % self._reference)

      md = self._factory.Mtzdump()
      md.set_hklin(self.get_scaler_reference_reflection_file())
      md.dump()

      self._xds_spacegroup = Syminfo.spacegroup_name_to_number(
          md.get_spacegroup())

      Debug.write('Spacegroup %d' % self._xds_spacegroup)

    elif PhilIndex.params.xia2.settings.scale.reference_reflection_file:
      self._reference = PhilIndex.params.xia2.settings.scale.reference_reflection_file

      Debug.write('Using HKLREF %s' % self._reference)

      md = self._factory.Mtzdump()
      md.set_hklin(PhilIndex.params.xia2.settings.scale.reference_reflection_file)
      md.dump()

      self._xds_spacegroup = Syminfo.spacegroup_name_to_number(
          md.get_spacegroup())

      Debug.write('Spacegroup %d' % self._xds_spacegroup)

    params = PhilIndex.params
    use_brehm_diederichs = params.xia2.settings.use_brehm_diederichs
    if len(self._sweep_information.keys()) > 1 and use_brehm_diederichs:
      brehm_diederichs_files_in = []
      for epoch in self._sweep_information.keys():

        intgr = self._sweep_information[epoch]['integrater']
        hklin = self._sweep_information[epoch]['corrected_intensities']
        refiner = intgr.get_integrater_refiner()

        # in here need to consider what to do if the user has
        # assigned the pointgroup on the command line ...

        if not self._scalr_input_pointgroup:
          pointgroup, reindex_op, ntr = \
                      self._pointless_indexer_jiffy(hklin, refiner)

          if ntr:

            # Bug # 3373

            Debug.write('Reindex to standard (PIJ): %s' % \
                        reindex_op)

            intgr.set_integrater_reindex_operator(
                reindex_op, compose = False)
            reindex_op = 'h,k,l'
            need_to_return = True

        else:

          # 27/FEB/08 to support user assignment of pointgroups

          Debug.write('Using input pointgroup: %s' % \
                      self._scalr_input_pointgroup)
          pointgroup = self._scalr_input_pointgroup
          reindex_op = 'h,k,l'

        intgr.set_integrater_reindex_operator(reindex_op)
        intgr.set_integrater_spacegroup_number(
            Syminfo.spacegroup_name_to_number(pointgroup))
        self._sweep_information[epoch]['corrected_intensities'] \
          = intgr.get_integrater_corrected_intensities()

        # convert the XDS_ASCII for this sweep to mtz - on the next
        # get this should be in the correct setting...

        dname = self._sweep_information[epoch]['dname']
        sname = intgr.get_integrater_sweep_name()
        hklin = self._sweep_information[epoch]['corrected_intensities']
        hklout = os.path.join(self.get_working_directory(),
                              '%s_%s.mtz' % (dname, sname))

        FileHandler.record_temporary_file(hklout)

        # now use pointless to make this conversion

        pointless = self._factory.Pointless()
        pointless.set_xdsin(hklin)
        pointless.set_hklout(hklout)
        pointless.xds_to_mtz()
        brehm_diederichs_files_in.append(hklout)

      # now run cctbx.brehm_diederichs to figure out the indexing hand for
      # each sweep
      from xia2.Wrappers.Cctbx.BrehmDiederichs import BrehmDiederichs
      brehm_diederichs = BrehmDiederichs()
      brehm_diederichs.set_working_directory(self.get_working_directory())
      auto_logfiler(brehm_diederichs)
      brehm_diederichs.set_input_filenames(brehm_diederichs_files_in)
      # 1 or 3? 1 seems to work better?
      brehm_diederichs.set_asymmetric(1)
      brehm_diederichs.run()
      reindexing_dict = brehm_diederichs.get_reindexing_dict()

      for epoch in self._sweep_information.keys():

        intgr = self._sweep_information[epoch]['integrater']

        dname = self._sweep_information[epoch]['dname']
        sname = intgr.get_integrater_sweep_name()

        # apply the reindexing operator
        # NOTE(review): reindexing_dict is fetched above but never consulted;
        # reindex_op here is whatever the last iteration of the previous
        # loop left behind - confirm whether the per-sweep operator from
        # reindexing_dict was meant to be used instead
        intgr.set_integrater_reindex_operator(reindex_op)

        # and copy the reflection file to the local directory
        hklin = self._sweep_information[epoch]['corrected_intensities']
        hklout = os.path.join(self.get_working_directory(),
                              '%s_%s.HKL' % (dname, sname))

        Debug.write('Copying %s to %s' % (hklin, hklout))
        shutil.copyfile(hklin, hklout)

        # record just the local file name...
        self._sweep_information[epoch][
            'prepared_reflections'] = os.path.split(hklout)[-1]

    elif len(self._sweep_information.keys()) > 1 and \
           not self._reference:
      # need to generate a reference reflection file - generate this
      # from the reflections in self._first_epoch
      #
      # FIXME this should really use the Brehm and Diederichs method
      # if you have lots of little sweeps...

      # work on the first epoch throughout: without this rebinding, epoch
      # would still hold the last key visited by an earlier loop, so the
      # reflections read and the entry updated below would belong to a
      # different sweep than the integrater
      epoch = self._first_epoch
      intgr = self._sweep_information[epoch]['integrater']

      hklin = self._sweep_information[epoch]['corrected_intensities']
      refiner = intgr.get_integrater_refiner()

      if self._scalr_input_pointgroup:
        Debug.write('Using input pointgroup: %s' % \
                    self._scalr_input_pointgroup)
        pointgroup = self._scalr_input_pointgroup
        ntr = False
        reindex_op = 'h,k,l'

      else:
        pointgroup, reindex_op, ntr = self._pointless_indexer_jiffy(
            hklin, refiner)

        Debug.write('X1698: %s: %s' % (pointgroup, reindex_op))

      reference_reindex_op = intgr.get_integrater_reindex_operator()

      if ntr:

        # Bug # 3373

        intgr.set_integrater_reindex_operator(
            reindex_op, compose = False)
        reindex_op = 'h,k,l'
        need_to_return = True

      self._xds_spacegroup = Syminfo.spacegroup_name_to_number(pointgroup)

      # next pass this reindexing operator back to the source
      # of the reflections

      intgr.set_integrater_reindex_operator(reindex_op)
      intgr.set_integrater_spacegroup_number(
          Syminfo.spacegroup_name_to_number(pointgroup))
      self._sweep_information[epoch]['corrected_intensities'] \
        = intgr.get_integrater_corrected_intensities()

      hklin = self._sweep_information[epoch]['corrected_intensities']

      hklout = os.path.join(self.get_working_directory(),
                            'xds-pointgroup-reference-unsorted.mtz')
      FileHandler.record_temporary_file(hklout)

      # now use pointless to handle this conversion

      pointless = self._factory.Pointless()
      pointless.set_xdsin(hklin)
      pointless.set_hklout(hklout)
      pointless.xds_to_mtz()

      self._reference = hklout

    if self._reference:

      from xia2.Driver.DriverFactory import DriverFactory

      # run_one_sweep cannot rebind need_to_return in this scope (an
      # assignment inside the nested function only creates a local there),
      # so it records reindexing requests in this shared list instead
      reindex_requested = []

      def run_one_sweep(args):
        '''Reindex one sweep against self._reference and copy its
        reflection file to the working directory.

        args is a tuple (sweep_information, pointless_indexer_jiffy,
        factory, job_type); the updated sweep_information is returned.'''
        sweep_information = args[0]
        pointless_indexer_jiffy = args[1]
        factory = args[2]
        job_type = args[3]

        if job_type:
          DriverFactory.set_driver_type(job_type)

        intgr = sweep_information['integrater']
        hklin = sweep_information['corrected_intensities']
        refiner = intgr.get_integrater_refiner()

        # in here need to consider what to do if the user has
        # assigned the pointgroup on the command line ...

        if not self._scalr_input_pointgroup:
          pointgroup, reindex_op, ntr = \
                      self._pointless_indexer_jiffy(hklin, refiner)

          if ntr:

            # Bug # 3373

            Debug.write('Reindex to standard (PIJ): %s' % \
                        reindex_op)

            intgr.set_integrater_reindex_operator(
                reindex_op, compose = False)
            reindex_op = 'h,k,l'
            reindex_requested.append(True)

        else:

          # 27/FEB/08 to support user assignment of pointgroups

          Debug.write('Using input pointgroup: %s' % \
                      self._scalr_input_pointgroup)
          pointgroup = self._scalr_input_pointgroup
          reindex_op = 'h,k,l'

        intgr.set_integrater_reindex_operator(reindex_op)
        intgr.set_integrater_spacegroup_number(
            Syminfo.spacegroup_name_to_number(pointgroup))
        sweep_information['corrected_intensities'] \
          = intgr.get_integrater_corrected_intensities()

        # convert the XDS_ASCII for this sweep to mtz - on the next
        # get this should be in the correct setting...

        hklin = sweep_information['corrected_intensities']

        # now use pointless to make this conversion

        # try with no conversion?!

        pointless = self._factory.Pointless()
        pointless.set_xdsin(hklin)
        hklout = os.path.join(
          self.get_working_directory(),
          '%d_xds-pointgroup-unsorted.mtz' %pointless.get_xpid())
        FileHandler.record_temporary_file(hklout)
        pointless.set_hklout(hklout)
        pointless.xds_to_mtz()

        pointless = self._factory.Pointless()
        pointless.set_hklin(hklout)
        pointless.set_hklref(self._reference)
        pointless.decide_pointgroup()

        pointgroup = pointless.get_pointgroup()
        reindex_op = pointless.get_reindex_operator()

        # for debugging print out the reindexing operations and
        # what have you...

        Debug.write('Reindex to standard: %s' % reindex_op)

        # this should send back enough information that this
        # is in the correct pointgroup (from the call above) and
        # also in the correct setting, from the interaction
        # with the reference set... - though I guess that the
        # spacegroup number should not have changed, right?

        # set the reindex operation afterwards... though if the
        # spacegroup number is the same this should make no
        # difference, right?!

        intgr.set_integrater_spacegroup_number(
            Syminfo.spacegroup_name_to_number(pointgroup))
        intgr.set_integrater_reindex_operator(reindex_op)
        sweep_information['corrected_intensities'] \
          = intgr.get_integrater_corrected_intensities()

        # and copy the reflection file to the local directory

        dname = sweep_information['dname']
        sname = intgr.get_integrater_sweep_name()
        hklin = sweep_information['corrected_intensities']
        hklout = os.path.join(self.get_working_directory(),
                              '%s_%s.HKL' % (dname, sname))

        Debug.write('Copying %s to %s' % (hklin, hklout))
        shutil.copyfile(hklin, hklout)

        # record just the local file name...
        sweep_information['prepared_reflections'] = os.path.split(hklout)[-1]
        return sweep_information

      from libtbx import easy_mp
      params = PhilIndex.get_python_object()
      mp_params = params.xia2.settings.multiprocessing
      njob = mp_params.njob

      if njob > 1:
        # cache drivertype
        drivertype = DriverFactory.get_driver_type()

        args = [
          (self._sweep_information[epoch], self._pointless_indexer_jiffy,
           self._factory, mp_params.type)
                for epoch in self._sweep_information.keys()]
        results_list = easy_mp.parallel_map(
          run_one_sweep, args, params=None,
          processes=njob,
          method="threading",
          asynchronous=True,
          callback=None,
          preserve_order=True,
          preserve_exception_message=True)

        # restore drivertype
        DriverFactory.set_driver_type(drivertype)

        # results should be given back in the same order
        for i, epoch in enumerate(self._sweep_information.keys()):
          self._sweep_information[epoch] = results_list[i]

      else:
        for epoch in self._sweep_information.keys():
          self._sweep_information[epoch] = run_one_sweep(
            (self._sweep_information[epoch], self._pointless_indexer_jiffy,
             self._factory, None))

      # propagate any reindexing request made while processing the sweeps
      if reindex_requested:
        need_to_return = True

    else:
      # convert the XDS_ASCII for this sweep to mtz

      epoch = self._first_epoch
      intgr = self._sweep_information[epoch]['integrater']
      refiner = intgr.get_integrater_refiner()
      sname = intgr.get_integrater_sweep_name()

      hklout = os.path.join(self.get_working_directory(),
                            '%s-pointless.mtz' % sname)
      FileHandler.record_temporary_file(hklout)

      pointless = self._factory.Pointless()
      pointless.set_xdsin(self._sweep_information[epoch]['corrected_intensities'])
      pointless.set_hklout(hklout)
      pointless.xds_to_mtz()

      # run it through pointless interacting with the
      # Indexer which belongs to this sweep

      hklin = hklout

      if self._scalr_input_pointgroup:
        Debug.write('Using input pointgroup: %s' % \
                    self._scalr_input_pointgroup)
        pointgroup = self._scalr_input_pointgroup
        ntr = False
        reindex_op = 'h,k,l'

      else:
        pointgroup, reindex_op, ntr = self._pointless_indexer_jiffy(
            hklin, refiner)

      if ntr:

        # if we need to return, we should logically reset
        # any reindexing operator right? right here all
        # we are talking about is the correctness of
        # individual pointgroups?? Bug # 3373

        reindex_op = 'h,k,l'
        intgr.set_integrater_reindex_operator(
            reindex_op, compose = False)

        need_to_return = True

      self._xds_spacegroup = Syminfo.spacegroup_name_to_number(pointgroup)

      # next pass this reindexing operator back to the source
      # of the reflections

      intgr.set_integrater_reindex_operator(reindex_op)
      intgr.set_integrater_spacegroup_number(
          Syminfo.spacegroup_name_to_number(pointgroup))
      self._sweep_information[epoch]['corrected_intensities'] \
        = intgr.get_integrater_corrected_intensities()

      hklin = self._sweep_information[epoch]['corrected_intensities']
      dname = self._sweep_information[epoch]['dname']
      hklout = os.path.join(self.get_working_directory(),
                            '%s_%s.HKL' % (dname, sname))

      # and copy the reflection file to the local
      # directory

      Debug.write('Copying %s to %s' % (hklin, hklout))
      shutil.copyfile(hklin, hklout)

      # record just the local file name...
      self._sweep_information[epoch][
          'prepared_reflections'] = os.path.split(hklout)[-1]

    if need_to_return:
      self.set_scaler_done(False)
      self.set_scaler_prepare_done(False)
      return

    unit_cell_list = []

    for epoch in self._sweep_information.keys():
      integrater = self._sweep_information[epoch]['integrater']
      cell = integrater.get_integrater_cell()
      n_ref = integrater.get_integrater_n_ref()

      Debug.write('Cell for %s: %.2f %.2f %.2f %.2f %.2f %.2f' % \
                  (integrater.get_integrater_sweep_name(),
                   cell[0], cell[1], cell[2],
                   cell[3], cell[4], cell[5]))
      Debug.write('=> %d reflections' % n_ref)

      unit_cell_list.append((cell, n_ref))

    self._scalr_cell = compute_average_unit_cell(unit_cell_list)

    self._scalr_resolution_limits = { }

    Debug.write('Determined unit cell: %.2f %.2f %.2f %.2f %.2f %.2f' % \
                tuple(self._scalr_cell))

    if os.path.exists(os.path.join(
        self.get_working_directory(),
        'REMOVE.HKL')):
      os.remove(os.path.join(
          self.get_working_directory(),
          'REMOVE.HKL'))

      Debug.write('Deleting REMOVE.HKL at end of scale prepare.')

    return

Example #53

0

Show file

File: test_3_gaussian.py Project: cctbx/cctbx-playground

    def run_fast(args):
      islow = args[0]
      result_val = flex.int(388)
      for jfast in xrange(388):
        histo1 = histograms[islow*388+jfast,:]
        nphotons = fit_3_gaussian.test_fit(histo1,plot=False)
        print islow*388+jfast, "of %d, # photons= %d"%(len(histograms),nphotons)
        result_val[jfast]=nphotons
      return {islow:result_val}

    iterable = [(i,) for i in xrange(185)]
    results = easy_mp.parallel_map(
      func=run_fast,
      iterable=iterable,
      processes=nproc,
      method="multiprocessing",
      preserve_order=True
    )

    for result in results:
      for key in result.keys():
        values = result[key]
        for i in xrange(len(values)):
          inelastic[key,i] = values[i]

  # save fitting result:
  # some heuristics to output a proper filename
  if fname.find('_offline')>=0:
    filename = fname.replace('_offline','_fitted')
  else:

Example #54

0

Show file

File: mp_index.py Project: dials/dials_scratch

    strongs.append(strong_filepath)

print "Found %d images to index"%len(images)

def index(item):
  """Import one image and index it against its strong-spot file.

  item is an (image, strong) pair of file names. Runs dials.import and then
  dials.index as external commands, writing the datablock, experiments and
  indexed reflections into params.output_dir under names derived from the
  image's base name. Errors raised by the import step are propagated; the
  indexing step only has its standard output shown.
  """
  image_path, strong_path = item
  stem = os.path.splitext(os.path.basename(image_path))[0]
  datablock_path = os.path.join(params.output_dir, stem + "_datablock.json")

  import_command = "dials.import %s output.datablock=%s"%(image_path, datablock_path)
  if params.reference_geometry is not None:
    import_command += " reference_geometry=%s"%params.reference_geometry
  import_result = easy_run.fully_buffered(import_command)
  import_result.raise_if_errors().show_stdout()

  experiments_path = os.path.join(params.output_dir, stem + "_experiments.json")
  indexed_path = os.path.join(params.output_dir, stem + "_indexed.pickle")
  index_command = "dials.index %s %s output.experiments=%s output.reflections=%s"% (
    datablock_path, strong_path, experiments_path, indexed_path)
  if indexing_phil is not None:
    index_command += " %s"%indexing_phil

  index_result = easy_run.fully_buffered(index_command)
  index_result.show_stdout()

# Run index() on every (image, strong) pair using the configured number of
# processes and multiprocessing method. The return value is discarded and
# preserve_order is switched off.
easy_mp.parallel_map(
  func=index,
  iterable=zip(images, strongs),
  processes=params.mp.nproc,
  method=params.mp.method,
  preserve_order=False,
  preserve_exception_message=True)

print "All done"

Example #55

0

Show file

File: stills_process.py Project: dials/dials

  def run(self):
    '''Execute the script.

    Parses the command line, configures logging, then processes every
    input file through a Processor. With dispatch.pre_import set (or a
    single input path) all files are imported up front and split into one
    datablock per image; otherwise each file is imported inside the worker.
    Work is distributed either over MPI ranks (mp.method == 'mpi') or via
    easy_mp.parallel_map.'''
    from dials.util import log
    from time import time
    from libtbx import easy_mp
    from collections import Counter
    import copy

    # Parse the command line
    params, options, all_paths = self.parser.parse_args(show_diff_phil=False, return_unhandled=True)

    # Check we have some filenames
    if not all_paths:
      self.parser.print_help()
      return

    # Save the options
    self.options = options
    self.params = params

    st = time()

    # Configure logging
    log.config(
      params.verbosity,
      info='dials.process.log',
      debug='dials.process.debug.log')

    # Log the diff phil
    diff_phil = self.parser.diff_phil.as_str()
    # compare by value: an identity test against a string literal depends
    # on interning and is not a reliable emptiness check
    if diff_phil != '':
      logger.info('The following parameters have been modified:\n')
      logger.info(diff_phil)

    self.load_reference_geometry()
    from dials.command_line.dials_import import ManualGeometryUpdater
    update_geometry = ManualGeometryUpdater(params)

    # Import stuff
    logger.info("Loading files...")
    pre_import = params.dispatch.pre_import or len(all_paths) == 1
    if pre_import:
      # Handle still imagesets by breaking them apart into multiple datablocks
      # Further handle single file still imagesets (like HDF5) by tagging each
      # frame using its index

      datablocks = [do_import(path) for path in all_paths]
      if self.reference_detector is not None:
        from dxtbx.model import Detector
        for datablock in datablocks:
          for imageset in datablock.extract_imagesets():
            for i in range(len(imageset)):
              imageset.set_detector(
                Detector.from_dict(self.reference_detector.to_dict()),
                index=i)

      for datablock in datablocks:
        for imageset in datablock.extract_imagesets():
          update_geometry(imageset)

      indices = []
      basenames = []
      split_datablocks = []
      for datablock in datablocks:
        for imageset in datablock.extract_imagesets():
          paths = imageset.paths()
          for i in xrange(len(imageset)):
            subset = imageset[i:i+1]
            split_datablocks.append(DataBlockFactory.from_imageset(subset)[0])
            indices.append(i)
            basenames.append(os.path.splitext(os.path.basename(paths[i]))[0])

      # basenames shared by several images get the image index appended so
      # that every tag is distinct; count once rather than once per image
      basename_counts = Counter(basenames)
      tags = []
      for i, basename in zip(indices, basenames):
        if basename_counts[basename] > 1:
          tags.append("%s_%05d"%(basename, i))
        else:
          tags.append(basename)

      # Wrapper function
      def do_work(item):
        Processor(copy.deepcopy(params)).process_datablock(item[0], item[1])

      iterable = zip(tags, split_datablocks)

    else:
      basenames = [os.path.splitext(os.path.basename(filename))[0] for filename in all_paths]

      # as above, but duplicates are told apart by position in all_paths
      basename_counts = Counter(basenames)
      tags = []
      for i, basename in enumerate(basenames):
        if basename_counts[basename] > 1:
          tags.append("%s_%05d"%(basename, i))
        else:
          tags.append(basename)

      # Wrapper function
      def do_work(item):
        tag, filename = item

        datablock = do_import(filename)
        imagesets = datablock.extract_imagesets()
        if len(imagesets) == 0 or len(imagesets[0]) == 0:
          logger.info("Zero length imageset in file: %s"%filename)
          return
        if len(imagesets) > 1:
          raise Abort("Found more than one imageset in file: %s"%filename)
        if len(imagesets[0]) > 1:
          raise Abort("Found a multi-image file. Run again with pre_import=True")

        if self.reference_detector is not None:
          from dxtbx.model import Detector
          imagesets[0].set_detector(Detector.from_dict(self.reference_detector.to_dict()))

        update_geometry(imagesets[0])

        Processor(copy.deepcopy(params)).process_datablock(tag, datablock)

      iterable = zip(tags, all_paths)

    # Process the data
    if params.mp.method == 'mpi':
      from mpi4py import MPI
      comm = MPI.COMM_WORLD
      rank = comm.Get_rank() # each process in MPI has a unique id, 0-indexed
      size = comm.Get_size() # size: number of processes running in this job

      # each rank takes the items whose (index + rank) is a multiple of size
      for i, item in enumerate(iterable):
        if (i+rank)%size == 0:
          do_work(item)
    else:
      easy_mp.parallel_map(
        func=do_work,
        iterable=iterable,
        processes=params.mp.nproc,
        method=params.mp.method,
        preserve_order=True,
        preserve_exception_message=True)

    # Total Time
    logger.info("")
    logger.info("Total Time Taken = %f seconds" % (time() - st))

Example #56

0

Show file

File: symmetry.py Project: biochem-fan/dials

def refined_settings_factory_from_refined_triclinic(
  params, experiments, reflections, i_setting=None,
  lepage_max_delta=5.0, nproc=1, refiner_verbosity=0):
  '''Enumerate candidate Bravais settings for one crystal and refine each.

  The experiment list must contain exactly one crystal.  Its unit cell is
  passed, together with lepage_max_delta, to iotbx_converter; every item
  produced is wrapped in a bravais_setting and collected in a
  refined_settings_list.  Each entry is given a setting number, an updated
  "bravais" string and an unrefined crystal model, and is then handed to
  refine_subgroup through easy_mp.parallel_map.

  params, experiments and refiner_verbosity are forwarded unchanged to
  refine_subgroup, along with a deep copy of reflections.  nproc is the
  process count given to parallel_map.  i_setting is accepted but is not
  used inside this function.

  Returns the refined_settings_list with every entry replaced by the
  corresponding refine_subgroup result.
  '''
  crystals = experiments.crystals()
  assert len(crystals) == 1
  crystal = crystals[0]

  # Refinement is given a deep copy, not the caller's reflection table
  used_reflections = copy.deepcopy(reflections)
  UC = crystal.get_unit_cell()

  from rstbx.dps_core.lepage import iotbx_converter

  Lfat = refined_settings_list()
  for lepage_item in iotbx_converter(UC, lepage_max_delta):
    Lfat.append(bravais_setting(lepage_item))

  # These three values are evaluated as in the original flow; only
  # `triclinic` is consulted afterwards
  supergroup = Lfat.supergroup()
  triclinic = Lfat.triclinic()
  triclinic_miller = used_reflections['miller_index']

  # assert no transformation between indexing and bravais list
  assert str(triclinic['cb_op_inp_best'])=="a,b,c"

  from cctbx.crystal_orientation import crystal_orientation
  from cctbx import sgtbx
  from scitbx import matrix

  Nset = len(Lfat)
  for j in xrange(Nset):
    setting = Lfat[j]

    # Settings are numbered in descending order: first entry gets Nset
    setting.setting_number = Nset - j

    # Rotation part (first nine elements) of the input->best change of basis
    cb_op = setting['cb_op_inp_best'].c().as_double_array()[0:9]
    orient = crystal_orientation(crystal.get_A(), True)
    orient_best = orient.change_basis(matrix.sqr(cb_op).transpose())
    constrain_orient = orient_best.constrain(setting['system'])

    # Lowest-symmetry space group for this lattice, moved out of the
    # reference setting via the inverse of best->reference
    lattice = setting["bravais"]
    cb_op_best_ref = setting['best_subsym'].change_of_basis_op_to_reference_setting()
    lowest_sg_number = bravais_lattice_to_lowest_symmetry_spacegroup_number[lattice]
    space_group = sgtbx.space_group_info(number=lowest_sg_number).group()
    space_group = space_group.change_basis(cb_op_best_ref.inverse())

    # Store the Bravais type string derived from the transformed group
    setting["bravais"] = str(bravais_types.bravais_lattice(group=space_group))
    setting.unrefined_crystal = dials_crystal_from_orientation(
      constrain_orient, space_group)

  # One argument tuple per candidate setting
  args = [
    (params, subgroup, used_reflections, experiments, refiner_verbosity)
    for subgroup in Lfat]

  results = easy_mp.parallel_map(
    func=refine_subgroup,
    iterable=args,
    processes=nproc,
    method="multiprocessing",
    preserve_order=True,
    asynchronous=True,
    preserve_exception_message=True)

  # Results come back in input order; overwrite each entry in place
  for index, refined in enumerate(results):
    Lfat[index] = refined
  return Lfat

Example #57

0

Show file

File: stills_process.py Project: biochem-fan/dials

  def run(self):
    '''Execute the script.

    Parses the command line, imports every given path into datablocks,
    splits each imageset into single-image datablocks, and processes those
    in parallel with Processor.process_datablock.  The parsed options and
    params are stored on self.options and self.params.  If no paths are
    given, the parser help is printed and the method returns.

    Raises Abort if a file cannot be loaded, if a file yields more than one
    datablock, or if no datablocks were obtained at all.
    '''
    from dials.util import log
    from logging import info
    from time import time
    from collections import Counter
    from libtbx.utils import Abort
    from libtbx import easy_mp
    import os, copy
    from dxtbx.datablock import DataBlockFactory

    # Parse the command line
    params, options, all_paths = self.parser.parse_args(show_diff_phil=False, return_unhandled=True)

    # Check we have some filenames
    if len(all_paths) == 0:
      self.parser.print_help()
      return

    # Save the options
    self.options = options
    self.params = params

    st = time()

    # Configure logging
    log.config(
      params.verbosity,
      info='dials.process.log',
      debug='dials.process.debug.log')

    # Log the diff phil.  Compare by value: an identity test against a string
    # literal only works by accident of interpreter string interning.
    diff_phil = self.parser.diff_phil.as_str()
    if diff_phil != '':
      info('The following parameters have been modified:\n')
      info(diff_phil)

    # Import stuff
    info("Loading files...")
    if len(all_paths) == 1:
      datablocks = DataBlockFactory.from_filenames(all_paths)
    else:
      def do_import(filename):
        '''Load one file and return its single datablock, or raise Abort.'''
        info("Loading %s"%os.path.basename(filename))
        datablocks = DataBlockFactory.from_filenames([filename])
        if len(datablocks) == 0:
          raise Abort("Could not load %s"%filename)
        if len(datablocks) > 1:
          raise Abort("Got multiple datablocks from file %s"%filename)
        return datablocks[0]

      datablocks = easy_mp.parallel_map(
        func=do_import,
        iterable=all_paths,
        processes=params.mp.nproc,
        method=params.mp.method,
        preserve_order=True,
        preserve_exception_message=True)

    if len(datablocks) == 0:
      raise Abort('No datablocks specified')

    # Handle still imagesets by breaking them apart into multiple datablocks
    # Further handle single file still imagesets (like HDF5) by tagging each
    # frame using its index
    indices = []
    basenames = []
    split_datablocks = []
    for datablock in datablocks:
      for imageset in datablock.extract_imagesets():
        for i in xrange(len(imageset)):
          subset = imageset[i:i+1]
          split_datablocks.append(DataBlockFactory.from_imageset(subset)[0])
          indices.append(i)
          basenames.append(os.path.splitext(os.path.basename(subset.paths()[0]))[0])

    # Count each basename once up front instead of calling list.count() per
    # image, which is quadratic in the number of images
    basename_counts = Counter(basenames)
    tags = []
    for i, basename in zip(indices, basenames):
      if basename_counts[basename] > 1:
        # Several images share this basename: disambiguate with frame index
        tags.append("%s_%d"%(basename, i))
      else:
        tags.append(basename)

    # Wrapper function
    def do_work(item):
      '''Process one (tag, datablock) pair with a fresh copy of params.'''
      Processor(copy.deepcopy(params)).process_datablock(item[0], item[1])

    # Process the data
    easy_mp.parallel_map(
      func=do_work,
      iterable=zip(tags, split_datablocks),
      processes=params.mp.nproc,
      method=params.mp.method,
      preserve_order=True,
      preserve_exception_message=True)

    # Total Time
    info("")
    info("Total Time Taken = %f seconds" % (time() - st))

Example #58

0

Show file

File: calculate_n_expected_reflections.py Project: dials/dials_scratch

def run(args):
  '''Count predicted reflections for randomly oriented crystals.

  args are command-line arguments: they are given to Importer, which must
  yield exactly one datablock containing exactly one imageset, and any
  arguments it does not handle are interpreted as phil parameters against
  master_phil_scope.  params.unit_cell and params.space_group must be set.

  For each of params.n_samples random rotations a crystal model is built,
  reflections are predicted for the imported imageset, optionally limited
  to d-spacings greater than params.d_min, and counted.  Basic statistics, a
  20-slot histogram and the raw counts are printed; if params.plot is set
  the histogram is also saved to predicted_count_histogram.pdf.
  '''

  from libtbx.phil import command_line

  from dials.util.command_line import Importer
  from dials.array_family import flex
  print(args)
  importer = Importer(args, check_format=False)
  assert len(importer.datablocks) == 1
  sweeps = importer.datablocks[0].extract_imagesets()
  assert len(sweeps) == 1
  sweep = sweeps[0]

  cmd_line = command_line.argument_interpreter(master_params=master_phil_scope)
  working_phil = cmd_line.process_and_fetch(args=importer.unhandled_arguments)
  working_phil.show()

  params = working_phil.extract()
  assert params.unit_cell is not None
  assert params.space_group is not None
  unit_cell = params.unit_cell
  space_group = params.space_group.group()

  import random
  from dxtbx.model.crystal import crystal_model
  from cctbx import crystal, miller
  from scitbx import matrix

  # Seed both random number generators from the same parameter
  flex.set_random_seed(params.random_seed)
  random.seed(params.random_seed)

  crystal_symmetry = crystal.symmetry(unit_cell=unit_cell,
                                      space_group=space_group)

  # the reciprocal matrix
  B = matrix.sqr(unit_cell.fractionalization_matrix()).transpose()

  def predict_once(rotation_args):
    '''Return the number of reflections predicted for one rotation.

    rotation_args is a 1-tuple holding the rotation matrix U.
    '''
    from dxtbx.model.experiment.experiment_list import Experiment
    U = rotation_args[0]
    A = U * B
    direct_matrix = A.inverse()
    # Rows of the direct-space matrix are passed as the three cell vectors
    cryst_model = crystal_model(direct_matrix[0:3],
                                direct_matrix[3:6],
                                direct_matrix[6:9],
                                space_group=space_group)
    experiment = Experiment(imageset=sweep,
                            beam=sweep.get_beam(),
                            detector=sweep.get_detector(),
                            goniometer=sweep.get_goniometer(),
                            scan=sweep.get_scan(),
                            crystal=cryst_model)
    predicted_reflections = flex.reflection_table.from_predictions(
      experiment)
    miller_indices = predicted_reflections['miller_index']
    miller_set = miller.set(
      crystal_symmetry, miller_indices, anomalous_flag=True)
    if params.d_min is not None:
      # Keep only reflections with d-spacing greater than d_min
      resolution_sel = miller_set.d_spacings().data() > params.d_min
      predicted_reflections = predicted_reflections.select(resolution_sel)
    return len(predicted_reflections)

  from libtbx import easy_mp
  # One 1-tuple per sample; a separate name avoids rebinding the `args`
  # parameter of this function
  rotation_inputs = [(random_rotation(),) for _ in range(params.n_samples)]
  results = easy_mp.parallel_map(
    func=predict_once,
    iterable=rotation_inputs,
    processes=params.nproc,
    preserve_order=True,
    preserve_exception_message=True)
  n_predicted = flex.double(results)

  print("Basic statistics:")
  from scitbx.math import basic_statistics
  stats = basic_statistics(n_predicted)
  stats.show()

  print("Histogram:")
  hist = flex.histogram(n_predicted, n_slots=20)
  hist.show()

  print("Raw spot counts:")
  print(list(n_predicted))

  if params.plot:
    from matplotlib import pyplot
    from matplotlib.backends.backend_pdf import PdfPages

    pyplot.rc('font', family='serif')
    pyplot.rc('font', serif='Times New Roman')

    blue = '#2166AC'
    fig = pyplot.figure()
    ax = fig.add_subplot(1,1,1)
    ax.bar(hist.slot_centers(), hist.slots(), width=0.75*hist.slot_width(),
           color=blue, edgecolor=blue)
    ax.set_xlabel('Spot count')
    ax.set_ylabel('Frequency')
    pdf = PdfPages("predicted_count_histogram.pdf")
    pdf.savefig(fig)
    pdf.close()