Example #1
    def get_file_list(self):

        # get json/pickle file list
        loader = file_loader(self.params)
        file_list = loader.filename_lister()

        print("Discovered %d input json/pickle file pairs" % len(file_list))

        return file_list
Example #2
    def get_list(self):

        # materialize the full list of (experiments, reflections) file pairs
        # produced by the file pair generator

        loader = file_loader(self.params)

        file_list = list(loader.filepair_generator())

        return file_list
Example #3
  def get_file_list(self):

    # get json/pickle file list
    loader = file_loader(self.params)
    file_list = loader.filename_lister()

    if debug_file_load_calculator:
      self.log_write("\nDiscovered %d input json/pickle file pairs"%len(file_list))

    print("\nDiscovered %d input json/pickle file pairs"%len(file_list))

    return file_list
Example #4
  def load_data(self):
    from xfel.merging.application.input.file_loader import file_loader
    from dxtbx.model.experiment_list import ExperimentList
    from dials.array_family import flex
    all_experiments = ExperimentList()
    all_reflections = flex.reflection_table()

    # example showing what reading all the data into a single experiment list/
    # reflection table would look like
    loader = file_loader(self.params)
    for experiments_filename, reflections_filename in loader.filepair_generator():
      experiments, reflections = loader.load_data(experiments_filename, reflections_filename)
      for experiment_id, experiment in enumerate(experiments):
        all_experiments.append(experiment)
        refls = reflections.select(reflections['id'] == experiment_id)
        refls['id'] = flex.int(len(refls), len(all_experiments)-1)
        all_reflections.extend(refls)

    all_reflections.sort('miller_index_asymmetric')
    return all_experiments, all_reflections
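The loop in Example #4 renumbers each reflection's 'id' so it points at the experiment's position in the combined list rather than in its source file. Below is a minimal plain-Python sketch of that renumbering pattern, with toy lists standing in for ExperimentList and flex.reflection_table; the data and names here are illustrative only.

# toy stand-ins for what loader.load_data() returns for two file pairs
file_contents = [
    (["exptA", "exptB"], [{"id": 0, "I": 1.0}, {"id": 1, "I": 2.0}]),
    (["exptC"],          [{"id": 0, "I": 3.0}]),
]

all_experiments = []
all_reflections = []

for experiments, reflections in file_contents:
    for experiment_id, experiment in enumerate(experiments):
        all_experiments.append(experiment)
        # keep this experiment's reflections and renumber their 'id' to the
        # experiment's index in the combined list
        refls = [dict(r, id=len(all_experiments) - 1)
                 for r in reflections if r["id"] == experiment_id]
        all_reflections.extend(refls)

print(all_reflections)  # ids are now 0, 1 and 2 across the combined table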
Example #5
  def load_data(self, file_list):
    from xfel.merging.application.input.file_loader import file_loader
    from dxtbx.model.experiment_list import ExperimentList
    from dials.array_family import flex
    all_experiments = ExperimentList()
    all_reflections = flex.reflection_table()

    loader = file_loader(self.params)

    for experiments_filename, reflections_filename in file_list:

      experiments, reflections = loader.load_data(experiments_filename, reflections_filename)

      for experiment_id, experiment in enumerate(experiments):
        all_experiments.append(experiment)
        refls = reflections.select(reflections['id'] == experiment_id)

        # only renumber and keep the selection if this experiment still has reflections
        if len(refls) > 0:
          refls['id'] = flex.int(len(refls), len(all_experiments)-1)
          all_reflections.extend(refls)

    return all_experiments, all_reflections
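Examples #1 and #5 are the two halves of a typical load. A hypothetical driver method might combine them as sketched below (assuming both methods live on the same worker class; run_load is an illustrative name, not part of xfel.merging):

  def run_load(self):
    # list the (experiments, reflections) file pairs, then read them all in
    file_list = self.get_file_list()
    experiments, reflections = self.load_data(file_list)
    print("Loaded %d experiments with %d reflections" % (len(experiments), len(reflections)))
    return experiments, reflections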
Example #6
    def load_data(self, file_list):
        from xfel.merging.application.input.file_loader import file_loader
        from dxtbx.model.experiment_list import ExperimentList
        from dials.array_family import flex
        all_experiments = ExperimentList()
        all_reflections = flex.reflection_table()

        # example showing what reading all the data into a single experiment list/
        # reflection table would look like

        loader = file_loader(self.params)

        for experiments_filename, reflections_filename in file_list:
            experiments, reflections = loader.load_data(
                experiments_filename, reflections_filename)
            for experiment_id, experiment in enumerate(experiments):
                all_experiments.append(experiment)
                refls = reflections.select(reflections['id'] == experiment_id)
                refls['id'] = flex.int(len(refls), len(all_experiments) - 1)
                all_reflections.extend(refls)

        return all_experiments, all_reflections
Example #7
  def run(self, comm, timing=True):

    # create timing table
    self.do_timing = timing
    if self.do_timing:
      self.timing_table = dict()

    # get rank and rank count
    self.rank       = comm.Get_rank()
    self.rank_count = comm.Get_size()

    self.log_step_time("TOTAL")

    # prepare a list of input params and data files to be transmitted to all worker ranks
    transmitted = {}
    if self.rank == 0:
      self.log_step_time("PARSE_INPUT_PARAMS")

      # read and parse phil
      self.initialize()
      self.validate()

      # initialize output paths for rank 0 only, so that timing etc. can be logged before every rank initializes its own paths
      if self.do_timing:
        self.mpi_timing_file_path = self.params.output.output_dir + '/timing_%06d_%06d.out'%(self.rank_count, self.rank)
      self.mpi_log_file_path = self.params.output.output_dir + '/rank_%06d_%06d.out'%(self.rank_count, self.rank)
      self.merged_reflections_file_path = self.params.output.output_dir + '/merge.out'

      self.log_step_time("PARSE_INPUT_PARAMS", True)

      ###########################
      # GET JSON/PICKLE FILE LIST
      self.log_step_time("LIST_FILES")
      loader = file_loader(self.params)
      file_list = loader.filename_lister()

      self.log_step_time("CALCULATE_FILE_LOAD")
      load_calculator = file_load_calculator(self.params, file_list)
      rank_files = load_calculator.calculate_file_load(self.rank_count)

      total_file_pairs = 0
      for key, value in rank_files.items():
        total_file_pairs += len(value)
      self.mpi_log_write("\nRank 0 generated a list of %d file items for %d ranks"%(total_file_pairs,len(rank_files)))
      self.log_step_time("CALCULATE_FILE_LOAD", True)

      self.log_step_time("LIST_FILES", True)

      # prepare for transmitting the job to all ranks
      self.mpi_log_write("\nRank 0 transmitting file list of length %d"%(len(file_list)))
      transmitted = dict(params = self.params, options = self.options, rank_files = rank_files)

    #########################################
    # BROADCAST WORK FROM RANK 0 TO ALL RANKS
    comm.barrier()
    self.log_step_time("BROADCAST")

    transmitted = comm.bcast(transmitted, root = 0)

    self.params = transmitted['params']
    self.options = transmitted['options']
    new_file_list = transmitted['rank_files'][self.rank]

    if self.do_timing:
      self.mpi_timing_file_path = self.params.output.output_dir + '/timing_%06d_%06d.out'%(self.rank_count, self.rank)
    self.mpi_log_file_path = self.params.output.output_dir + '/rank_%06d_%06d.out'%(self.rank_count, self.rank)

    self.mpi_log_write ("\nRank %d received a file list of %d json-pickle file pairs" % (self.rank, len(new_file_list)))
    comm.barrier()
    self.log_step_time("BROADCAST", True)

    ################################################################################################
    # EACH RANK: SET UP HKL CHUNKS TO BE USED FOR DISTRIBUTING LOADED REFLECTIONS AND FOR ALL-TO-ALL
    self.log_step_time("SETUP_HKL_CHUNKS")
    self.setup_hkl_chunks() # even if a rank got no files to load, it still has to participate in all-to-all
    comm.barrier()
    self.log_step_time("SETUP_HKL_CHUNKS", True)

    # initialize data loading statistics
    rank_experiment_count = 0
    rank_image_count = 0
    rank_reflection_count = 0

    if len(new_file_list) > 0:
      self.mpi_log_write("\nRank %d: first file to load is: %s"%(self.rank, str(new_file_list[0])))

      ######################
      # EACH RANK: LOAD DATA
      self.log_step_time("LOAD")
      experiments, reflections = self.load_data(new_file_list)

      self.mpi_log_write ('\nRank %d has read %d experiments consisting of %d reflections'%(self.rank, len(experiments), len(reflections)))
      self.mpi_log_write("\nRANK %d: %s"%(self.rank, get_memory_usage()))

      comm.barrier()
      self.log_step_time("LOAD", True)

      ###############################################################################
      # EACH RANK: GET THE TOTAL NUMBER OF LOADED EXPERIMENTS, IMAGES AND REFLECTIONS
      self.log_step_time("LOAD_STATISTICS")
      rank_experiment_count = len(experiments)

      # count number of images
      all_imgs = []
      for iset in experiments.imagesets():
        all_imgs.extend(iset.paths())
      rank_image_count = len(set(all_imgs))

      rank_reflection_count = len(reflections)
      comm.barrier()
      self.log_step_time("LOAD_STATISTICS", True)

      #####################################################
      # EACH RANK: ADD A COLUMN WITH SYMMETRY-REDUCED HKL's
      self.log_step_time("ADD_ASU_HKL_COLUMN")
      # add_asu_miller_indices_column modifies the reflection table in place
      self.add_asu_miller_indices_column(experiments, reflections)
      comm.barrier()
      self.log_step_time("ADD_ASU_HKL_COLUMN", True)

      #########################################################################
      # EACH RANK: PRUNE REFLECTION COLUMNS, WHICH ARE NOT RELEVANT FOR MERGING
      self.log_step_time("PRUNE")
      self.prune_reflection_columns(reflections=reflections)
      comm.barrier()
      self.log_step_time("PRUNE", True)
    else:
      reflections = distribute_reflection_table()
      self.mpi_log_write ("\nRank %d received no data" % self.rank)
      # match the four barriers (LOAD, LOAD_STATISTICS, ADD_ASU_HKL_COLUMN, PRUNE)
      # executed by the ranks that did load data, so the communicator stays in sync
      comm.barrier()
      comm.barrier()
      comm.barrier()
      comm.barrier()

    ########################################
    # MPI-REDUCE ALL DATA LOADING STATISTICS
    self.log_step_time("REDUCE_LOAD_STATS")
    total_experiment_count  = comm.reduce(rank_experiment_count, MPI.SUM, 0)
    total_image_count       = comm.reduce(rank_image_count, MPI.SUM, 0)
    total_reflection_count  = comm.reduce(rank_reflection_count, MPI.SUM, 0)
    max_reflection_count    = comm.reduce(rank_reflection_count, MPI.MAX, 0)
    min_reflection_count    = comm.reduce(rank_reflection_count, MPI.MIN, 0)

    ########################################################################################
    # RANK 0: LOG ALL DATA LOADING STATISTICS, INITIALIZE THE TOTAL MERGING REFLECTION TABLE
    if self.rank == 0:
      self.mpi_log_write('\nAll ranks have read %d experiments'%total_experiment_count)
      self.mpi_log_write('\nAll ranks have read %d images'%total_image_count)
      self.mpi_log_write('\nAll ranks have read %d reflections'%total_reflection_count)
      self.mpi_log_write('\nThe maximum number of reflections loaded per rank is: %d reflections'%max_reflection_count)
      self.mpi_log_write('\nThe minimum number of reflections loaded per rank is: %d reflections'%min_reflection_count)

    comm.barrier()
    self.log_step_time("REDUCE_LOAD_STATS", True)

    #######################################################
    # EACH RANK: DISTRIBUTE REFLECTIONS OVER ALL HKL CHUNKS
    self.log_step_time("DISTRIBUTE")
    self.distribute_reflections_over_hkl_chunks(reflections=reflections)
    comm.barrier()
    self.log_step_time("DISTRIBUTE", True)

    #############################################################
    # EACH RANK: GET A REFLECTION TABLE FOR MERGING BY ALL-TO-ALL
    if self.params.parallel.a2a == 1:
      alltoall_reflections = self.get_reflections_from_alltoall(comm=comm)
    else: # if all-to-all runs into memory problems, do it over chunk slices instead
      alltoall_reflections = self.get_reflections_from_alltoall_sliced(comm=comm, number_of_slices=self.params.parallel.a2a)

    #####################################################
    # EACH RANK: SORT THE REFLECTION TABLE BEFORE MERGING
    self.log_step_time("SORT")
    self.mpi_log_write("\nRank %d sorting consolidated reflection table..."%self.rank)

    if len(alltoall_reflections) > 0:
      alltoall_reflections.sort('miller_index_asymmetric')

    comm.barrier()
    self.log_step_time("SORT", True)

    ####################################################
    # EACH RANK: DO STATISTICS ON REFLECTION INTENSITIES
    self.log_step_time("AVERAGE")
    self.mpi_log_write("\nRank %d doing intensity statistics..."%self.rank)
    all_rank_merged_reflections = merging_reflection_table()

    if len(alltoall_reflections) > 0:
      for hkl_reflection_table in self.get_next_hkl_reflection_table(reflections=alltoall_reflections):
        intensity_stats = self.calc_reflection_intensity_stats(reflections=hkl_reflection_table)
        all_rank_merged_reflections.append({'miller_index': hkl_reflection_table[0].get('miller_index_asymmetric'),
                                            'intensity': intensity_stats['average'],
                                            'esd' : intensity_stats['esd'],
                                            'rmsd' : intensity_stats['rmsd'],
                                            'multiplicity': intensity_stats['multiplicity']})

    self.mpi_log_write ("\nRank %d merged intensities for %d HKLs"%(self.rank, all_rank_merged_reflections.size()))
    comm.barrier()
    self.log_step_time("AVERAGE", True)

    ####################################################
    # EACH RANK: SEND MERGED REFLECTION TABLES TO RANK 0
    self.log_step_time("GATHER")
    if self.rank != 0:
      self.mpi_log_write("\nRank %d executing MPI gathering of all reflection tables at rank 0..."%self.rank)
    all_merged_reflection_tables = comm.gather(all_rank_merged_reflections, root = 0)
    comm.barrier()
    self.log_step_time("GATHER", True)

    ####################################
    # RANK 0: DO FINAL MERGING OF TABLES
    if self.rank == 0:
      self.log_step_time("MERGE")
      final_merged_reflection_table = merging_reflection_table()

      self.mpi_log_write ("\nRank 0 doing final merging of reflection tables received from all ranks...")
      for table in all_merged_reflection_tables:
        final_merged_reflection_table.extend(table)
      self.mpi_log_write("\nRank 0 total merged HKLs: {}".format(final_merged_reflection_table.size()))
      self.log_step_time("MERGE", True)

      # write the final merged reflection table out to an ASCII file
      self.log_step_time("WRITE")
      self.output_merged_reflections(final_merged_reflection_table)
      self.log_step_time("WRITE", True)

    comm.barrier()
    self.log_step_time("TOTAL", True)

    MPI.Finalize()

    return
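run() follows a broadcast, load, all-to-all, reduce, gather skeleton. The sketch below reproduces that skeleton with mpi4py and toy data in place of params, experiment lists and reflection tables; the file names and reflection tuples are illustrative only, and the script assumes mpi4py is installed (run with, e.g., mpirun -n 4 python sketch.py).

from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
rank_count = comm.Get_size()

# rank 0 prepares a per-rank assignment of toy "file pairs"
transmitted = {}
if rank == 0:
  file_list = ["pair_%03d" % i for i in range(10)]
  rank_files = {r: file_list[r::rank_count] for r in range(rank_count)}
  transmitted = dict(rank_files = rank_files)

# broadcast the assignment, as run() does for params/options/rank_files
transmitted = comm.bcast(transmitted, root = 0)
my_files = transmitted['rank_files'][rank]

# stand-in for loading: tag each toy reflection with the rank that owns its HKL chunk
my_reflections = [(i % rank_count, "refl_from_%s" % f) for f in my_files for i in range(3)]

# all-to-all: bucket reflections by destination rank, then exchange
send_chunks = [[r for r in my_reflections if r[0] == dest] for dest in range(rank_count)]
received = comm.alltoall(send_chunks)
my_chunk = [r for part in received for r in part]

# reduce simple statistics onto rank 0
total_reflections = comm.reduce(len(my_chunk), MPI.SUM, 0)
max_reflections   = comm.reduce(len(my_chunk), MPI.MAX, 0)

# gather per-rank results onto rank 0 for the final merge
per_rank_counts = comm.gather(len(my_chunk), root = 0)
if rank == 0:
  print("total reflections after all-to-all: %d (max per rank: %d)" % (total_reflections, max_reflections))
  print("per-rank counts:", per_rank_counts)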