def run(self, all_experiments, all_reflections):
        """Load this rank's share of the input experiments and reflections.

        Rank 0 builds the list of experiment/reflection file pairs with
        ``self.get_list()``, splits it into per-rank lists with
        ``file_load_calculator`` and broadcasts the result from root 0.  Every
        rank then reads the file pairs stored at its own rank index, if any.

        For each file pair the reflection ``id`` column is rewritten so that it
        holds the position of the experiment in ``all_experiments``, and an
        ``exp_id`` column holding the experiment identifier is added.
        Experiments without any reflection in the table are not kept.

        Parameters:
          all_experiments: experiment list to append to, or None.
          all_reflections: reflection table to extend, or None.  Either both
            arguments are None or both are provided.

        Returns:
          The tuple ``(all_experiments, all_reflections)``, where the
          reflection table is whatever ``self.prune_reflection_table_keys``
          returned.
        """
        from dxtbx.model.experiment_list import ExperimentList
        from dials.array_family import flex

        # The two containers have to be supplied together or not at all.
        none_count = [all_experiments is None,
                      all_reflections is None].count(True)
        assert none_count in [0, 2]
        if none_count != 2:
            starting_expts_count = len(all_experiments)
            starting_refls_count = len(all_reflections)
        else:
            all_experiments = ExperimentList()
            all_reflections = flex.reflection_table()
            starting_expts_count = starting_refls_count = 0

        logger = self.logger
        rank = self.mpi_helper.rank
        logger.log(
            "Initial number of experiments: %d; Initial number of reflections: %d"
            % (starting_expts_count, starting_refls_count))

        # Only rank 0 works out which files each rank has to read.
        transmitted = None
        if rank == 0:
            file_list = self.get_list()
            logger.log("Built an input list of %d json/pickle file pairs" %
                       len(file_list))
            self.params.input.path = None  # Rank 0 has already parsed the input parameters
            load_calculator = file_load_calculator(self.params, file_list,
                                                   logger)
            transmitted = load_calculator.calculate_file_load(
                available_rank_count=self.mpi_helper.size)
            logger.log(
                'Transmitting a list of %d lists of json/pickle file pairs' %
                len(transmitted))

        logger.log_step_time("BROADCAST_FILE_LIST")
        transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
        # A rank whose index is beyond the broadcast list has nothing to read.
        if rank < len(transmitted):
            new_file_list = transmitted[rank]
        else:
            new_file_list = None
        logger.log_step_time("BROADCAST_FILE_LIST", True)

        # Columns that get an '.unmodified' backup before anything else
        # touches them.
        backup_columns = (
            ('intensity.sum.value', 'intensity.sum.value.unmodified'),
            ('intensity.sum.variance', 'intensity.sum.variance.unmodified'),
        )

        logger.log_step_time("LOAD")
        if new_file_list is None:
            logger.log("Received a list of 0 json/pickle file pairs")
        else:
            logger.log("Received a list of %d json/pickle file pairs" %
                       len(new_file_list))
            for expts_path, refls_path in new_file_list:
                logger.log("Reading %s %s" % (expts_path, refls_path))
                experiments = ExperimentListFactory.from_json_file(
                    expts_path, check_format=False)
                reflections = flex.reflection_table.from_file(refls_path)
                logger.log("Data read, prepping")

                for source_key, backup_key in backup_columns:
                    if source_key in reflections:
                        reflections[backup_key] = reflections[source_key] * 1

                n_rows = len(reflections)
                new_ids = flex.int(n_rows, -1)  # -1 marks "not assigned yet"
                new_identifiers = flex.std_string(n_rows)

                # Empty the table's id -> identifier map; it is refilled
                # below with the renumbered ids.
                eid = reflections.experiment_identifiers()
                for k in eid.keys():
                    del eid[k]

                for experiment_id, experiment in enumerate(experiments):
                    # Rows of the table that belong to this experiment
                    refls_sel = reflections['id'] == experiment_id
                    if not refls_sel.count(True):
                        continue

                    current_identifier = experiment.identifier
                    if current_identifier is None or len(
                            current_identifier) == 0:
                        experiment.identifier = create_experiment_identifier(
                            experiment, expts_path, experiment_id)

                    if not self.params.input.keep_imagesets:
                        experiment.imageset = None
                    all_experiments.append(experiment)

                    # 'id' is unique within this rank, 'exp_id' (the
                    # experiment identifier) is unique globally.
                    new_identifiers.set_selected(refls_sel,
                                                 experiment.identifier)

                    new_id = len(all_experiments) - 1
                    eid[new_id] = experiment.identifier
                    new_ids.set_selected(refls_sel, new_id)

                assert (new_ids < 0
                        ).count(True) == 0, "Not all reflections accounted for"
                reflections['id'] = new_ids
                reflections['exp_id'] = new_identifiers
                all_reflections.extend(reflections)
        logger.log_step_time("LOAD", True)

        n_new_expts = len(all_experiments) - starting_expts_count
        n_new_refls = len(all_reflections) - starting_refls_count
        logger.log('Read %d experiments consisting of %d reflections' %
                   (n_new_expts, n_new_refls))
        logger.log("Memory usage: %d MB" % get_memory_usage())

        all_reflections = self.prune_reflection_table_keys(all_reflections)

        # Do we have any data?
        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(all_experiments, all_reflections)
        return all_experiments, all_reflections
Example #2
0
    def run(self, all_experiments, all_reflections):
        """Load this rank's share of the input experiments and reflections.

        Rank 0 builds the list of experiment/reflection file pairs with
        ``self.get_list()``, splits it into per-rank lists with
        ``file_load_calculator`` and broadcasts the result from root 0.  Every
        rank then reads the file pairs stored at its own rank index.  A rank
        whose index is beyond the end of the broadcast list reads nothing.

        Every experiment is appended to ``all_experiments``; its reflections
        are selected by the ``id`` column, tagged with the experiment
        identifier in an ``exp_id`` column and appended to
        ``all_reflections``.

        Parameters:
          all_experiments: experiment list to append to, or None.
          all_reflections: reflection table to extend, or None.  Either both
            arguments are None or both are provided.

        Returns:
          The tuple ``(all_experiments, all_reflections)``.
        """
        from dxtbx.model.experiment_list import ExperimentList
        from dials.array_family import flex

        # Both must be None or both must be provided
        test = [all_experiments is None, all_reflections is None].count(True)
        assert test in [0, 2]
        if test == 2:
            all_experiments = ExperimentList()
            all_reflections = flex.reflection_table()
            starting_expts_count = starting_refls_count = 0
        else:
            starting_expts_count = len(all_experiments)
            starting_refls_count = len(all_reflections)
        self.logger.log(
            "Initial number of experiments: %d; Initial number of reflections: %d"
            % (starting_expts_count, starting_refls_count))

        # Generate and send a list of file paths to each worker
        if self.mpi_helper.rank == 0:
            file_list = self.get_list()
            self.logger.log(
                "Built an input list of %d json/pickle file pairs" %
                (len(file_list)))
            self.params.input.path = None  # the input is already parsed
            per_rank_file_list = file_load_calculator(self.params, file_list, self.logger).\
                                    calculate_file_load(available_rank_count = self.mpi_helper.size)
            self.logger.log(
                'Transmitting a list of %d lists of json/pickle file pairs' %
                (len(per_rank_file_list)))
            transmitted = per_rank_file_list
        else:
            transmitted = None

        self.logger.log_step_time("BROADCAST_FILE_LIST")

        transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)

        # The broadcast list may hold fewer entries than there are ranks.
        # Such a rank gets an empty list instead of failing with an
        # IndexError, so it still reaches the final data count below.
        rank = self.mpi_helper.rank
        new_file_list = transmitted[rank] if rank < len(transmitted) else []

        self.logger.log("Received a list of %d json/pickle file pairs" %
                        len(new_file_list))
        self.logger.log_step_time("BROADCAST_FILE_LIST", True)

        # Load the data
        self.logger.log_step_time("LOAD")
        for experiments_filename, reflections_filename in new_file_list:
            experiments = ExperimentListFactory.from_json_file(
                experiments_filename, check_format=False)
            reflections = flex.reflection_table.from_file(reflections_filename)

            for experiment_id, experiment in enumerate(experiments):
                # Make sure every experiment carries a non-empty identifier
                if experiment.identifier is None or len(
                        experiment.identifier) == 0:
                    experiment.identifier = create_experiment_identifier(
                        experiment, experiments_filename, experiment_id)
                all_experiments.append(experiment)

                # Reflections of the current experiment, tagged with its
                # identifier; the 'id' column is kept as read from the file.
                refls = reflections.select(reflections['id'] == experiment_id)
                refls['exp_id'] = flex.std_string(len(refls),
                                                  experiment.identifier)
                all_reflections.extend(refls)
        self.logger.log_step_time("LOAD", True)

        self.logger.log('Read %d experiments consisting of %d reflections' %
                        (len(all_experiments) - starting_expts_count,
                         len(all_reflections) - starting_refls_count))
        # NOTE(review): the result is logged as-is, so get_memory_usage()
        # presumably returns a ready-made message here -- confirm.
        self.logger.log(get_memory_usage())

        # Count the loaded data
        data_counter(self.params).count(all_experiments, all_reflections)

        return all_experiments, all_reflections
Example #3
0
    def run(self, all_experiments, all_reflections):
        """Load this rank's share of the input data and prune the columns.

        Rank 0 builds the list of experiment/reflection file pairs with
        ``self.get_list()``, splits it into per-rank lists with
        ``file_load_calculator`` and broadcasts the result from root 0.  Every
        rank then reads the file pairs stored at its own rank index, if any.

        Each experiment is appended to ``all_experiments``; its reflections
        are selected by the ``id`` column, tagged with the experiment
        identifier in an ``exp_id`` column and appended to
        ``all_reflections``.  The reflection table is finally passed through
        ``reflection_table_utils.prune_reflection_table_keys`` with a fixed
        list of keys to keep.

        Parameters:
          all_experiments: experiment list to append to, or None.
          all_reflections: reflection table to extend, or None.  Either both
            arguments are None or both are provided.

        Returns:
          The tuple ``(all_experiments, all_reflections)``.
        """
        from dxtbx.model.experiment_list import ExperimentList
        from dials.array_family import flex

        # Either both containers are given or neither is.
        n_missing = (all_experiments is None,
                     all_reflections is None).count(True)
        assert n_missing in [0, 2]
        if n_missing == 2:
            all_experiments = ExperimentList()
            all_reflections = flex.reflection_table()
            starting_expts_count = starting_refls_count = 0
        else:
            starting_expts_count = len(all_experiments)
            starting_refls_count = len(all_reflections)
        initial_counts = (starting_expts_count, starting_refls_count)
        self.logger.log(
            "Initial number of experiments: %d; Initial number of reflections: %d"
            % initial_counts)

        # Rank 0 decides which files each rank reads; the others contribute
        # nothing to the broadcast.
        transmitted = None
        if self.mpi_helper.rank == 0:
            file_list = self.get_list()
            self.logger.log(
                "Built an input list of %d json/pickle file pairs" %
                len(file_list))
            self.params.input.path = None  # Rank 0 has already parsed the input parameters
            calculator = file_load_calculator(self.params, file_list,
                                              self.logger)
            transmitted = calculator.calculate_file_load(
                available_rank_count=self.mpi_helper.size)
            self.logger.log(
                'Transmitting a list of %d lists of json/pickle file pairs' %
                len(transmitted))

        self.logger.log_step_time("BROADCAST_FILE_LIST")
        transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
        new_file_list = None
        if self.mpi_helper.rank < len(transmitted):
            new_file_list = transmitted[self.mpi_helper.rank]
        self.logger.log_step_time("BROADCAST_FILE_LIST", True)

        # Read the files assigned to this rank
        self.logger.log_step_time("LOAD")
        if new_file_list is None:
            self.logger.log("Received a list of 0 json/pickle file pairs")
        else:
            self.logger.log("Received a list of %d json/pickle file pairs" %
                            len(new_file_list))
            for expts_path, refls_path in new_file_list:
                experiments = ExperimentListFactory.from_json_file(
                    expts_path, check_format=False)
                reflections = flex.reflection_table.from_file(refls_path)

                for experiment_id, experiment in enumerate(experiments):
                    # Give the experiment an identifier if it has none yet
                    existing = experiment.identifier
                    if existing is None or len(existing) == 0:
                        experiment.identifier = create_experiment_identifier(
                            experiment, expts_path, experiment_id)
                    all_experiments.append(experiment)

                    # Rows of the current experiment.  Their 'id' column is
                    # left as read from the file; only 'exp_id' (the
                    # experiment identifier, meant to be globally unique) is
                    # written here.
                    in_experiment = reflections['id'] == experiment_id
                    refls = reflections.select(in_experiment)
                    refls['exp_id'] = flex.std_string(len(refls),
                                                      experiment.identifier)

                    all_reflections.extend(refls)
        self.logger.log_step_time("LOAD", True)

        n_new_expts = len(all_experiments) - starting_expts_count
        n_new_refls = len(all_reflections) - starting_refls_count
        self.logger.log('Read %d experiments consisting of %d reflections' %
                        (n_new_expts, n_new_refls))
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        # Drop every reflection column that is not in the keep list
        from xfel.merging.application.reflection_table_utils import reflection_table_utils
        keys_to_keep = [
            'intensity.sum.value', 'intensity.sum.variance',
            'miller_index', 'miller_index_asymmetric', 'exp_id', 's1'
        ]
        all_reflections = reflection_table_utils.prune_reflection_table_keys(
            reflections=all_reflections, keys_to_keep=keys_to_keep)
        self.logger.log("Pruned reflection table")
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        # Do we have any data?
        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(all_experiments, all_reflections)

        return all_experiments, all_reflections
Example #4
0
    def run(self, all_experiments, all_reflections):
        """Load this rank's share of the input experiments and reflections.

        Rank 0 builds the list of experiment/reflection file pairs with
        ``self.get_list()``, splits it into per-rank lists with
        ``file_load_calculator`` and broadcasts the result from root 0.  Every
        rank then reads the file pairs stored at its own rank index, if any.

        For each file pair the reflection ``id`` column is rewritten so that it
        holds the position of the experiment in ``all_experiments``, and an
        ``exp_id`` column holding the experiment identifier is added.
        Experiments without any reflection in the table are not kept.

        When ``params.output.expanded_bookkeeping`` is set, rank 0 also writes
        ``file_list_map.json`` into ``params.output.output_dir`` and every
        reflection table gets three extra columns: ``input_refl_index``,
        ``orig_exp_id`` and ``file_list_mapping``.

        Parameters:
          all_experiments: experiment list to append to, or None.
          all_reflections: reflection table to extend, or None.  Either both
            arguments are None or both are provided.

        Returns:
          The tuple ``(all_experiments, all_reflections)``, where the
          reflection table is whatever ``self.prune_reflection_table_keys``
          returned.
        """
        from dxtbx.model.experiment_list import ExperimentList
        from dials.array_family import flex

        # Both must be none or not none
        test = [all_experiments is None, all_reflections is None].count(True)
        assert test in [0, 2]
        if test == 2:
            # Nothing was passed in: start from empty containers
            all_experiments = ExperimentList()
            all_reflections = flex.reflection_table()
            starting_expts_count = starting_refls_count = 0
        else:
            # Remember the initial sizes so that only the newly read data is
            # reported in the log below
            starting_expts_count = len(all_experiments)
            starting_refls_count = len(all_reflections)
        self.logger.log(
            "Initial number of experiments: %d; Initial number of reflections: %d"
            % (starting_expts_count, starting_refls_count))

        # Generate and send a list of file paths to each worker
        if self.mpi_helper.rank == 0:
            file_list = self.get_list()
            self.logger.log(
                "Built an input list of %d json/pickle file pairs" %
                (len(file_list)))
            self.params.input.path = None  # Rank 0 has already parsed the input parameters

            # optionally write a file list mapping to disk, useful in post processing if save_experiments_and_reflections=True
            # file_id_from_names stays None unless expanded bookkeeping is on;
            # it is broadcast to all ranks together with the file lists.
            file_id_from_names = None
            if self.params.output.expanded_bookkeeping:
                apath = lambda x: os.path.abspath(x)
                # index in file_list -> (absolute experiment path, absolute reflection path)
                file_names_from_id = {
                    i_f: tuple(map(apath, exp_ref_pair))
                    for i_f, exp_ref_pair in enumerate(file_list)
                }
                # Note that json.dump writes the integer keys as strings and
                # the tuples as lists.
                with open(
                        os.path.join(self.params.output.output_dir,
                                     "file_list_map.json"), "w") as o:
                    json.dump(file_names_from_id, o)
                # Inverse mapping: (absolute experiment path, absolute
                # reflection path) -> index in file_list
                file_id_from_names = {
                    tuple(map(apath, exp_ref_pair)): i_f
                    for i_f, exp_ref_pair in enumerate(file_list)
                }

            per_rank_file_list = file_load_calculator(self.params, file_list, self.logger).\
                                    calculate_file_load(available_rank_count = self.mpi_helper.size)
            self.logger.log(
                'Transmitting a list of %d lists of json/pickle file pairs' %
                (len(per_rank_file_list)))
            transmitted = per_rank_file_list, file_id_from_names
        else:
            # Non-root ranks contribute nothing; they receive rank 0's tuple
            # from the broadcast below.
            transmitted = None

        self.logger.log_step_time("BROADCAST_FILE_LIST")
        # Every rank unpacks the (per-rank file lists, file id mapping) pair
        # built on rank 0.
        new_file_list, file_names_mapping = self.mpi_helper.comm.bcast(
            transmitted, root=0)
        # Pick this rank's own list; a rank whose index is beyond the end of
        # the broadcast list has nothing to read.
        new_file_list = new_file_list[
            self.mpi_helper.
            rank] if self.mpi_helper.rank < len(new_file_list) else None
        self.logger.log_step_time("BROADCAST_FILE_LIST", True)

        # Load the data
        self.logger.log_step_time("LOAD")
        if new_file_list is not None:
            self.logger.log("Received a list of %d json/pickle file pairs" %
                            len(new_file_list))
            for experiments_filename, reflections_filename in new_file_list:
                self.logger.log("Reading %s %s" %
                                (experiments_filename, reflections_filename))
                experiments = ExperimentListFactory.from_json_file(
                    experiments_filename,
                    check_format=self.params.input.read_image_headers)
                reflections = flex.reflection_table.from_file(
                    reflections_filename)
                if self.params.output.expanded_bookkeeping:
                    # NOTE: these are un-prunable
                    # Row number of each reflection within its input file
                    reflections["input_refl_index"] = flex.int(
                        list(range(len(reflections))))
                    # Keep the ids as read from the file; 'id' itself is
                    # replaced by a new column further down.
                    reflections["orig_exp_id"] = reflections['id']
                    assert file_names_mapping is not None
                    # Same key construction as on rank 0: the pair of
                    # absolute paths
                    exp_ref_pair = os.path.abspath(
                        experiments_filename), os.path.abspath(
                            reflections_filename)
                    # Every reflection of this file gets the file's index in
                    # the global file list
                    this_refl_fileMappings = [
                        file_names_mapping[exp_ref_pair]
                    ] * len(reflections)
                    reflections["file_list_mapping"] = flex.int(
                        this_refl_fileMappings)
                self.logger.log("Data read, prepping")

                # Back up the summed intensities and variances under
                # '.unmodified' keys.  Multiplying by 1 presumably yields a
                # new array rather than a reference to the same column --
                # TODO confirm.
                if 'intensity.sum.value' in reflections:
                    reflections[
                        'intensity.sum.value.unmodified'] = reflections[
                            'intensity.sum.value'] * 1
                if 'intensity.sum.variance' in reflections:
                    reflections[
                        'intensity.sum.variance.unmodified'] = reflections[
                            'intensity.sum.variance'] * 1

                # Replacement columns; -1 marks a reflection that has not
                # been assigned to an experiment yet.
                new_ids = flex.int(len(reflections), -1)
                new_identifiers = flex.std_string(len(reflections))
                # Empty the table's id -> identifier map; it is refilled
                # below with the renumbered ids.
                eid = reflections.experiment_identifiers()
                for k in eid.keys():
                    del eid[k]

                if self.params.output.expanded_bookkeeping:
                    # NOTE(review): helper defined outside this block;
                    # presumably it assigns identifiers to the experiments
                    # up front -- confirm.
                    preGen_experiment_identifiers(experiments,
                                                  experiments_filename)
                for experiment_id, experiment in enumerate(experiments):
                    # select reflections of the current experiment
                    refls_sel = reflections['id'] == experiment_id

                    # Experiments without reflections are not kept
                    if refls_sel.count(True) == 0: continue

                    # Make sure the experiment carries a non-empty identifier
                    if experiment.identifier is None or len(
                            experiment.identifier) == 0:
                        experiment.identifier = create_experiment_identifier(
                            experiment, experiments_filename, experiment_id)

                    if not self.params.input.keep_imagesets:
                        experiment.imageset = None
                    all_experiments.append(experiment)

                    # Reflection experiment 'id' is unique within this rank; 'exp_id' (i.e. experiment identifier) is unique globally
                    new_identifiers.set_selected(refls_sel,
                                                 experiment.identifier)

                    # The new id is the position of the experiment that was
                    # just appended to all_experiments
                    new_id = len(all_experiments) - 1
                    eid[new_id] = experiment.identifier
                    new_ids.set_selected(refls_sel, new_id)
                # Any remaining -1 means a reflection whose 'id' matched no
                # experiment index in this file
                assert (new_ids < 0
                        ).count(True) == 0, "Not all reflections accounted for"
                reflections['id'] = new_ids
                reflections['exp_id'] = new_identifiers
                all_reflections.extend(reflections)
        else:
            self.logger.log("Received a list of 0 json/pickle file pairs")
        self.logger.log_step_time("LOAD", True)

        self.logger.log('Read %d experiments consisting of %d reflections' %
                        (len(all_experiments) - starting_expts_count,
                         len(all_reflections) - starting_refls_count))
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        all_reflections = self.prune_reflection_table_keys(all_reflections)

        # Do we have any data?
        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(all_experiments, all_reflections)
        return all_experiments, all_reflections
Example #5
0
    def run(self, all_experiments, all_reflections):
        """Load this rank's share of the input data and prune the columns.

        Rank 0 builds the list of experiment/reflection file pairs with
        ``self.get_list()``, splits it into per-rank lists with
        ``file_load_calculator`` and broadcasts the result from root 0.  Every
        rank then reads the file pairs stored at its own rank index, if any.

        Each reflection table is sorted by ``id`` and cut into one contiguous
        slice per experiment.  Every slice gets a fresh ``id`` (one more than
        the largest id already in ``all_reflections``, or 0 for the first one)
        and an ``exp_id`` column holding the experiment identifier, and is
        appended to ``all_reflections``.  The reflection table is finally
        passed through ``reflection_table_utils.prune_reflection_table_keys``
        with a fixed list of keys to keep.

        Parameters:
          all_experiments: experiment list to append to, or None.
          all_reflections: reflection table to extend, or None.  Either both
            arguments are None or both are provided.

        Returns:
          The tuple ``(all_experiments, all_reflections)``.

        Raises:
          AssertionError: if the two arguments are not both None or both
            provided, if a reflection table does not hold exactly one distinct
            id per experiment, if it holds a negative id, or if an experiment
            has no reflections.
        """
        from dxtbx.model.experiment_list import ExperimentList
        from dials.array_family import flex

        # Both must be None or both must be provided
        test = [all_experiments is None, all_reflections is None].count(True)
        assert test in [0, 2]
        if test == 2:
            all_experiments = ExperimentList()
            all_reflections = flex.reflection_table()
            starting_expts_count = starting_refls_count = 0
        else:
            starting_expts_count = len(all_experiments)
            starting_refls_count = len(all_reflections)
        self.logger.log(
            "Initial number of experiments: %d; Initial number of reflections: %d"
            % (starting_expts_count, starting_refls_count))

        # Generate and send a list of file paths to each worker
        if self.mpi_helper.rank == 0:
            file_list = self.get_list()
            self.logger.log(
                "Built an input list of %d json/pickle file pairs" %
                (len(file_list)))
            self.params.input.path = None  # Rank 0 has already parsed the input parameters
            per_rank_file_list = file_load_calculator(self.params, file_list, self.logger).\
                                    calculate_file_load(available_rank_count = self.mpi_helper.size)
            self.logger.log(
                'Transmitting a list of %d lists of json/pickle file pairs' %
                (len(per_rank_file_list)))
            transmitted = per_rank_file_list
        else:
            transmitted = None

        self.logger.log_step_time("BROADCAST_FILE_LIST")
        transmitted = self.mpi_helper.comm.bcast(transmitted, root=0)
        # A rank whose index is beyond the broadcast list has nothing to read
        new_file_list = transmitted[
            self.mpi_helper.
            rank] if self.mpi_helper.rank < len(transmitted) else None
        self.logger.log_step_time("BROADCAST_FILE_LIST", True)

        # Load the data
        self.logger.log_step_time("LOAD")
        if new_file_list is not None:
            self.logger.log("Received a list of %d json/pickle file pairs" %
                            len(new_file_list))

            # Next free reflection 'id'.  It is initialised lazily from the
            # current content of all_reflections and then simply incremented,
            # which avoids rescanning the whole 'id' column for every
            # experiment.  The result is the same as taking max(id) + 1 each
            # time, because every appended slice is non-empty and carries
            # exactly the id that was just handed out.
            next_id = None

            for experiments_filename, reflections_filename in new_file_list:
                experiments = ExperimentListFactory.from_json_file(
                    experiments_filename, check_format=False)
                reflections = flex.reflection_table.from_file(
                    reflections_filename)
                # NOTE: the table is sorted so that slicing can be used below,
                # because the boolean selection no longer works
                reflections.sort("id")
                unique_refl_ids = set(reflections['id'])
                assert len(unique_refl_ids) == len(
                    experiments
                ), "refl table and experiment list should contain data on same experiment "  # TODO: decide if this is true
                assert min(
                    reflections["id"]
                ) >= 0, "No more -1 in the id column, ideally it should be the numerical index of experiment, but beware that this is not enforced anywhere in the upstream code base"

                # Back up the summed intensities and variances under
                # '.unmodified' keys before anything else touches them
                if 'intensity.sum.value' in reflections:
                    reflections[
                        'intensity.sum.value.unmodified'] = reflections[
                            'intensity.sum.value'] * 1
                if 'intensity.sum.variance' in reflections:
                    reflections[
                        'intensity.sum.variance.unmodified'] = reflections[
                            'intensity.sum.variance'] * 1

                for experiment_id, experiment in enumerate(experiments):
                    # Make sure the experiment carries a non-empty identifier
                    if experiment.identifier is None or len(
                            experiment.identifier) == 0:
                        experiment.identifier = create_experiment_identifier(
                            experiment, experiments_filename, experiment_id)

                    all_experiments.append(experiment)

                    # select reflections of the current experiment
                    # FIXME: the selection was broken for me, it raised
                    #    RuntimeError: boost::bad_get: failed value get using boost::get
                    #refls = reflections.select(reflections['id'] == experiment_id)
                    # NOTE: this is a hack due to the broken experiment_id
                    # selection above; it relies on the table being sorted by
                    # 'id' so that the rows of one experiment are contiguous
                    exp_id_pos = np.where(
                        reflections['id'] == experiment_id)[0]
                    assert exp_id_pos.size, "no refls in this experiment"  # NOTE: maybe we can relax this assertion ?
                    refls = reflections[exp_id_pos[0]:exp_id_pos[-1] + 1]

                    # FIXME: how will this work if reading in multiple composite mode experiment jsons?
                    # Reflection experiment 'id' is supposed to be unique within this rank; 'exp_id' (i.e. experiment identifier) is supposed to be unique globally
                    refls['exp_id'] = flex.std_string(len(refls),
                                                      experiment.identifier)

                    if next_id is None:
                        next_id = 0
                        if len(all_reflections) > 0:
                            next_id = max(all_reflections['id']) + 1
                    new_id = next_id

                    # FIXME: it is hard to interpret a function call that returns a changeable property
                    # Replace the slice's id -> identifier map by the single
                    # new entry.  The keys are snapshotted first so that the
                    # map is not modified while it is being iterated.
                    eid = refls.experiment_identifiers()
                    for k in list(eid.keys()):
                        del eid[k]
                    eid[new_id] = experiment.identifier
                    refls['id'] = flex.int(len(refls), new_id)
                    all_reflections.extend(refls)
                    next_id = new_id + 1
        else:
            self.logger.log("Received a list of 0 json/pickle file pairs")
        self.logger.log_step_time("LOAD", True)

        self.logger.log('Read %d experiments consisting of %d reflections' %
                        (len(all_experiments) - starting_expts_count,
                         len(all_reflections) - starting_refls_count))
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        # Drop every reflection column that is not in the keep list
        from xfel.merging.application.reflection_table_utils import reflection_table_utils
        all_reflections = reflection_table_utils.prune_reflection_table_keys(
            reflections=all_reflections,
            keys_to_keep=[
                'intensity.sum.value', 'intensity.sum.variance',
                'miller_index', 'miller_index_asymmetric', 'exp_id', 's1',
                'intensity.sum.value.unmodified',
                'intensity.sum.variance.unmodified'
            ])
        self.logger.log("Pruned reflection table")
        self.logger.log("Memory usage: %d MB" % get_memory_usage())

        # Do we have any data?
        from xfel.merging.application.utils.data_counter import data_counter
        data_counter(self.params).count(all_experiments, all_reflections)

        return all_experiments, all_reflections