Example #1
from unittest import mock

# AnalysisResults and log_cycle_results are assumed to come from
# dials.algorithms.scaling.scale_and_filter.
from dials.algorithms.scaling.scale_and_filter import AnalysisResults, log_cycle_results


def test_scale_and_filter_results_logging():
    """Test the log_cycle_results function."""
    results = AnalysisResults()
    scaling_script = mock.Mock()
    scaling_script.merging_statistics_result = "stats_results"
    scaling_script.scaled_miller_array.size.return_value = 1000

    filter_script = mock.Mock()
    filter_script.results_summary = {
        "dataset_removal": {
            "mode": "image_group",
            "image_ranges_removed": [[(6, 10), 0]],
            "experiments_fully_removed": [],
            "experiment_ids_fully_removed": [],
            "n_reflections_removed": 50,
        },
        "mean_cc_half": 80.0,
        "per_dataset_delta_cc_half_values": {
            "delta_cc_half_values": [-0.1, 0.1, -0.2, 0.2]
        },
    }

    def _parse_side_effect(*args):
        return args[0]

    with mock.patch.object(results,
                           "_parse_merging_stats",
                           side_effect=_parse_side_effect):
        res = log_cycle_results(results, scaling_script, filter_script)
        # test things have been logged correctly
        cycle_results = res.get_cycle_results()
        assert len(cycle_results) == 1
        assert cycle_results[0]["cumul_percent_removed"] == 100 * 50.0 / 1000.0
        assert cycle_results[0]["n_removed"] == 50
        assert cycle_results[0]["image_ranges_removed"] == [[(6, 10), 0]]
        assert cycle_results[0]["removed_datasets"] == []
        assert cycle_results[0]["delta_cc_half_values"] == [
            -0.1, 0.1, -0.2, 0.2
        ]
        assert res.get_merging_stats()[0] == "stats_results"
        assert res.initial_n_reflections == 1000

    # add another cycle of results
    with mock.patch.object(results,
                           "_parse_merging_stats",
                           side_effect=_parse_side_effect):
        res = log_cycle_results(res, scaling_script, filter_script)
        cycle_results = res.get_cycle_results()
        assert len(cycle_results) == 2
        assert cycle_results[1]["cumul_percent_removed"] == 100 * 2 * 50.0 / 1000.0
        assert cycle_results[1]["n_removed"] == 50
        assert cycle_results[1]["image_ranges_removed"] == [[(6, 10), 0]]
        assert cycle_results[1]["removed_datasets"] == []
        assert cycle_results[1]["delta_cc_half_values"] == [
            -0.1, 0.1, -0.2, 0.2
        ]
        assert res.get_merging_stats()[1] == "stats_results"
        assert res.initial_n_reflections == 1000
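
Both mock.patch.object blocks above hinge on side_effect turning the patched _parse_merging_stats into a pass-through, so the string set on the mocked scaling script comes back unchanged and can be asserted on directly. The pattern is plain unittest.mock; a minimal standalone sketch (the Reporter class below is made up purely for illustration):

from unittest import mock


class Reporter:
    def parse(self, stats):
        raise RuntimeError("expensive parsing we do not want in a unit test")


reporter = Reporter()
# side_effect=lambda s: s makes the patched method echo its argument,
# just like _parse_side_effect in the test above.
with mock.patch.object(reporter, "parse", side_effect=lambda s: s):
    assert reporter.parse("stats_results") == "stats_results"
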
Example #2
import json

# AnalysisResults is assumed to come from dials.algorithms.scaling.scale_and_filter.
from dials.algorithms.scaling.scale_and_filter import AnalysisResults


def test_analysis_results_to_from_dict():
    d = {
        "termination_reason": "made up",
        "initial_expids_and_image_ranges": [["foo", [1, 42]], ["bar", [1,
                                                                       10]]],
        "expids_and_image_ranges": [["foo", [1, 42]]],
        "cycle_results": {
            "1": {
                "some stat": -424242
            }
        },
        "initial_n_reflections": 424242,
        "final_stats": "some final stats",
    }
    results = AnalysisResults.from_dict(d)
    # The cycle_results dict produced by AnalysisResults has integer keys, but a
    # round trip through json turns them into str keys. AnalysisResults.from_dict
    # expects str keys, hence the comparison is done after converting to/from json
    # (see the standalone sketch after this example).
    assert json.loads(json.dumps(results.to_dict())) == d
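
The comment above points at a general JSON quirk: object keys are always strings in JSON, so a dict with integer keys does not survive a round trip unchanged. A quick standalone check with nothing dials-specific in it:

import json

cycle_results = {1: {"some stat": -424242}}
round_tripped = json.loads(json.dumps(cycle_results))

# The integer key 1 comes back as the string "1", which is why
# AnalysisResults.from_dict is written to expect str keys.
assert round_tripped == {"1": {"some stat": -424242}}
assert round_tripped != cycle_results
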
Example #3
    def run(self):
        """Run cycles of scaling and filtering."""
        with ScalingHTMLContextManager(self):
            start_time = time.time()
            results = AnalysisResults()

            for counter in range(
                    1, self.params.filtering.deltacchalf.max_cycles + 1):
                self.run_scaling_cycle()

                if counter == 1:
                    results.initial_expids_and_image_ranges = [
                        (exp.identifier,
                         exp.scan.get_image_range()) if exp.scan else None
                        for exp in self.experiments
                    ]

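                # Set up the parameters for this round of delta-cc-half filtering.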
                delta_cc_params = deltacc_phil_scope.extract()
                delta_cc_params.mode = self.params.filtering.deltacchalf.mode
                delta_cc_params.group_size = (
                    self.params.filtering.deltacchalf.group_size)
                delta_cc_params.stdcutoff = self.params.filtering.deltacchalf.stdcutoff
                logger.info("\nPerforming a round of filtering.\n")

                # need to reduce to single table.
                joined_reflections = flex.reflection_table()
                for table in self.reflections:
                    joined_reflections.extend(table)

                script = deltaccscript(delta_cc_params, self.experiments,
                                       joined_reflections)
                script.run()

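                # Record which image ranges remain valid after this round of filtering.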
                valid_image_ranges = get_valid_image_ranges(self.experiments)
                results.expids_and_image_ranges = [
                    (exp.identifier,
                     valid_image_ranges[i]) if exp.scan else None
                    for i, exp in enumerate(self.experiments)
                ]

                self.experiments = script.experiments
                self.params.dataset_selection.use_datasets = None
                self.params.dataset_selection.exclude_datasets = None

                results = log_cycle_results(results, self, script)
                logger.info(
                    "Cycle %s of filtering, n_reflections removed this cycle: %s",
                    counter,
                    results.get_last_cycle_results()["n_removed"],
                )

                # Test termination conditions
                latest_results = results.get_last_cycle_results()
                if latest_results["n_removed"] == 0:
                    logger.info(
                        "Finishing scaling and filtering as no data removed in this cycle."
                    )
                    if self.params.scaling_options.full_matrix:
                        self.reflections = parse_multiple_datasets(
                            [script.filtered_reflection_table])
                        results = self._run_final_scale_cycle(results)
                    else:
                        self.reflections = [script.filtered_reflection_table]
                    results.finish(termination_reason="no_more_removed")
                    break

                # Need to split reflections for further processing.
                self.reflections = parse_multiple_datasets(
                    [script.filtered_reflection_table])

                if (latest_results["cumul_percent_removed"] >
                        self.params.filtering.deltacchalf.max_percent_removed):
                    logger.info(
                        "Finishing scaling and filtering as more than the allowed percentage of data has now been removed."
                    )
                    results = self._run_final_scale_cycle(results)
                    results.finish(termination_reason="max_percent_removed")
                    break

                if self.params.filtering.deltacchalf.min_completeness:
                    if (
                        latest_results["merging_stats"]["completeness"]
                        < self.params.filtering.deltacchalf.min_completeness
                    ):
                        logger.info(
                            "Finishing scaling and filtering as completeness now below cutoff."
                        )
                        results = self._run_final_scale_cycle(results)
                        results.finish(
                            termination_reason="below_completeness_limit")
                        break

                if counter == self.params.filtering.deltacchalf.max_cycles:
                    logger.info("Finishing as reached max number of cycles.")
                    results = self._run_final_scale_cycle(results)
                    results.finish(termination_reason="max_cycles")
                    break

                # If not finished then need to create new scaler to try again
                self._create_model_and_scaler()
            self.filtering_results = results
            # Print summary of results
            logger.info(results)
            with open(self.params.filtering.output.scale_and_filter_results,
                      "w") as f:
                json.dump(self.filtering_results.to_dict(), f, indent=2)
            # All done!
            logger.info("\nTotal time taken: %.4fs ", time.time() - start_time)
            logger.info("%s%s%s", "\n", "=" * 80, "\n")