def test_set_Ih_values_to_target(test_sg):
    """Check that block Ih values are replaced by values from a target table.

    Two generated reflection tables are split over two blocks, then matched
    against a single-block target in which the (2, 0, 0) reflections have
    been relabelled as (4, 0, 0).
    """
    tables = [generated_refl_for_splitting_1(), generated_refl_for_splitting_2()]
    Ih_table = IhTable(tables, test_sg, nblocks=2)
    Ih_table.calc_Ih()
    blocks = Ih_table.blocked_data_list

    # Sanity check: the freshly calculated Ih values before any matching.
    initial_first = [6.0, 17.0 / 3.0, 17.0 / 3.0, 6.0, 17.0 / 3.0]
    initial_second = [16.0 / 3.0, 16.0 / 3.0, 7.0, 16.0 / 3.0, 7.0, 7.0]
    assert list(blocks[0].Ih_values) == pytest.approx(initial_first)
    assert list(blocks[1].Ih_values) == pytest.approx(initial_second)

    # Build the target: relabel the (2, 0, 0) reflections as (4, 0, 0) so
    # that the test can check they are removed from the blocks.
    replacement = (4, 0, 0)
    t1 = generated_refl_for_splitting_1()
    t2 = generated_refl_for_splitting_2()
    t1["miller_index"][1] = replacement
    t1["miller_index"][5] = replacement
    t2["miller_index"][1] = replacement
    target = IhTable([t1, t2], test_sg, nblocks=1)
    target_values = np.array(
        [0.1, 0.2, 0.2, 0.3, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4, 0.4]
    )
    target.blocked_data_list[0].Ih_table["Ih_values"] = target_values

    for data_block in blocks:
        data_block.match_Ih_values_to_target(target)

    assert list(blocks[0].Ih_values) == [0.1, 0.2, 0.2, 0.1, 0.2]
    assert list(blocks[1].Ih_values) == [0.4, 0.4, 0.4]
def test_set_Ih_values_to_target(test_sg):
    """Check that block Ih values are replaced by values from a target table.

    The target is a single-block IhTable built from the same generated data,
    in which three asu Miller indices are overwritten with (4, 0, 0).
    """
    Ih_table = IhTable(
        [generated_refl_for_splitting_1(), generated_refl_for_splitting_2()],
        test_sg,
        nblocks=2,
    )
    Ih_table.calc_Ih()
    blocks = Ih_table.blocked_data_list

    # Sanity check: the freshly calculated Ih values before any matching.
    initial_first = [6.0, 17.0 / 3.0, 17.0 / 3.0, 6.0, 17.0 / 3.0]
    initial_second = [16.0 / 3.0, 16.0 / 3.0, 7.0, 16.0 / 3.0, 7.0, 7.0]
    assert list(blocks[0].Ih_values) == initial_first
    assert list(blocks[1].Ih_values) == initial_second

    target = IhTable(
        [generated_refl_for_splitting_1(), generated_refl_for_splitting_2()],
        test_sg,
        nblocks=1,
    )
    # Relabel the (2, 0, 0) reflections in the target as (4, 0, 0) so that
    # the test can check they are removed from the blocks.
    asu_indices = target.blocked_data_list[0].asu_miller_index
    for position in (3, 4, 8):
        asu_indices[position] = (4, 0, 0)
    target.blocked_data_list[0].Ih_table["Ih_values"] = flex.double(
        [0.1, 0.2, 0.2, 0.3, 0.3, 0.4, 0.1, 0.2, 0.3, 0.4, 0.4]
    )

    for data_block in blocks:
        data_block.match_Ih_values_to_target(target)

    assert list(blocks[0].Ih_values) == [0.1, 0.2, 0.2, 0.1, 0.2]
    assert list(blocks[1].Ih_values) == [0.4, 0.4, 0.4]
def test_error_model_target(large_reflection_table, test_sg):
    """Check ErrorModelTarget residuals, gradients and functional calls."""
    Ih_table = IhTable([large_reflection_table], test_sg, nblocks=1)
    block = Ih_table.blocked_data_list[0]
    model_class = get_error_model("basic")
    model_class.min_reflections_required = 1
    error_model = model_class(block, n_bins=2, min_Ih=2.0)
    error_model.update_for_minimisation([1.0, 0.05])
    target = ErrorModelTarget(error_model, starting_values=[1.0, 0.05])

    # Residuals: squared deviation of each bin variance from unity.
    residuals = target.calculate_residuals()
    expected_residuals = (flex.double(2, 1.0) - error_model.bin_variances) ** 2
    assert residuals == expected_residuals

    # Analytical gradients must agree with a finite-difference estimate.
    gradients = target.calculate_gradients()
    finite_difference = calculate_gradient_fd(target)
    assert approx_equal(list(gradients), list(finite_difference))

    # The combined method calls must reproduce the individual results.
    functional, grads = target.compute_functional_gradients()
    assert functional == residuals
    assert list(gradients) == list(grads)

    (
        functional,
        grads,
        curvatures,
    ) = target.compute_functional_gradients_and_curvatures()
    assert functional == residuals
    assert list(gradients) == list(grads)
    assert curvatures is None
def refine_error_model(params, experiments, reflection_tables):
    """Prepare the reflection data and refine a basic error model.

    Each entry of ``reflection_tables`` is replaced in place by its filtered
    table (reflections flagged bad_for_scaling removed) with the 'intensity'
    and 'variance' columns chosen according to ``params.intensity_choice``.

    Returns:
        The refined model, or None if refinement raised a ValueError or
        RuntimeError (the exception is logged at info level).
    """
    for position, refl in enumerate(reflection_tables):
        # Keep only the data that is usable for scaling.
        bad = refl.get_flags(refl.flags.bad_for_scaling, all=False)
        refl = refl.select(~bad)

        # Now choose intensities; ideally these two options could be combined
        # with a smart refactor.
        if params.intensity_choice != "combine":
            refl = choose_initial_scaling_intensities(
                refl, intensity_choice=params.intensity_choice
            )
        else:
            if not params.combine.Imid:
                sys.exit("Imid value must be provided if intensity_choice=combine")
            # The prescaling correction is needed by combine_intensities.
            refl = calculate_prescaling_correction(refl)
            intensity, variance = combine_intensities(refl, params.combine.Imid)
            refl["intensity"] = intensity
            refl["variance"] = variance
        reflection_tables[position] = refl

    Ih_table = IhTable(
        reflection_tables,
        experiments[0].crystal.get_space_group(),
        additional_cols=["partiality"],
        anomalous=True,
    )

    # Now do the error model refinement.
    model = BasicErrorModel(basic_params=params.basic)
    try:
        refined = run_error_model_refinement(model, Ih_table)
    except (ValueError, RuntimeError) as e:
        logger.info(e)
        return None
    return refined
def create_Ih_table(experiments, reflections, selections=None, n_blocks=1):
    """Build an IhTable from lists of experiments and reflection tables.

    The number of experiments need not match the number of reflection
    tables, as the experiments are only used to obtain the space group
    (which must be the same for every experiment).

    Args:
        experiments: Experiments; the space group is taken from the first.
        reflections: Reflection tables. An 'inverse_scale_factor' column of
            ones is added in place to any table that lacks one.
        selections: Optional list with one selection per reflection table,
            applied to the table before it is added to the IhTable.
        n_blocks: Passed to IhTable as ``nblocks``.

    Returns:
        The constructed IhTable.
    """
    if selections:
        assert len(selections) == len(
            reflections
        ), """Must have an equal number of reflection tables and selections in the input lists."""

    reference_sg = experiments[0].crystal.get_space_group()
    for expt in experiments:
        assert (
            expt.crystal.get_space_group() == reference_sg
        ), """The space groups of all experiments must be equal."""

    tables = []
    index_lists = []
    for position, refl in enumerate(reflections):
        if "inverse_scale_factor" not in refl:
            refl["inverse_scale_factor"] = flex.double(refl.size(), 1.0)
        if not selections:
            # Without selections no index lists are passed on.
            tables.append(refl)
            index_lists = None
            continue
        chosen = selections[position]
        tables.append(refl.select(chosen))
        index_lists.append(chosen.iselection())

    return IhTable(tables, reference_sg, index_lists, nblocks=n_blocks)
def test_error_model_target(large_reflection_table, test_sg):
    """Check ErrorModelTargetB residuals, gradients and functional calls."""
    Ih_table = IhTable([large_reflection_table], test_sg, nblocks=1)
    block = Ih_table.blocked_data_list[0]
    model_class = BasicErrorModel
    model_class.min_reflections_required = 1
    params = generated_param()
    params.weighting.error_model.basic.n_bins = 2
    params.weighting.error_model.basic.min_Ih = 1.0
    error_model = model_class(basic_params=params.weighting.error_model.basic)
    error_model.configure_for_refinement(block)
    error_model.parameters = [1.0, 0.05]
    parameterisation = ErrorModelB_APM(error_model)
    target = ErrorModelTargetB(error_model)
    target.predict(parameterisation)

    # Residuals are compared against unity minus the squared bin variances.
    residuals = target.calculate_residuals(parameterisation)
    bin_variances = error_model.binner.binning_info["bin_variances"]
    assert residuals == (flex.double(2, 1.0) - flex.pow2(bin_variances))

    # Analytical gradients must agree with a finite-difference estimate.
    gradients = target.calculate_gradients(parameterisation)
    finite_difference = calculate_gradient_fd(target, parameterisation)
    assert list(gradients) == pytest.approx(list(finite_difference))

    # The combined method call is exercised twice, as in the original test,
    # and must reproduce the individual results each time.
    for _ in range(2):
        functional, grads = target.compute_functional_gradients(parameterisation)
        assert functional == residuals
        assert list(gradients) == pytest.approx(list(grads))
def test_reflection_selection(dials_regression):
    """Run the connected-reflection selection on a real integrated dataset."""
    data_dir = os.path.join(dials_regression, "xia2-28")
    pickle_path = os.path.join(data_dir, "20_integrated.pickle")
    sequence_path = os.path.join(data_dir, "20_integrated_experiments.json")
    refl = flex.reflection_table.from_file(pickle_path)
    expt = load.experiment_list(sequence_path, check_format=False)[0]

    # Use summation intensities with unit scale factors.
    refl["intensity"] = refl["intensity.sum.value"]
    refl["variance"] = refl["intensity.sum.variance"]
    refl["inverse_scale_factor"] = flex.double(refl.size(), 1.0)

    # Keep integrated reflections with positive variance only.
    positive_variance = refl["variance"] > 0
    refl = refl.select(positive_variance)
    integrated = refl.get_flags(refl.flags.integrated, all=True)
    refl = refl.select(integrated)

    block = IhTable([refl], expt.crystal.get_space_group()).Ih_table_blocks[0]

    frame = refl["xyzobs.px.value"].parts()[2]
    refl["phi"] = frame * expt.scan.get_oscillation()[1]
    refl = calc_crystal_frame_vectors(refl, expt)
    block.Ih_table["s1c"] = refl["s1c"].select(block.Ih_table["loc_indices"])

    indices = select_highly_connected_reflections(
        block, expt, min_per_area=10, n_resolution_bins=10
    )
    assert 1710 < len(indices) < 1800

    # Give a high min_per_area to check that all reflections with
    # multiplicity > 1 are selected.
    indices = select_highly_connected_reflections(
        block, expt, min_per_area=50, n_resolution_bins=10
    )
    # this dataset has 48 reflections with multiplicity = 1
    assert len(indices) == refl.size() - 48
def test_select_connected_reflections_across_datasets():
    """Test the basic cross-dataset reflection selection algorithm.

    Three reflection tables (classes) are made, containing the following
    number of reflections in each symmetry group:

                 symmetry groups
                 0  1  2  3  4  5  6
              0  3  3  2  0  1  1  1
      classes 1  0  2  0  0  3  2  1
              2  2  1  1  5  0  4  0

    With target=5, expect:
      number of chosen reflections per class: [8, 7, 7]
      symmetry groups used: [1, 5, 0, 4]
    """
    counts_per_class = (
        [3, 3, 2, 0, 1, 1, 1],
        [0, 2, 0, 0, 3, 2, 1],
        [2, 1, 1, 5, 0, 4, 0],
    )

    def make_refl_table(n_list, class_idx=0):
        """Make a reflection table with groups based on n_list."""
        total = sum(n_list)
        # Group i contributes n copies of the index (0, 0, i + 1).
        indices = [
            (0, 0, group + 1)
            for group, count in enumerate(n_list)
            for _ in range(count)
        ]
        table = flex.reflection_table()
        table["miller_index"] = flex.miller_index(indices)
        table["intensity"] = flex.double(total, 1)
        table["variance"] = flex.double(total, 1)
        table["inverse_scale_factor"] = flex.double(total, 1)
        table["class_index"] = flex.int(total, class_idx)
        return table

    reflections = [
        make_refl_table(counts, class_idx)
        for class_idx, counts in enumerate(counts_per_class)
    ]
    table = IhTable(reflections, sgtbx.space_group("P1"))

    selected = select_connected_reflections_across_datasets(
        table, min_per_class=5, Isigma_cutoff=0.0
    )
    indices, dataset_ids, total_in_classes = selected

    assert list(total_in_classes) == [8, 7, 7]
    expected_indices = (
        [0, 1, 2, 3, 4, 5, 8, 9]
        + [0, 1, 2, 3, 4, 5, 6]
        + [0, 1, 2, 9, 10, 11, 12]
    )
    assert list(indices) == expected_indices
    assert list(dataset_ids) == [0] * 8 + [1] * 7 + [2] * 7
def test_select_highly_connected_reflections_in_bin():
    """Test the single-bin selection algorithm."""
    n_list = [3, 3, 2, 1, 1, 2, 2]
    total = sum(n_list)
    # Group i contributes n copies of the index (0, 0, i + 1).
    indices_by_group = [
        (0, 0, group + 1) for group, count in enumerate(n_list) for _ in range(count)
    ]

    table = flex.reflection_table()
    table["miller_index"] = flex.miller_index(indices_by_group)
    table["class_index"] = flex.int([0, 1, 1, 0, 1, 2, 0, 0, 2, 1, 1, 2, 0, 1])
    table["intensity"] = flex.double(total, 1)
    table["variance"] = flex.double(total, 1)
    table["inverse_scale_factor"] = flex.double(total, 1)

    block = IhTable([table], sgtbx.space_group("P1")).Ih_table_blocks[0]
    # Copy the class indices into the block, in the block's own ordering.
    block.Ih_table["class_index"] = table["class_index"].select(
        block.Ih_table["loc_indices"]
    )

    indices, total_in_classes = select_highly_connected_reflections_in_bin(
        block, min_per_class=2, min_total=6, max_total=100
    )

    assert list(total_in_classes) == [2, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    assert list(indices) == [0, 1, 2, 3, 4, 5, 10, 11]
def test_errormodel(large_reflection_table, test_sg):
    """Test the initialisation and methods of the error model."""
    Ih_table = IhTable([large_reflection_table], test_sg, nblocks=1)
    block = Ih_table.blocked_data_list[0]
    params = generated_param()
    params.weighting.error_model.basic.n_bins = 2
    params.weighting.error_model.basic.min_Ih = 1.0
    model_class = BasicErrorModel
    model_class.min_reflections_required = 1
    error_model = model_class(basic_params=params.weighting.error_model.basic)
    error_model.min_reflections_required = 1
    error_model.configure_for_refinement(block)

    # Check the (reflection, bin) assignments in the summation matrix.
    for row, column in ((0, 1), (1, 1), (2, 0), (3, 0), (4, 0)):
        assert error_model.binner.summation_matrix[row, column] == 1
    assert error_model.binner.summation_matrix.non_zeroes == 5
    assert list(error_model.binner.binning_info["refl_per_bin"]) == [3, 2]

    # Test calc sigmaprime
    x0 = 1.0
    x1 = 0.1
    sigmaprime = calc_sigmaprime([x0, x1], error_model.filtered_Ih_table)
    adjusted_variance = block.variances + flex.pow2(x1 * block.intensities)
    cal_sigpr = list(
        x0 * flex.sqrt(adjusted_variance) / block.inverse_scale_factors
    )
    assert list(sigmaprime) == pytest.approx(cal_sigpr[4:7] + cal_sigpr[-2:])

    # Test calc delta_hl, after resetting sigmaprime to the [1.0, 0.0] model.
    sigmaprime = calc_sigmaprime([1.0, 0.0], error_model.filtered_Ih_table)
    # Calculate example for three elements, with intensities 1, 5 and 10 and
    # variances 1, 5 and 10 using the formula
    # delta_hl = math.sqrt((n_h - 1) / n_h) * (Ihl/ghl - Ih) / sigmaprime
    filtered = error_model.filtered_Ih_table
    delta_hl = calc_deltahl(filtered, filtered.calc_nh(), sigmaprime)
    expected_deltas = [
        (-3.0 / 2.0) * math.sqrt(2.0 / 3.0),
        (5.0 / 2.0) * math.sqrt(2.0 / 15.0),
        5.0 * math.sqrt(2.0 / 30.0),
        -0.117647058824,
        0.124783549621,
    ]
    assert list(delta_hl) == pytest.approx(expected_deltas)
def reject_outliers(reflection_table, experiment, method="standard", zmax=6.0):
    """
    Flag outliers among symmetry-equivalent intensities.

    An intensity-based outlier rejection algorithm is run on the table, and
    the outlier_in_scaling flag is reset and then set for the reflections
    that were identified as outliers.

    The 'intensity' and 'variance' columns must be present in the reflection
    table; these should be corrected but unscaled values, as an
    inverse_scale_factor is applied during outlier rejection if that column
    is present (a column of ones is added in place if it is not). The table
    should be prefiltered (e.g. not-integrated reflections removed), as no
    further filtering is done here.

    Args:
        reflection_table: A reflection table.
        experiment: A single experiment object.
        method (str): Name (alias) of outlier rejection algorithm to use.
        zmax (float): Normalised deviation threshold for classifying an outlier.

    Returns:
        reflection_table: The input table with the outlier_in_scaling flag set.

    """
    assert "intensity" in reflection_table, "reflection table has no 'intensity' column"
    assert "variance" in reflection_table, "reflection table has no 'variance' column"

    if "inverse_scale_factor" not in reflection_table:
        unit_scales = flex.double(reflection_table.size(), 1.0)
        reflection_table["inverse_scale_factor"] = unit_scales

    space_group = experiment.crystal.get_space_group()
    Ih_table = IhTable([reflection_table], space_group, nblocks=1)
    index_arrays = determine_outlier_index_arrays(Ih_table, method=method, zmax=zmax)
    outlier_indices = index_arrays[0]

    # Clear any previously set outlier flags before setting the new ones.
    currently_flagged = reflection_table.get_flags(
        reflection_table.flags.outlier_in_scaling
    )
    reflection_table.unset_flags(
        currently_flagged, reflection_table.flags.outlier_in_scaling
    )
    reflection_table.set_flags(
        outlier_indices, reflection_table.flags.outlier_in_scaling
    )
    return reflection_table
def test_target_gradient_calculation_finite_difference(small_reflection_table, single_exp, physical_param): """Test the calculated gradients against a finite difference calculation.""" model = PhysicalScalingModel.from_data(physical_param, single_exp, small_reflection_table) # need to 'add_data' model.configure_components(small_reflection_table, single_exp, physical_param) model.components["scale"].update_reflection_data() model.components["decay"].update_reflection_data() apm = multi_active_parameter_manager( ScalingTarget(), [model.components], [["scale", "decay"]], scaling_active_parameter_manager, ) model.components["scale"].inverse_scales = flex.double([2.0, 1.0, 2.0]) model.components["decay"].inverse_scales = flex.double([1.0, 1.0, 0.4]) Ih_table = IhTable([small_reflection_table], single_exp.crystal.get_space_group()) with patch.object(SingleScaler, "__init__", lambda x, y, z, k: None): scaler = SingleScaler(None, None, None) scaler._Ih_table = Ih_table # Now do finite difference check. target = ScalingTarget() scaler.update_for_minimisation(apm, 0) grad = target.calculate_gradients(scaler.Ih_table.blocked_data_list[0]) res = target.calculate_residuals(scaler.Ih_table.blocked_data_list[0]) assert (res > 1e-8), """residual should not be zero, or the gradient test below will not really be working!""" # Now compare to finite difference f_d_grad = calculate_gradient_fd(target, scaler, apm) print(list(f_d_grad)) print(list(grad)) assert list(grad) == pytest.approx(list(f_d_grad)) sel = f_d_grad > 1e-8 assert sel, """assert sel has some elements, as finite difference grad should
def test_error_model_on_simulated_data(
    background_variance, multiplicity, abs_tolerances, model_b
):
    """Test the refinement of the error model using simulated data.

    The simulated data consists of 2150 unique reflections, with
    I = 5.0 * d^4, giving an intensity range from 122070.3 to 5.0, with a
    mean of 285.21 and a median of 13.76. Each reflection is sampled
    'multiplicity' times from a poisson distribution to give the intensities,
    which is scaled away from the mean I using the model_b factor (scaled
    such that when the model is refined, the correct b should be returned).

    The test uses three different representative levels of background
    variance (1=None, 5=low, 50=high), which is added to the variance from
    poisson counting statistics. A tolerance of 10 % is used for the model b
    parameter for good cases, which is increased to 20 % for tough cases.

    The purpose of the test is to show that the error model is finding the
    correct solution for a variety of background levels, with varying levels
    of systematic error (model b parameter, which is the one that has the
    highest effect on the errors output by scaling). Included are some more
    realistic cases, with lower multiplicity of measurement and high
    background variances.

    These tests are also designed to help validate the cutoff choices in the
    error model code:
      - the need for a min_Ih to ensure that poisson approx normal distribution
      - the need for an avg_I_over_var cutoff to remove background effects
      - cutting off extreme 'outlier' deviations as not to mislead the model.
    """
    simulated = data_for_error_model_test(
        int(background_variance), int(multiplicity), b=model_b
    )
    Ih_table = IhTable([simulated], space_group("P 2ac 2ab"))
    model_class = get_error_model("basic")
    block = Ih_table.blocked_data_list[0]
    model_class.min_reflections_required = 250
    error_model = model_class(block, n_bins=10)
    assert error_model.summation_matrix.n_rows > 400

    refinery = error_model_refinery(
        engine="SimpleLBFGS",
        target=ErrorModelTarget(error_model),
        max_iterations=100,
    )
    refinery.run()
    error_model = refinery.return_error_model()

    refined_a = error_model.refined_parameters[0]
    assert refined_a == pytest.approx(1.00, abs=abs_tolerances[0])
    # Only the magnitude of the refined b parameter is compared.
    refined_b = abs(error_model.refined_parameters[1])
    assert refined_b == pytest.approx(model_b, abs=abs_tolerances[1])
def test_multi_dataset_outlier_rejection(test_sg):
    """Test outlier rejection with two datasets.

    Both the simple and the iterative normalised-deviation algorithms are
    run on an IhTable built from two reflection tables, and the per-dataset
    outlier index arrays are compared against the expected values.
    """
    rt1 = flex.reflection_table()
    rt1["intensity"] = flex.double(
        [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 20.0, 400.0, 10.0]
    )
    rt1["variance"] = flex.double(9, 1.0)
    rt1["inverse_scale_factor"] = flex.double(9, 1.0)
    rt1["miller_index"] = flex.miller_index(
        [
            (0, 0, 1),
            (0, 0, 1),
            (0, 0, 1),
            (0, 0, 1),
            (0, 0, 2),
            (0, 0, 2),
            (0, 0, 2),
            (0, 0, 2),
            (0, 0, 3),
        ]
    )
    # Only the first reflection of the first table is marked as excluded.
    rt1.set_flags(
        flex.bool([True, False, False, False, False, False, False, False, False]),
        rt1.flags.excluded_for_scaling,
    )
    rt1.set_flags(flex.bool(9, False), rt1.flags.user_excluded_in_scaling)

    rt2 = flex.reflection_table()
    rt2["intensity"] = flex.double([10.0, 20.0, 500.0])
    rt2["variance"] = flex.double(3, 1.0)
    rt2["inverse_scale_factor"] = flex.double(3, 1.0)
    rt2["miller_index"] = flex.miller_index([(0, 0, 23), (0, 0, 1), (0, 0, 2)])
    # Read the flag from rt2 itself (previously rt1.flags was used here).
    rt2.set_flags(flex.bool([False, False, False]), rt2.flags.excluded_for_scaling)
    rt2.set_flags(flex.bool(3, False), rt2.flags.user_excluded_in_scaling)

    Ih_table = IhTable([rt1, rt2], test_sg, nblocks=1)
    zmax = 6.0

    outlier_rej = SimpleNormDevOutlierRejection(Ih_table, zmax)
    outlier_rej.run()
    outliers = outlier_rej.final_outlier_arrays
    assert len(outliers) == 2
    assert list(outliers[0]) == [4, 5, 6, 7]
    assert list(outliers[1]) == [1, 2]

    outlier_rej = NormDevOutlierRejection(Ih_table, zmax)
    outlier_rej.run()
    outliers = outlier_rej.final_outlier_arrays
    assert len(outliers) == 2
    assert list(outliers[0]) == [7, 6]
    assert list(outliers[1]) == [1, 2]
def test_target_jacobian_calculation_finite_difference(
    physical_param, single_exp, large_reflection_table
):
    """Compare the calculated jacobian with a finite difference estimate."""
    physical_param.physical.decay_correction = False
    model = PhysicalScalingModel.from_data(
        physical_param, single_exp, large_reflection_table
    )
    # need to 'add_data'
    model.configure_components(large_reflection_table, single_exp, physical_param)
    model.components["scale"].update_reflection_data()
    apm = multi_active_parameter_manager(
        ScalingTarget(),
        [model.components],
        [["scale"]],
        scaling_active_parameter_manager,
    )
    Ih_table = IhTable(
        [large_reflection_table], single_exp.crystal.get_space_group()
    )

    # Replace SingleScaler.__init__ with a no-op so that a bare scaler can be
    # created and handed the Ih_table directly.
    with patch.object(SingleScaler, "__init__", lambda x, y, z, k: None):
        scaler = SingleScaler(None, None, None)
        scaler._Ih_table = Ih_table

        target = ScalingTarget()
        scaler.update_for_minimisation(apm, 0)

        fd_jacobian = calculate_jacobian_fd(target, scaler, apm)
        first_block = scaler.Ih_table.blocked_data_list[0]
        r, jacobian, w = target.compute_residuals_and_gradients(first_block)

        expected_residuals = [
            -50.0 / 3.0,
            70.0 / 3.0,
            -20.0 / 3.0,
            12.5,
            -2.5,
        ] + [-25.0, 0.0, -75.0, 0.0, 200.0]
        assert r == pytest.approx(expected_residuals)
        expected_weights = [0.1, 0.1, 0.1, 0.02, 0.1, 0.02, 0.01, 0.02, 0.01, 0.01]
        assert w == pytest.approx(expected_weights)

        n_rows = jacobian.n_rows
        n_cols = jacobian.n_cols
        print(jacobian)
        print(fd_jacobian)

        # Element-wise comparison of the analytical and numerical jacobians.
        for row in range(n_rows):
            for col in range(n_cols):
                assert jacobian[row, col] == pytest.approx(
                    fd_jacobian[row, col], abs=1e-4
                )
def test_determine_Esq_outlier_index_arrays(
    generated_Ih_table, mock_exp_with_sg, test_sg
):
    """Check the outlier index arrays for one dataset and for a split dataset."""
    # Set the emax lower to check that two reflections are identified as
    # outliers.
    single_result = determine_Esq_outlier_index_arrays(
        generated_Ih_table, mock_exp_with_sg, emax=1.5
    )
    assert list(single_result[0]) == [8, 9]

    # Now split the dataset into two, to check the output is correctly formed.
    full_table = generate_outlier_table()
    first_part = full_table[0:9]
    second_part = full_table[9:]
    split_Ih_table = IhTable([first_part, second_part], test_sg)
    split_result = determine_Esq_outlier_index_arrays(
        split_Ih_table, mock_exp_with_sg, emax=1.5
    )
    assert list(split_result[0]) == [8]
    assert list(split_result[1]) == [0]
    assert len(split_result) == 2
def test_errormodel(large_reflection_table, test_sg):
    """Test the initialisation and methods of the error model."""
    # First test the get_error_model helper function.
    with pytest.raises(ValueError):
        get_error_model("bad")
    model_class = get_error_model("basic")
    model_class.min_reflections_required = 1

    Ih_table = IhTable([large_reflection_table], test_sg, nblocks=1)
    block = Ih_table.blocked_data_list[0]
    error_model = model_class(block, n_bins=2, min_Ih=1.0)

    # Check the (reflection, bin) assignments in the summation matrix.
    for row, column in ((0, 1), (1, 1), (2, 0), (3, 0), (4, 0)):
        assert error_model.summation_matrix[row, column] == 1
    assert error_model.summation_matrix.non_zeroes == 5
    assert list(error_model.bin_counts) == [3, 2]

    # Test calc sigmaprime
    x0 = 1.0
    x1 = 0.1
    error_model.sigmaprime = error_model.calc_sigmaprime([x0, x1])
    adjusted_sigma = (block.variances + ((x1 * block.intensities) ** 2)) ** 0.5
    cal_sigpr = list(x0 * adjusted_sigma / block.inverse_scale_factors)
    assert list(error_model.sigmaprime) == pytest.approx(
        cal_sigpr[4:7] + cal_sigpr[-2:]
    )

    # Test calc delta_hl, after resetting sigmaprime to the [1.0, 0.0] model.
    error_model.sigmaprime = error_model.calc_sigmaprime([1.0, 0.0])
    # Calculate example for three elements, with intensities 1, 5 and 10 and
    # variances 1, 5 and 10 using the formula
    # delta_hl = math.sqrt((n_h - 1) / n_h) * (Ihl/ghl - Ih) / sigmaprime
    error_model.delta_hl = error_model.calc_deltahl()
    expected_deltas = [
        (-3.0 / 2.0) * math.sqrt(2.0 / 3.0),
        (5.0 / 2.0) * math.sqrt(2.0 / 15.0),
        5.0 * math.sqrt(2.0 / 30.0),
        -0.117647058824,
        0.124783549621,
    ]
    assert list(error_model.delta_hl) == pytest.approx(expected_deltas)
def test_outlier_rejection_with_small_outliers():
    """Check that near-zero intensities in a strong group are flagged.

    All reflections share one Miller index; the three with intensities close
    to zero (positions 3, 6 and 12) are the expected outliers.
    """
    intensities = [
        3560.84231,
        3433.66407,
        3830.64235,
        0.20552,
        3786.59537,
        4009.98652,
        0.00000,
        3578.91470,
        3549.19151,
        3379.58616,
        3686.38610,
        3913.42869,
        0.00000,
        3608.84869,
        3681.11110,
    ]
    variances = [
        10163.98104,
        9577.90389,
        9702.84868,
        3.77427,
        8244.70685,
        9142.38221,
        1.51118,
        9634.53782,
        9870.73103,
        9078.23488,
        8977.26984,
        8712.91360,
        1.78802,
        7473.26521,
        10075.49862,
    ]
    table = flex.reflection_table()
    table["intensity"] = flex.double(intensities)
    table["variance"] = flex.double(variances)
    table["inverse_scale_factor"] = flex.double(table.size(), 1.0)
    table["miller_index"] = flex.miller_index([(0, 0, 1)] * table.size())
    expected_outliers = [3, 6, 12]

    rejection = NormDevOutlierRejection(
        IhTable([table], space_group("P 1")), zmax=6.0
    )
    rejection.run()
    found = rejection.final_outlier_arrays

    assert len(found) == 1
    # Order is not checked, only which reflections were flagged.
    assert set(found[0]) == set(expected_outliers)
def test_limit_outlier_weights():
    """Check that limit_outlier_weights caps every weight at 0.1 here.

    Two tables of five reflections share a single Miller index; in each, one
    reflection has a much smaller variance than the others.
    """
    import copy

    rt = flex.reflection_table()
    rt["intensity"] = flex.double([100.0, 101.0, 109.0, 105.0, 1.0])
    rt["variance"] = flex.double([100.0, 101.0, 109.0, 105.0, 1.0])
    rt["inverse_scale_factor"] = flex.double(rt.size(), 1.0)
    rt["miller_index"] = flex.miller_index([(0, 0, 1)] * rt.size())

    rt2 = flex.reflection_table()
    rt2["intensity"] = flex.double([100.0, 101.0, 102.0, 105.0, 1.0])
    rt2["variance"] = flex.double([100.0, 101.0, 102.0, 105.0, 1.0])
    # Size the remaining rt2 columns from rt2 itself rather than from rt, so
    # that the table stays self-consistent if either data list is changed.
    rt2["inverse_scale_factor"] = flex.double(rt2.size(), 1.0)
    rt2["miller_index"] = flex.miller_index([(0, 0, 1)] * rt2.size())

    table = IhTable([rt, rt2], space_group("P 1"))

    # Pass a deep copy of the weights so the block's own weights are not the
    # object handed to limit_outlier_weights.
    new_weights = limit_outlier_weights(
        copy.deepcopy(table.Ih_table_blocks[0].weights),
        table.Ih_table_blocks[0].h_index_matrix,
    )
    assert all(i <= 0.1 for i in new_weights)
def test_error_model_on_simulated_data(
    background_variance, multiplicity, abs_tolerances, model_a, model_b
):
    """Test the refinement of the error model using simulated data.

    The simulated data consists of 2150 unique reflections, with
    I = 5.0 * d^4, giving an intensity range from 122070.3 to 5.0, with a
    mean of 285.21 and a median of 13.76. Each reflection is sampled
    'multiplicity' times from a poisson distribution to give the intensities,
    which is scaled away from the mean I using the model_b factor (scaled
    such that when the model is refined, the correct b should be returned).

    The test uses three different representative levels of background
    variance (1=None, 5=low, 50=high), which is added to the variance from
    poisson counting statistics. A tolerance of 10 % is used for the model b
    parameter for good cases, which is increased to 20 % for tough cases.
    """
    simulated = data_for_error_model_test(
        int(background_variance), int(multiplicity), b=model_b, a=model_a
    )
    Ih_table = IhTable([simulated], space_group("P 2ac 2ab"))
    block = Ih_table.blocked_data_list[0]
    BasicErrorModel.min_reflections_required = 250
    error_model = BasicErrorModel()
    error_model.configure_for_refinement(block)
    assert error_model.binner.summation_matrix.n_rows > 400

    refinery = ErrorModelRefinery(error_model, parameters_to_refine=["a", "b"])
    refinery.run()

    refined_a = refinery.model.parameters[0]
    assert refined_a == pytest.approx(model_a, abs=abs_tolerances[0])
    # Only the magnitude of the refined b parameter is compared.
    refined_b = abs(refinery.model.parameters[1])
    assert refined_b == pytest.approx(model_b, abs=abs_tolerances[1])
def outlier_target_table(test_sg):
    """Return a single-block IhTable built from the generated target table.

    Intended for use as the target in targeted outlier rejection.
    """
    return IhTable([generate_target_table()], test_sg, nblocks=1)
def generated_Ih_table(test_sg):
    """Return a single-block IhTable built from the generated outlier table."""
    return IhTable([generate_outlier_table()], test_sg, nblocks=1)
def run():
    """Filter scaled reflections group by group and write 'filtered.refl'.

    Scaled reflections are grouped by symmetry equivalence using an IhTable.
    Each group is passed to test_group, and the reflections it reports as
    not 'in_real' are removed from the full reflection table before it is
    saved.
    """
    parser = OptionParser(
        read_experiments=True,
        read_reflections=True,
        check_format=False,
        epilog=__doc__,
    )
    params, _, _unhandled = parser.parse_args(
        show_diff_phil=False, return_unhandled=True
    )
    log.config(verbosity=1, logfile="dials.cluster_filter.log")
    logger.info(dials_version())

    diff_phil = parser.diff_phil.as_str()
    if diff_phil:
        logger.info("The following parameters have been modified:\n%s", diff_phil)

    reflections, expts = reflections_and_experiments_from_files(
        params.input.reflections, params.input.experiments
    )
    refls = reflections[0]
    refls["intensity"] = refls["intensity.scale.value"]
    refls["variance"] = refls["intensity.scale.variance"]
    # Record each row's position in the full table; these are passed to the
    # IhTable as the indices list and read back below via 'loc_indices'.
    refls["initial_index"] = flex.size_t_range(refls.size())
    good_refls = refls.select(refls.get_flags(refls.flags.scaled))

    Ih_table = IhTable(
        [good_refls],
        space_group=expts[0].crystal.get_space_group(),
        indices_lists=[good_refls["initial_index"]],
    )
    block = Ih_table.blocked_data_list[0]

    to_exclude = flex.size_t([])
    for group_idx in range(block.n_groups):
        # Select a single symmetry group.
        group_mask = flex.bool(block.n_groups, False)
        group_mask[group_idx] = True
        group_block = block.select_on_groups(group_mask)

        # Keep only reflections with I/sigma above -1.0.
        i_over_sigma = group_block.intensities / (group_block.variances**0.5)
        group_block = group_block.select(i_over_sigma > -1.0)
        if not group_block.size:
            continue

        scaled_intensities = group_block.intensities / group_block.inverse_scale_factors
        scaled_variances = group_block.variances / (
            group_block.inverse_scale_factors**2
        )
        result = test_group(
            scaled_intensities, scaled_variances, group_block.asu_miller_index[0]
        )
        if not result:
            continue

        in_real = result[0]
        loc_indices = flumpy.from_numpy(group_block.Ih_table["loc_indices"].to_numpy())
        to_exclude.extend(loc_indices.select(~in_real))

    logger.info(to_exclude.size())
    logger.info(refls.size())

    bad = flex.bool(refls.size(), False)
    bad.set_selected(to_exclude, True)
    refls = refls.select(~bad)
    refls.as_file("filtered.refl")
    logger.info("Done")
def test_IhTable_freework(large_reflection_table, small_reflection_table, test_sg):
    """Test the IhTable when a free set is requested.

    An IhTable is built from selections of two reflection tables with
    nblocks=2 and free_set_percentage=50.0, and the test checks that three
    blocks result (two work blocks plus one free-set block), the contents of
    the h_index matrices and block selections, the data-setting methods, and
    the 'as_miller_array' output. The construction is then repeated with
    free_set_offset=1.
    """
    # Drop the last reflection of the large table and the second reflection
    # of the small table; the selection indices are passed as indices_lists.
    sel1 = flex.bool(7, True)
    sel1[6] = False
    sel2 = flex.bool(4, True)
    sel2[1] = False
    Ih_table = IhTable(
        reflection_tables=[
            large_reflection_table.select(sel1),
            small_reflection_table.select(sel2),
        ],
        indices_lists=[sel1.iselection(), sel2.iselection()],
        space_group=test_sg,
        nblocks=2,
        free_set_percentage=50.0,
    )
    # The free-set block is held in addition to the two work blocks.
    assert len(Ih_table.blocked_data_list) == 3
    assert Ih_table.n_datasets == 2
    assert Ih_table.n_work_blocks == 2
    block_list = Ih_table.Ih_table_blocks

    # two standard blocks
    assert block_list[0].h_index_matrix[0, 0] == 1
    assert block_list[0].h_index_matrix.non_zeroes == 1
    assert block_list[1].h_index_matrix[0, 0] == 1
    assert block_list[1].h_index_matrix[1, 0] == 1
    assert block_list[1].h_index_matrix[2, 0] == 1
    assert block_list[1].h_index_matrix.non_zeroes == 3
    # free set block
    assert block_list[2].h_index_matrix[0, 0] == 1
    assert block_list[2].h_index_matrix[1, 1] == 1
    assert block_list[2].h_index_matrix[2, 2] == 1
    assert block_list[2].h_index_matrix[3, 2] == 1
    assert block_list[2].h_index_matrix[4, 3] == 1
    assert block_list[2].h_index_matrix.non_zeroes == 5

    # block_selections is indexed by dataset within each block.
    assert list(block_list[0].block_selections[0]) == [5]
    assert list(block_list[0].block_selections[1]) == []
    assert list(block_list[1].block_selections[0]) == [0, 2]
    assert list(block_list[1].block_selections[1]) == [0]
    assert list(block_list[2].block_selections[0]) == [1, 3, 4]
    assert list(block_list[2].block_selections[1]) == [3, 2]

    # test get_block_selections_for_dataset
    block_sels_0 = Ih_table.get_block_selections_for_dataset(0)
    assert len(block_sels_0) == 3
    assert list(block_sels_0[0]) == [5]
    assert list(block_sels_0[1]) == [0, 2]
    assert list(block_sels_0[2]) == [1, 3, 4]
    block_sels_1 = Ih_table.get_block_selections_for_dataset(1)
    assert len(block_sels_1) == 3
    assert list(block_sels_1[0]) == []
    assert list(block_sels_1[1]) == [0]
    assert list(block_sels_1[2]) == [3, 2]
    # Only two datasets exist, so asking for a third must fail.
    with pytest.raises(AssertionError):
        _ = Ih_table.get_block_selections_for_dataset(2)

    Ih_table.calc_Ih()

    # test setting data
    # set scale factors (on block 2, the free-set block)
    new_s_block_2 = flex.double(range(1, 6))
    Ih_table.set_inverse_scale_factors(new_s_block_2, 2)
    assert list(Ih_table.Ih_table_blocks[2].inverse_scale_factors) == list(
        new_s_block_2
    )
    # set derivatives
    derivs = Mock()
    Ih_table.set_derivatives(derivs, 0)
    assert Ih_table.Ih_table_blocks[0].derivatives is derivs

    def update_vars_side_effect(*args):
        # Return a value of 0.5 for every element of the first argument.
        return flex.double([0.5] * len(args[0]))

    # test setting an error model
    # The mocked update_variances returns 0.5 everywhere and the weights are
    # then expected to be 2.0 (presumably the inverse variance).
    em = Mock()
    em.update_variances.side_effect = update_vars_side_effect
    Ih_table.update_weights(em)
    for block in Ih_table.Ih_table_blocks:
        assert list(block.weights) == pytest.approx([2.0] * block.size)

    Ih_table.calc_Ih(1)

    # now test free set with offset
    Ih_table = IhTable(
        reflection_tables=[
            large_reflection_table.select(sel1),
            small_reflection_table.select(sel2),
        ],
        indices_lists=[sel1.iselection(), sel2.iselection()],
        space_group=test_sg,
        nblocks=2,
        free_set_percentage=50.0,
        free_set_offset=1,
    )
    assert len(Ih_table.blocked_data_list) == 3
    assert Ih_table.n_datasets == 2
    assert Ih_table.n_work_blocks == 2
    block_list = Ih_table.Ih_table_blocks

    # two standard blocks
    assert block_list[0].h_index_matrix[0, 0] == 1
    assert block_list[0].h_index_matrix[1, 1] == 1
    assert block_list[0].h_index_matrix.non_zeroes == 2
    assert block_list[1].h_index_matrix[0, 0] == 1
    assert block_list[1].h_index_matrix[1, 0] == 1
    assert block_list[1].h_index_matrix[2, 1] == 1
    assert block_list[1].h_index_matrix.non_zeroes == 3
    # free set block
    assert block_list[2].h_index_matrix[0, 0] == 1
    assert block_list[2].h_index_matrix[1, 1] == 1
    assert block_list[2].h_index_matrix[2, 1] == 1
    assert block_list[2].h_index_matrix[3, 1] == 1
    assert block_list[2].h_index_matrix.non_zeroes == 4

    assert list(block_list[0].block_selections[0]) == [1, 3]
    assert list(block_list[0].block_selections[1]) == []
    assert list(block_list[1].block_selections[0]) == [4]
    assert list(block_list[1].block_selections[1]) == [3, 2]
    assert list(block_list[2].block_selections[0]) == [5, 0, 2]
    assert list(block_list[2].block_selections[1]) == [0]

    Ih_table.calc_Ih()

    # Test the 'as_miller_array' method.
    unit_cell = uctbx.unit_cell((1.0, 1.0, 1.0, 90.0, 90.0, 90.0))

    # By default only the work-block data are expected in the array.
    arr = Ih_table.as_miller_array(unit_cell)
    assert arr.size() == 5
    assert list(arr.indices()) == [
        (0, 0, 1),
        (0, 2, 0),
        (0, 4, 0),
        (0, 4, 0),
        (10, 0, 0),
    ]
    assert list(arr.data()) == pytest.approx(
        [x / 2.0 for x in [100.0, 60.0, 30.0, 30.0, 10.0]]
    )
    assert list(arr.sigmas()) == pytest.approx(
        [(x / 4.0) ** 0.5 for x in [100.0, 60.0, 30.0, 30.0, 10.0]]
    )

    # With return_free_set_data=True the free-set data are expected instead.
    arr = Ih_table.as_miller_array(unit_cell, return_free_set_data=True)
    assert arr.size() == 4
    assert list(arr.indices()) == [(0, 0, 2), (1, 0, 0), (1, 0, 0), (1, 0, 0)]
    assert list(arr.data()) == pytest.approx(
        [x / 2.0 for x in [40.0, 100.0, 80.0, 60.0]]
    )
    assert list(arr.sigmas()) == pytest.approx(
        [(x / 4.0) ** 0.5 for x in [50.0, 90.0, 90.0, 60.0]]
    )
def test_IhTable_split_into_blocks(
    large_reflection_table, small_reflection_table, test_sg
):
    """Check how IhTable distributes two reflection tables over two blocks.

    A subset of each fixture table is passed to an ``IhTable`` built with
    ``nblocks=2``.  The test then verifies the per-block asu Miller indices,
    the per-block/per-dataset selections, the total size, the h_index
    matrices, and that ``update_data_in_blocks`` writes new intensities and
    variances into the expected positions of each block.
    """
    # Deselect the last entry of the 7-element selection and the second
    # entry of the 4-element selection.
    sel1 = flex.bool(7, True)
    sel2 = flex.bool(4, True)
    sel1[6] = False
    sel2[1] = False

    selected_tables = [
        large_reflection_table.select(sel1),
        small_reflection_table.select(sel2),
    ]
    selected_indices = [sel1.iselection(), sel2.iselection()]
    Ih_table = IhTable(
        reflection_tables=selected_tables,
        indices_lists=selected_indices,
        space_group=test_sg,
        nblocks=2,
    )

    assert (Ih_table.n_datasets, Ih_table.n_work_blocks) == (2, 2)
    block_list = Ih_table.Ih_table_blocks

    # Expected asu Miller indices, one list per block.
    asu_indices_per_block = (
        [(0, 0, 1), (0, 0, 2), (0, 2, 0)],
        [(0, 4, 0), (1, 0, 0), (1, 0, 0), (0, 4, 0), (1, 0, 0), (10, 0, 0)],
    )
    for block_id, asu_indices in enumerate(asu_indices_per_block):
        assert list(block_list[block_id].Ih_table["asu_miller_index"]) == asu_indices

    # Expected selections, laid out as [block][dataset].
    selections_per_block = (
        ([1, 5, 3], []),
        ([4, 0, 2], [3, 0, 2]),
    )
    for block_id, per_dataset in enumerate(selections_per_block):
        for dataset_id, selection in enumerate(per_dataset):
            found = block_list[block_id].block_selections[dataset_id]
            assert list(found) == selection

    # 'get_block_selections_for_dataset' should return the same selections,
    # this time laid out as [dataset][block].
    selections_per_dataset = (
        ([1, 5, 3], [4, 0, 2]),
        ([], [3, 0, 2]),
    )
    for dataset_id, per_block in enumerate(selections_per_dataset):
        block_sels = Ih_table.get_block_selections_for_dataset(dataset=dataset_id)
        assert len(block_sels) == 2
        for block_id, selection in enumerate(per_block):
            assert list(block_sels[block_id]) == selection

    # The 'size' property counts the reflections across all blocks.
    assert Ih_table.size == 9

    # Expected h_index matrix of the first block: entries on the diagonal.
    expected_h_idx_matrix = sparse.matrix(4, 3)
    for row, col in ((0, 0), (1, 1), (2, 2)):
        expected_h_idx_matrix[row, col] = 1
    assert block_list[0].h_index_matrix == expected_h_idx_matrix

    # Expected h_index matrix of the second block: the column set in each
    # of the first six rows.
    expected_h_idx_matrix = sparse.matrix(7, 3)
    for row, col in enumerate((0, 1, 1, 0, 1, 2)):
        expected_h_idx_matrix[row, col] = 1
    assert block_list[1].h_index_matrix == expected_h_idx_matrix

    # Push new intensities for dataset_id=0 and check where they land in
    # each block.
    new_intensities = flex.double([60.0, 50.0, 40.0, 30.0, 20.0, 10.0])
    Ih_table.update_data_in_blocks(data=new_intensities, dataset_id=0)
    intensities_per_block = (
        [50.0, 10.0, 30.0],
        [20.0, 60.0, 40.0, 30.0, 60.0, 10.0],
    )
    for block_id, intensities in enumerate(intensities_per_block):
        assert list(Ih_table.blocked_data_list[block_id].intensities) == intensities

    # Same check for variances, this time updating dataset_id=1.
    new_vars = flex.double([100.0, 200.0, 300.0, 400.0])
    Ih_table.update_data_in_blocks(data=new_vars, dataset_id=1, column="variance")
    variances_per_block = (
        [100.0, 50.0, 60.0],
        [30.0, 90.0, 90.0, 400.0, 100.0, 300.0],
    )
    for block_id, variances in enumerate(variances_per_block):
        assert list(Ih_table.blocked_data_list[block_id].variances) == variances