def _intialise_target(self):
    """Construct the cosym target function and store it as ``self.target``.

    Reads the dimensions, lattice_group, min_pairs and weights settings
    from ``self.params`` and builds a ``target.Target`` over the stored
    intensities and dataset ids.
    """
    # Auto means "let the target decide"; translate it to None for the API.
    dimensions = None if self.params.dimensions is Auto else self.params.dimensions
    if self.params.lattice_group is not None:
        # Reduce the requested lattice group to the primitive setting of its
        # derived Patterson group before handing it to the target.
        requested = self.params.lattice_group.group()
        patterson = requested.build_derived_patterson_group()
        self.lattice_group = patterson.info().primitive_setting().group()
    # NOTE(review): when params.lattice_group is None, self.lattice_group is
    # assumed to have been initialised elsewhere (e.g. in __init__) — confirm.
    self.target = target.Target(
        self.intensities,
        self.dataset_ids.as_numpy_array(),
        min_pairs=self.params.min_pairs,
        lattice_group=self.lattice_group,
        dimensions=dimensions,
        weights=self.params.weights,
    )
def test_cosym_target(space_group):
    """Check the cosym target's functional, gradients and curvatures.

    Analytical gradients/curvatures must agree with finite-difference
    estimates, and the L-BFGS minimiser must reduce the functional to a
    point where the gradients are approximately zero.
    """
    datasets, expected_reindexing_ops = generate_test_data(
        space_group=sgtbx.space_group_info(symbol=space_group).group(),
        sample_size=50)
    intensities = datasets[0]
    dataset_ids = np.zeros(intensities.size() * len(datasets))
    # Concatenate the remaining datasets and label each reflection with its
    # dataset index; enumerate(..., start=1) replaces the old `i += 1` hack.
    # NOTE(review): the slice arithmetic assumes every dataset has the same
    # size — TODO confirm generate_test_data guarantees this.
    for i, d in enumerate(datasets[1:], start=1):
        intensities = intensities.concatenate(d, assert_is_similar_symmetry=False)
        # Assigning the scalar broadcasts over the slice; np.full was redundant.
        dataset_ids[i * d.size():(i + 1) * d.size()] = i
    for weights in [None, "count", "standard_error"]:
        print(weights)
        t = target.Target(intensities, dataset_ids, weights=weights)
        m = len(t.sym_ops)
        n = len(datasets)
        assert t.dim == m
        assert t.rij_matrix.shape == (n * m, n * m)
        x = flex.random_double(n * m * t.dim).as_numpy_array()
        f0 = t.compute_functional(x)
        g = t.compute_gradients(x)
        g_fd = t.compute_gradients_fd(x)
        np.testing.assert_allclose(g, g_fd, rtol=2e-3)
        c = t.curvatures(x)
        c_fd = t.curvatures_fd(x, eps=1e-3)
        assert list(c) == pytest.approx(c_fd, rel=0.8e-1)
        if weights == "count":
            # Absolute upper limit on weights
            assert t.wij_matrix.max() <= datasets[0].size()
        minimizer = engine.lbfgs_with_curvs(target=t, coords=x)
        # check functional has decreased and gradients are approximately zero
        f = t.compute_functional(minimizer.coords)
        g = t.compute_gradients(minimizer.coords)
        g_fd = t.compute_gradients_fd(minimizer.coords)
        assert f < f0
        assert pytest.approx(g, abs=1e-3) == [0] * len(g)
        assert pytest.approx(g_fd, abs=1e-3) == [0] * len(g)
def test_cosym_target(space_group):
    """Check the cosym target's functional, gradients and curvatures.

    Analytical gradients/curvatures must agree with finite-difference
    estimates, and the L-BFGS minimiser must reduce the functional to a
    point where the gradients are approximately zero.
    """
    datasets, expected_reindexing_ops = generate_test_data(
        space_group=sgtbx.space_group_info(symbol=space_group).group(),
        sample_size=50)
    intensities = datasets[0]
    dataset_ids = flex.double(intensities.size(), 0)
    for i, d in enumerate(datasets[1:]):
        intensities = intensities.concatenate(d, assert_is_similar_symmetry=False)
        dataset_ids.extend(flex.double(d.size(), i + 1))
    for weights in [None, "count", "standard_error"]:
        print(weights)
        t = target.Target(intensities, dataset_ids, weights=weights)
        m = len(t.get_sym_ops())
        n = len(datasets)
        assert t.dim == m
        assert t.rij_matrix.all() == (n * m, n * m)
        x = flex.random_double(n * m * t.dim)
        f0, g = t.compute_functional_and_gradients(x)
        g_fd = t.compute_gradients_fd(x)
        # Loop variable renamed from `n`, which shadowed n = len(datasets).
        for idx, (g_i, g_fd_i) in enumerate(zip(g, g_fd)):
            assert g_i == pytest.approx(g_fd_i, rel=2e-3), idx
        c = t.curvatures(x)
        c_fd = t.curvatures_fd(x, eps=1e-3)
        assert list(c) == pytest.approx(c_fd, rel=0.8e-1)
        # NOTE(review): the minimiser presumably refines x in place (the
        # post-minimisation functional below is computed on x) — confirm.
        assert engine.lbfgs_with_curvs(target=t, coords=x)
        t.compute_functional(x)
        # check functional has decreased and gradients are approximately zero
        f, g = t.compute_functional_and_gradients(x)
        g_fd = t.compute_gradients_fd(x)
        assert f < f0
        assert pytest.approx(g, abs=1e-3) == [0] * len(g)
        assert pytest.approx(g_fd, abs=1e-3) == [0] * len(g)
def test_cosym_target(space_group):
    """Check the cosym target's functional, gradients and curvatures.

    Analytical gradients/curvatures must agree with finite-difference
    estimates, and the L-BFGS minimiser must reduce the functional to a
    point where the gradients are approximately zero.
    """
    datasets, expected_reindexing_ops = generate_test_data(
        space_group=sgtbx.space_group_info(symbol=space_group).group())
    for weights in [None, 'count', 'standard_error']:
        t = target.Target(
            datasets,
            weights=weights,
        )
        m = len(t.get_sym_ops())
        n = len(datasets)
        assert t.dim == m
        assert t.rij_matrix.all() == (n * m, n * m)
        x = flex.random_double(n * m * t.dim)
        f0, g = t.compute_functional_and_gradients(x)
        g_fd = t.compute_gradients_fd(x)
        assert g.all_approx_equal_relatively(g_fd, relative_error=1e-4)
        c = t.curvatures(x)
        c_fd = t.curvatures_fd(x, eps=1e-3)
        assert c.all_approx_equal_relatively(c_fd, relative_error=0.5e-1)
        # Called for its side effect only (unused `M = ...` binding removed;
        # an unused `x_orig = x.deep_copy()` was also dropped).
        # NOTE(review): presumably refines x in place, since the functional
        # below is recomputed on x — confirm.
        engine.lbfgs_with_curvs(
            target=t,
            coords=x,
            verbose=False,
        )
        t.compute_functional(x)
        # check functional has decreased and gradients are approximately zero
        f, g = t.compute_functional_and_gradients(x)
        g_fd = t.compute_gradients_fd(x)
        assert f < f0
        assert g.all_approx_equal(0, 1e-3)
        assert g_fd.all_approx_equal(0, 1e-3)
def __init__(self, datasets, params):
    """Run the full cosym analysis over *datasets* with the given *params*.

    Builds the target function, optionally scans dimensions 1..target.dim to
    pick the best embedding dimension by an elbow-point criterion, then runs
    optimisation, PCA, cosine analysis and cluster analysis, plotting if
    requested.
    """
    self.datasets = datasets
    self.params = params
    # All input datasets must share a single space group.
    self.input_space_group = None
    for dataset in datasets:
        if self.input_space_group is None:
            self.input_space_group = dataset.space_group()
        else:
            assert dataset.space_group() == self.input_space_group
    # Auto means "choose the dimension automatically" — pass None for now;
    # the scan below picks the actual value.
    if self.params.dimensions is Auto:
        dimensions = None
    else:
        dimensions = self.params.dimensions
    lattice_group = None
    if self.params.lattice_group is not None:
        lattice_group = self.params.lattice_group.group()
    self.target = target.Target(
        self.datasets,
        min_pairs=self.params.min_pairs,
        lattice_group=lattice_group,
        dimensions=dimensions,
        verbose=self.params.verbose,
        weights=self.params.weights,
        nproc=self.params.nproc,
    )
    if self.params.dimensions is Auto:
        # Scan every candidate dimension, optimising at each and recording
        # the final functional plus the PCA explained-variance curves.
        # Note: each iteration re-runs self.optimise(), so this is costly.
        dimensions = []
        functional = []
        explained_variance = []
        explained_variance_ratio = []
        for dim in range(1, self.target.dim + 1):
            self.target.set_dimensions(dim)
            self.optimise()
            logger.info('Functional: %g' % self.minimizer.f)
            self.principal_component_analysis()
            dimensions.append(dim)
            functional.append(self.minimizer.f)
            explained_variance.append(self.explained_variance)
            explained_variance_ratio.append(self.explained_variance_ratio)
        # Find the elbow point of the curve, in the same manner as that used by
        # distl spotfinder for resolution method 1 (Zhang et al 2006).
        # See also dials/algorithms/spot_finding/per_image_analysis.py
        from scitbx import matrix
        x = flex.double(dimensions)
        y = flex.double(functional)
        # Slope of the chord from each point to the last point of the curve.
        slopes = (y[-1] - y[:-1]) / (x[-1] - x[:-1])
        p_m = flex.min_index(slopes)
        x1 = matrix.col((x[p_m], y[p_m]))
        x2 = matrix.col((x[-1], y[-1]))
        gaps = flex.double()
        # Unit vector perpendicular to the chord x1->x2; the elbow is the
        # point with the largest perpendicular distance from that chord.
        v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize()
        for i in range(p_m, len(x)):
            x0 = matrix.col((x[i], y[i]))
            r = x1 - x0
            g = abs(v.dot(r))
            gaps.append(g)
        p_k = flex.max_index(gaps)
        g_k = gaps[p_k]  # NOTE(review): g_k is computed but never used.
        p_g = p_k
        # Indices in `gaps` are offset by p_m relative to x/y.
        x_g = x[p_g + p_m]
        y_g = y[p_g + p_m]
        logger.info('Best number of dimensions: %i' % x_g)
        self.target.set_dimensions(int(x_g))
        if params.save_plot:
            from matplotlib import pyplot as plt
            fig = plt.figure(figsize=(10, 8))
            plt.clf()
            # Functional vs dimension, with a vertical line at the elbow.
            plt.plot(dimensions, functional)
            plt.plot([x_g, x_g], plt.ylim())
            plt.xlabel('Dimensions')
            plt.ylabel('Functional')
            plt.savefig('%sfunctional_vs_dimension.png' % params.plot_prefix)
            plt.clf()
            # One explained-variance curve per scanned dimension.
            for dim, expl_var in zip(dimensions, explained_variance):
                plt.plot(range(1, dim + 1), expl_var, label='%s' % dim)
            plt.plot([x_g, x_g], plt.ylim())
            plt.xlabel('Dimension')
            plt.ylabel('Explained variance')
            plt.savefig('%sexplained_variance_vs_dimension.png' % params.plot_prefix)
            plt.clf()
            for dim, expl_var_ratio in zip(dimensions, explained_variance_ratio):
                plt.plot(range(1, dim + 1), expl_var_ratio, label='%s' % dim)
            plt.plot([x_g, x_g], plt.ylim())
            plt.xlabel('Dimension')
            plt.ylabel('Explained variance ratio')
            plt.savefig('%sexplained_variance_ratio_vs_dimension.png' % params.plot_prefix)
            plt.close(fig)
    # Final analysis at the chosen dimension.
    self.optimise()
    self.principal_component_analysis()
    self.cosine_analysis()
    self.cluster_analysis()
    if self.params.save_plot:
        self.plot()