Beispiel #1
0
 def _intialise_target(self):
     """Build the cosym target from the current parameters.

     Resolves the requested dimensionality (``Auto`` becomes ``None`` so
     the target chooses), optionally reduces the user-supplied lattice
     group to the primitive setting of its derived Patterson group, then
     constructs ``self.target``.
     """
     params = self.params
     # Auto means "let the target decide"; anything else passes through.
     dimensions = None if params.dimensions is Auto else params.dimensions
     if params.lattice_group is not None:
         patterson = params.lattice_group.group().build_derived_patterson_group()
         self.lattice_group = patterson.info().primitive_setting().group()
     self.target = target.Target(
         self.intensities,
         self.dataset_ids.as_numpy_array(),
         min_pairs=params.min_pairs,
         lattice_group=self.lattice_group,
         dimensions=dimensions,
         weights=params.weights,
     )
Beispiel #2
0
def test_cosym_target(space_group):
    """Check the cosym target's functional, gradients and curvatures.

    Generates synthetic datasets in *space_group*, concatenates them with
    per-reflection dataset labels, then for each weighting scheme verifies
    analytical gradients/curvatures against finite differences and that
    LBFGS minimisation decreases the functional to a near-stationary point.
    """
    datasets, expected_reindexing_ops = generate_test_data(
        space_group=sgtbx.space_group_info(symbol=space_group).group(),
        sample_size=50)

    intensities = datasets[0]
    dataset_ids = np.zeros(intensities.size() * len(datasets))
    # Concatenate the remaining datasets, labelling each reflection with the
    # index of the dataset it came from (dataset 0 keeps the zero labels).
    for i, d in enumerate(datasets[1:], start=1):
        intensities = intensities.concatenate(d,
                                              assert_is_similar_symmetry=False)
        dataset_ids[i * d.size():(i + 1) * d.size()] = np.full(d.size(),
                                                               i,
                                                               dtype=int)

    for weights in [None, "count", "standard_error"]:
        print(weights)
        t = target.Target(intensities, dataset_ids, weights=weights)
        m = len(t.sym_ops)
        n = len(datasets)
        assert t.dim == m
        assert t.rij_matrix.shape == (n * m, n * m)
        # Random starting coordinates; compare analytical gradients and
        # curvatures against their finite-difference estimates.
        x = flex.random_double(n * m * t.dim).as_numpy_array()
        f0 = t.compute_functional(x)
        g = t.compute_gradients(x)
        g_fd = t.compute_gradients_fd(x)
        np.testing.assert_allclose(g, g_fd, rtol=2e-3)
        c = t.curvatures(x)
        c_fd = t.curvatures_fd(x, eps=1e-3)
        assert list(c) == pytest.approx(c_fd, rel=0.8e-1)

        if weights == "count":
            # Absolute upper limit on weights
            assert t.wij_matrix.max() <= datasets[0].size()

        minimizer = engine.lbfgs_with_curvs(target=t, coords=x)
        # check functional has decreased and gradients are approximately zero
        f = t.compute_functional(minimizer.coords)
        g = t.compute_gradients(minimizer.coords)
        g_fd = t.compute_gradients_fd(minimizer.coords)
        assert f < f0
        assert pytest.approx(g, abs=1e-3) == [0] * len(g)
        assert pytest.approx(g_fd, abs=1e-3) == [0] * len(g)
Beispiel #3
0
def test_cosym_target(space_group):
    """Check the cosym target's functional, gradients and curvatures.

    Generates synthetic datasets in *space_group*, concatenates them with
    per-reflection dataset labels, then for each weighting scheme verifies
    analytical gradients/curvatures against finite differences and that
    LBFGS minimisation decreases the functional to a near-stationary point.
    """
    datasets, expected_reindexing_ops = generate_test_data(
        space_group=sgtbx.space_group_info(symbol=space_group).group(),
        sample_size=50)

    intensities = datasets[0]
    dataset_ids = flex.double(intensities.size(), 0)
    # Concatenate the remaining datasets, labelling each reflection with the
    # (1-based) index of the dataset it came from.
    for i, d in enumerate(datasets[1:]):
        intensities = intensities.concatenate(d,
                                              assert_is_similar_symmetry=False)
        dataset_ids.extend(flex.double(d.size(), i + 1))

    for weights in [None, "count", "standard_error"]:
        print(weights)
        t = target.Target(intensities, dataset_ids, weights=weights)
        m = len(t.get_sym_ops())
        n = len(datasets)
        assert t.dim == m
        assert t.rij_matrix.all() == (n * m, n * m)
        x = flex.random_double(n * m * t.dim)
        f0, g = t.compute_functional_and_gradients(x)
        g_fd = t.compute_gradients_fd(x)
        # Compare each analytical gradient element with its finite-difference
        # estimate. NOTE: use a dedicated loop index so we don't shadow the
        # dataset count `n` used above.
        for idx, (g_i, g_fd_i) in enumerate(zip(g, g_fd)):
            assert g_i == pytest.approx(g_fd_i, rel=2e-3), idx

        c = t.curvatures(x)
        c_fd = t.curvatures_fd(x, eps=1e-3)
        assert list(c) == pytest.approx(c_fd, rel=0.8e-1)

        assert engine.lbfgs_with_curvs(target=t, coords=x)
        t.compute_functional(x)
        # check functional has decreased and gradients are approximately zero
        f, g = t.compute_functional_and_gradients(x)
        g_fd = t.compute_gradients_fd(x)
        assert f < f0
        assert pytest.approx(g, abs=1e-3) == [0] * len(g)
        assert pytest.approx(g_fd, abs=1e-3) == [0] * len(g)
Beispiel #4
0
def test_cosym_target(space_group):
    """Check the cosym target's functional, gradients and curvatures.

    Generates synthetic datasets in *space_group*, then for each weighting
    scheme verifies analytical gradients/curvatures against finite
    differences and that LBFGS minimisation decreases the functional to a
    near-stationary point.
    """
    datasets, expected_reindexing_ops = generate_test_data(
        space_group=sgtbx.space_group_info(symbol=space_group).group())

    for weights in [None, 'count', 'standard_error']:

        t = target.Target(
            datasets,
            weights=weights,
        )
        m = len(t.get_sym_ops())
        n = len(datasets)
        assert t.dim == m
        assert t.rij_matrix.all() == (n * m, n * m)
        # Random starting coordinates; compare analytical gradients and
        # curvatures against their finite-difference estimates.
        x = flex.random_double(n * m * t.dim)
        f0, g = t.compute_functional_and_gradients(x)
        g_fd = t.compute_gradients_fd(x)
        assert g.all_approx_equal_relatively(g_fd, relative_error=1e-4)

        c = t.curvatures(x)
        c_fd = t.curvatures_fd(x, eps=1e-3)
        assert c.all_approx_equal_relatively(c_fd, relative_error=0.5e-1)

        # The minimizer updates `x` in place.
        engine.lbfgs_with_curvs(
            target=t,
            coords=x,
            verbose=False,
        )
        t.compute_functional(x)
        # check functional has decreased and gradients are approximately zero
        f, g = t.compute_functional_and_gradients(x)
        g_fd = t.compute_gradients_fd(x)
        assert f < f0
        assert g.all_approx_equal(0, 1e-3)
        assert g_fd.all_approx_equal(0, 1e-3)
Beispiel #5
0
    def __init__(self, datasets, params):
        """Run the cosym analysis on *datasets*.

        Builds the cosym target, optionally determines the best number of
        dimensions automatically via an elbow-point analysis of functional
        vs. dimension, then optimises and runs PCA, cosine and cluster
        analysis, optionally saving diagnostic plots.

        :param datasets: sequence of datasets; all must share one space group
        :param params: parameter object (dimensions, lattice_group, min_pairs,
            weights, nproc, verbose, save_plot, plot_prefix, ...)
        """
        self.datasets = datasets
        self.params = params

        # All input datasets must be indexed in the same space group.
        self.input_space_group = None
        for dataset in datasets:
            if self.input_space_group is None:
                self.input_space_group = dataset.space_group()
            else:
                assert dataset.space_group() == self.input_space_group

        # Auto means the dimensionality is chosen by the elbow analysis below.
        if self.params.dimensions is Auto:
            dimensions = None
        else:
            dimensions = self.params.dimensions
        lattice_group = None
        if self.params.lattice_group is not None:
            lattice_group = self.params.lattice_group.group()
        self.target = target.Target(
            self.datasets,
            min_pairs=self.params.min_pairs,
            lattice_group=lattice_group,
            dimensions=dimensions,
            verbose=self.params.verbose,
            weights=self.params.weights,
            nproc=self.params.nproc,
        )
        if self.params.dimensions is Auto:
            # Optimise at each candidate dimensionality and record the
            # resulting functional and explained-variance statistics.
            dimensions = []
            functional = []
            explained_variance = []
            explained_variance_ratio = []
            for dim in range(1, self.target.dim + 1):
                self.target.set_dimensions(dim)
                self.optimise()
                logger.info('Functional: %g' % self.minimizer.f)
                self.principal_component_analysis()
                dimensions.append(dim)
                functional.append(self.minimizer.f)
                explained_variance.append(self.explained_variance)
                explained_variance_ratio.append(self.explained_variance_ratio)

            # Find the elbow point of the curve, in the same manner as that used by
            # distl spotfinder for resolution method 1 (Zhang et al 2006).
            # See also dials/algorithms/spot_finding/per_image_analysis.py

            from scitbx import matrix
            x = flex.double(dimensions)
            y = flex.double(functional)
            # Slope of the chord from each point to the last point; the
            # steepest chord anchors the elbow search.
            slopes = (y[-1] - y[:-1]) / (x[-1] - x[:-1])
            p_m = flex.min_index(slopes)

            x1 = matrix.col((x[p_m], y[p_m]))
            x2 = matrix.col((x[-1], y[-1]))

            # Perpendicular distance of each point from the chord x1-x2;
            # the maximum gap marks the elbow.
            gaps = flex.double()
            v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize()

            for i in range(p_m, len(x)):
                x0 = matrix.col((x[i], y[i]))
                r = x1 - x0
                g = abs(v.dot(r))
                gaps.append(g)

            p_k = flex.max_index(gaps)
            g_k = gaps[p_k]
            p_g = p_k

            # Elbow coordinates (offset by p_m since gaps start there).
            x_g = x[p_g + p_m]
            y_g = y[p_g + p_m]

            logger.info('Best number of dimensions: %i' % x_g)
            self.target.set_dimensions(int(x_g))

            if params.save_plot:
                from matplotlib import pyplot as plt
                fig = plt.figure(figsize=(10, 8))
                plt.clf()
                plt.plot(dimensions, functional)
                # Vertical line marking the chosen dimensionality.
                plt.plot([x_g, x_g], plt.ylim())
                plt.xlabel('Dimensions')
                plt.ylabel('Functional')
                plt.savefig('%sfunctional_vs_dimension.png' %
                            params.plot_prefix)

                plt.clf()
                for dim, expl_var in zip(dimensions, explained_variance):
                    plt.plot(range(1, dim + 1), expl_var, label='%s' % dim)
                plt.plot([x_g, x_g], plt.ylim())
                plt.xlabel('Dimension')
                plt.ylabel('Explained variance')
                plt.savefig('%sexplained_variance_vs_dimension.png' %
                            params.plot_prefix)

                plt.clf()
                for dim, expl_var_ratio in zip(dimensions,
                                               explained_variance_ratio):
                    plt.plot(range(1, dim + 1),
                             expl_var_ratio,
                             label='%s' % dim)
                plt.plot([x_g, x_g], plt.ylim())
                plt.xlabel('Dimension')
                plt.ylabel('Explained variance ratio')
                plt.savefig('%sexplained_variance_ratio_vs_dimension.png' %
                            params.plot_prefix)
                plt.close(fig)

        # Final optimisation at the chosen dimensionality, then downstream
        # analyses.
        self.optimise()
        self.principal_component_analysis()

        self.cosine_analysis()
        self.cluster_analysis()
        if self.params.save_plot:
            self.plot()