Esempio n. 1
0
    def split_data_by_image(self, test_fraction=0.5):
        """
        Split the data into train/test sets at whole-image granularity.

        Every reflection belonging to a given image ends up in the same
        partition.

        Parameters
        ----------
        test_fraction : float (optional)
            Approximate fraction of images assigned to the test set.

        Returns
        -------
        train : tuple
        test  : tuple
        """
        image_id = BaseModel.get_image_id(self.inputs)
        n_images = image_id.max() + 1
        per_image_test = np.random.random(n_images) <= test_fraction

        # Guarantee both partitions are non-empty when there are very
        # few images (mostly just for testing purposes).
        if True not in per_image_test:
            per_image_test[0] = True
        elif False not in per_image_test:
            per_image_test[0] = False

        # Broadcast the per-image flags down to per-reflection flags.
        mask = per_image_test[image_id]
        if BaseModel.is_laue(self.inputs):
            splitter = self.split_laue_data_by_mask
        else:
            splitter = self.split_mono_data_by_mask
        train, test = splitter(mask)
        return train, test
Esempio n. 2
0
def test_laue_StudentTLikelihood(dof, laue_inputs):
    """
    The laue StudentT likelihood should reproduce tfd.StudentT log probs
    for the unpadded portion of the inputs, for both single and larger
    prediction batches.
    """
    likelihood = StudentTLikelihood(dof)(laue_inputs)
    iobs = BaseModel.get_intensities(laue_inputs)
    sigiobs = BaseModel.get_uncertainties(laue_inputs)
    ipred = fake_ipred(laue_inputs)

    l_true = tfd.StudentT(dof, iobs, sigiobs)

    # Smoke test: convolving predictions over harmonics must not raise.
    likelihood.convolve(ipred)

    nobs = BaseModel.get_harmonic_id(laue_inputs).max() + 1

    test = likelihood.log_prob(ipred).numpy()
    expected = l_true.log_prob(iobs).numpy().T

    #The zero padded entries at the end of the input will disagree
    #with the expected values. This is fine, because they will not
    #contribute to the gradient
    test = test[:, :nobs]
    expected = expected[:, :nobs]

    assert np.array_equal(expected.shape, test.shape)
    assert np.allclose(expected, test)

    #Test batches larger than 1
    ipred = np.concatenate((ipred, ipred, ipred), axis=0)
    likelihood.convolve(ipred).numpy()
    test = likelihood.log_prob(ipred).numpy()
    test = test[:, :nobs]
    assert np.array_equiv(expected, test)
Esempio n. 3
0
    def split_data_by_refl(self, test_fraction=0.5):
        """
        Split the data into train/test sets at reflection granularity.

        Parameters
        ----------
        test_fraction : float (optional)
            Approximate fraction of reflections assigned to the test set.

        Returns
        -------
        train : tuple
        test  : tuple
        """
        if BaseModel.is_laue(self.inputs):
            # Keep all harmonics of one observation together: draw one
            # flag per harmonic_id and broadcast it back to every row.
            harmonic_id = BaseModel.get_harmonic_id(self.inputs)
            flags = np.random.random(harmonic_id.max() + 1) <= test_fraction
            return self.split_laue_data_by_mask(flags[harmonic_id])

        mask = np.random.random(len(self.inputs[0])) <= test_fraction
        return self.split_mono_data_by_mask(mask)
Esempio n. 4
0
def test_laue(likelihood_model, prior_model, scaling_model, laue_inputs,
              mc_samples):
    """
    Integration test for laue data: build a VariationalMergingModel from
    the parametrized likelihood, prior, and scaling models, run a forward
    pass, check the predictions are finite, and confirm the model compiles.
    """
    nrefls = np.max(BaseModel.get_refl_id(laue_inputs)) + 1
    n_images = np.max(BaseModel.get_image_id(laue_inputs)) + 1

    # Degrees of freedom, used only by the Student-T likelihood/prior.
    dof = 4.
    if likelihood_model == StudentTLikelihood:
        likelihood = likelihood_model(dof)
    else:
        likelihood = likelihood_model()

    # NOTE(review): WilsonPrior's positional args look like
    # (centric_flags, epsilons) — confirm against its constructor.
    if prior_model == WilsonPrior:
        prior = prior_model(
            np.random.choice([True, False], nrefls),
            np.ones(nrefls).astype('float32'),
        )
    elif prior_model == StudentTReferencePrior:
        prior = prior_model(
            np.ones(nrefls).astype('float32'),
            np.ones(nrefls).astype('float32'), dof)
    else:
        prior = prior_model(
            np.ones(nrefls).astype('float32'),
            np.ones(nrefls).astype('float32'),
        )

    mlp_scaler = MLPScaler(2, 3)
    if scaling_model == HybridImageScaler:
        image_scaler = ImageScaler(n_images)
        scaler = HybridImageScaler(mlp_scaler, image_scaler)
    elif scaling_model == MLPScaler:
        scaler = mlp_scaler

    # Truncated-normal surrogate initialized at the prior mean with a
    # ten-fold tighter scale; Softplus keeps the scale positive.
    surrogate_posterior = tfd.TruncatedNormal(
        tf.Variable(prior.mean()),
        tfp.util.TransformedVariable(
            prior.stddev() / 10.,
            tfb.Softplus(),
        ),
        low=1e-5,
        high=1e10,
    )

    merger = VariationalMergingModel(surrogate_posterior, prior, likelihood,
                                     scaler, mc_samples)
    ipred = merger(laue_inputs)

    # One forward pass must yield finite predicted intensities.
    isfinite = np.all(np.isfinite(ipred.numpy()))
    assert isfinite

    # The model must also compile without an explicit mc_samples value.
    merger = VariationalMergingModel(surrogate_posterior, prior, likelihood,
                                     scaler)
    merger.compile('Adam')
Esempio n. 5
0
def test_mono_LaplaceLikelihood(mono_inputs):
    """The careless Laplace likelihood must match tfd.Laplace log probs."""
    likelihood = LaplaceLikelihood()(mono_inputs)
    iobs = BaseModel.get_intensities(mono_inputs)
    sigiobs = BaseModel.get_uncertainties(mono_inputs)

    # tfd.Laplace takes a scale parameter; dividing the uncertainty by
    # sqrt(2) converts a standard deviation into a Laplace scale.
    loc = tf.squeeze(iobs)
    scale = tf.squeeze(sigiobs) / np.sqrt(2.)
    reference = tfd.Laplace(loc, scale)

    z = reference.sample()
    assert np.allclose(likelihood.log_prob(z), reference.log_prob(z))
Esempio n. 6
0
def test_mono_StudentTLikelihood(dof, mono_inputs):
    """The careless StudentT likelihood must match tfd.StudentT log probs."""
    likelihood = StudentTLikelihood(dof)(mono_inputs)
    iobs = BaseModel.get_intensities(mono_inputs)
    sigiobs = BaseModel.get_uncertainties(mono_inputs)

    loc = tf.squeeze(iobs)
    scale = tf.squeeze(sigiobs)
    reference = tfd.StudentT(dof, loc, scale)

    z = reference.sample()
    assert np.allclose(likelihood.log_prob(z), reference.log_prob(z))
Esempio n. 7
0
    def get_predictions(self, model, inputs=None):
        """
        Extract per-reflection predictions from a merging model.

        Parameters
        ----------
        model : VariationalMergingModel
            A merging model from careless
        inputs : tuple (optional)
            Inputs for which to make the predictions. If None, self.inputs
            is used.

        Returns
        -------
        predictions : tuple
            A tuple of rs.DataSet objects containing the predictions for
            each ReciprocalASU contained in self.asu_collection
        """
        if inputs is None:
            inputs = self.inputs

        refl_id = BaseModel.get_refl_id(inputs)
        iobs = BaseModel.get_intensities(inputs).flatten()
        sig_iobs = BaseModel.get_uncertainties(inputs).flatten()
        asu_id, H = self.asu_collection.to_asu_id_and_miller_index(refl_id)
        # Predicted intensity mean and standard deviation from the model.
        ipred, sigipred = model.prediction_mean_stddev(inputs)

        h, k, l = H.T
        predictions = ()
        for asu_number, asu in enumerate(self.asu_collection):
            # Select only the observations belonging to this ASU.
            sel = (asu_id == asu_number).flatten()
            ds = rs.DataSet(
                {
                    'H': h[sel],
                    'K': k[sel],
                    'L': l[sel],
                    'Iobs': iobs[sel],
                    'SigIobs': sig_iobs[sel],
                    'Ipred': ipred[sel],
                    'SigIpred': sigipred[sel],
                },
                cell=asu.cell,
                spacegroup=asu.spacegroup,
                merged=False,
            )
            ds = ds.infer_mtz_dtypes().set_index(['H', 'K', 'L'])
            predictions += (ds, )
        return predictions
Esempio n. 8
0
def test_mono_formatter(
    intensity_key,
    sigma_key,
    image_id_key,
    separate_outputs,
    anomalous,
    dmin,
    isigi_cutoff,
    positional_encoding_keys,
    encoding_bit_depth,
    mono_data_set,
):
    """MonoFormatter output arrays must be 2-D, typed, and equal length."""
    ds = mono_data_set.copy()
    formatter = MonoFormatter(
        intensity_key,
        sigma_key,
        image_id_key,
        metadata_keys,
        separate_outputs,
        anomalous,
        dmin,
        isigi_cutoff,
        positional_encoding_keys,
        encoding_bit_depth,
    )
    inputs, rac = formatter([ds])

    expected_length = inputs[0].shape[0] if inputs else None
    for array in inputs:
        assert array.ndim == 2
        assert array.dtype in ('float32', 'int64')
        assert array.shape[0] == expected_length

    # Smoke test: the packed inputs must expose a metadata block.
    BaseModel.get_metadata(inputs)
Esempio n. 9
0
        def split(inputs, idx):
            """
            Return the subset of `inputs` selected by boolean mask `idx`,
            rewriting the harmonic_id column so its values stay contiguous
            after the split.
            """
            harmonic_id = BaseModel.get_harmonic_id(inputs)

            result = ()
            # uni: surviving harmonic ids (sorted, unique);
            # inv: new contiguous id for every selected row.
            uni, inv = np.unique(harmonic_id[idx], return_inverse=True)
            for i, v in enumerate(inputs):
                name = BaseModel.get_name_by_index(i)
                if name in ('intensities', 'uncertainties'):
                    # These arrays are indexed by harmonic_id (one row per
                    # group — presumably; confirm against the formatter).
                    # Keep the surviving groups, then zero... rather,
                    # one-pad back to the per-row length so downstream
                    # shapes still line up.
                    v = v[uni]
                    v = np.pad(v, [[0, len(inv) - len(v)], [0, 0]],
                               constant_values=1.)
                elif name == 'harmonic_id':
                    # Replace old ids with the re-numbered contiguous ids.
                    v = inv[:, None]
                else:
                    # Plain per-row arrays are just masked.
                    v = v[idx.flatten(), ...]
                result += (v, )
            return result
Esempio n. 10
0
    def get_tf_dataset(self, inputs=None):
        """
        Pack a dataset in the way that keras and careless expect.

        Parameters
        ----------
        inputs : tuple (optional)
            If None, self.inputs will be used

        Returns
        -------
        tf.data.Dataset
            A dataset of (inputs, intensities, uncertainties) batched as
            a single batch containing every record.
        """
        inputs = tuple(self.inputs if inputs is None else inputs)
        iobs = BaseModel.get_intensities(inputs)
        sigiobs = BaseModel.get_uncertainties(inputs)
        dataset = tf.data.Dataset.from_tensor_slices((inputs, iobs, sigiobs))
        return dataset.batch(len(iobs))
Esempio n. 11
0
def test_laue_NormalLikelihood(laue_inputs):
    """
    The laue normal likelihood should reproduce tfd.Normal log probs for
    both single and larger prediction batches.
    """
    likelihood = NormalLikelihood()(laue_inputs)
    iobs = BaseModel.get_intensities(laue_inputs)
    sigiobs = BaseModel.get_uncertainties(laue_inputs)
    ipred = fake_ipred(laue_inputs)

    l_true = tfd.Normal(iobs, sigiobs)

    # Smoke test: convolving predictions over harmonics must not raise.
    likelihood.convolve(ipred)

    test = likelihood.log_prob(ipred).numpy()
    expected = l_true.log_prob(iobs).numpy()
    assert np.array_equal(expected.shape, test.T.shape)
    assert np.allclose(expected, test.T)

    #Test batches larger than 1
    ipred = np.concatenate((ipred, ipred, ipred), axis=0)
    likelihood.convolve(ipred).numpy()
    test = likelihood.log_prob(ipred).numpy()
    assert np.array_equiv(expected, test.T)
Esempio n. 12
0
 def pack_inputs(self, inputs_dict):
     """
     Assemble an inputs tuple from a dict of named arrays.

     Parameters
     ----------
     inputs_dict : dict
         {k: v} where k corresponds to one of
         careless.models.base.BaseModel.input_index.keys()

     Returns
     -------
     tuple
         Values ordered by input index, truncated at the first index
         whose name is missing from inputs_dict.
     """
     packed = ()
     for index in range(len(BaseModel.input_index)):
         name = BaseModel.get_name_by_index(index)
         if name not in inputs_dict:
             break
         packed += (inputs_dict[name], )
     return packed
Esempio n. 13
0
    def split_laue_data_by_mask(self, test_idx):
        """
        Method for splitting laue data given a boolean mask. 
        This method will split up the data and alter the harmonic_id
        column to reflect the decrease in size of the array. 

        Parameters
        ----------
        test_idx : array (boolean)
            Boolean array with length of inputs.

        Returns
        -------
        train : tuple
        test  : tuple

        Raises
        ------
        ValueError
            If the mask assigns rows sharing a harmonic_id to both
            partitions.
        """
        harmonic_id = BaseModel.get_harmonic_id(self.inputs)

        # Let us just test that the boolean mask is valid for these data.
        # If it does not split observations, isect should be empty
        isect = np.intersect1d(
            harmonic_id[test_idx].flatten(),
            harmonic_id[~test_idx].flatten(),
        )
        if len(isect) > 0:
            raise ValueError(
                f"test_idx splits harmonic observations with harmonic_id : {isect}"
            )

        def split(inputs, idx):
            """
            Return the subset of `inputs` selected by boolean mask `idx`,
            rewriting harmonic_id so its values stay contiguous.
            """
            harmonic_id = BaseModel.get_harmonic_id(inputs)

            result = ()
            # uni: surviving harmonic ids; inv: new contiguous id per row.
            uni, inv = np.unique(harmonic_id[idx], return_inverse=True)
            for i, v in enumerate(inputs):
                name = BaseModel.get_name_by_index(i)
                if name in ('intensities', 'uncertainties'):
                    # These arrays are indexed by harmonic_id; keep the
                    # surviving groups and one-pad back to per-row length
                    # so downstream shapes still line up.
                    v = v[uni]
                    v = np.pad(v, [[0, len(inv) - len(v)], [0, 0]],
                               constant_values=1.)
                elif name == 'harmonic_id':
                    # Replace old ids with the re-numbered contiguous ids.
                    v = inv[:, None]
                else:
                    # Plain per-row arrays are just masked.
                    v = v[idx.flatten(), ...]
                result += (v, )
            return result

        return split(self.inputs, ~test_idx), split(self.inputs, test_idx)
Esempio n. 14
0
def fake_ipred(inputs):
    """
    Build a deterministic stand-in for predicted intensities: each
    observed intensity is divided evenly across the rows sharing its
    harmonic_id, returned with a leading batch dimension of 1.
    """
    harmonic_id = BaseModel.get_harmonic_id(inputs).flatten()
    intensities = BaseModel.get_intensities(inputs).flatten()
    rows_per_group = np.bincount(harmonic_id)
    ipred = intensities[harmonic_id] / rows_per_group[harmonic_id]
    return ipred[None, :].astype('float32')
Esempio n. 15
0
def test_is_laue(laue_inputs, mono_inputs):
    """Only the laue input tuple should be identified as laue data."""
    for inputs, expect_laue in ((laue_inputs, True), (mono_inputs, False)):
        assert bool(BaseModel.is_laue(inputs)) == expect_laue
Esempio n. 16
0
    def get_results(self,
                    surrogate_posterior,
                    inputs=None,
                    output_parameters=True):
        """ 
        Extract results from a surrogate_posterior.

        Parameters
        ----------
        surrogate_posterior : tfd.Distribution
            A tensorflow_probability distribution or similar object with `mean` and `stddev` methods
        inputs : tuple (optional)
            Optionally use a different object from self.inputs to compute the redundancy of reflections.
        output_parameters : bool (optional)
            If True, output the parameters of the surrogate distribution in addition to the 
            moments. 

        Returns
        -------
        results : tuple
            A tuple of rs.DataSet objects containing the results corresponding to each 
            ReciprocalASU contained in self.asu_collection
        """
        if inputs is None:
            inputs = self.inputs
        # Posterior moments, one entry per reflection in the collection.
        F = surrogate_posterior.mean().numpy()
        SigF = surrogate_posterior.stddev().numpy()
        params = None
        if output_parameters:
            params = {}
            for k in sorted(surrogate_posterior.parameter_properties()):
                v = surrogate_posterior.parameters[k]
                numpify = lambda x: tf.convert_to_tensor(x).numpy()
                # Broadcast (possibly scalar) parameters to one value per
                # reflection so they can be stored as dataset columns.
                params[k] = numpify(v).flatten() * np.ones(len(F),
                                                           dtype='float32')
        asu_id, H = self.asu_collection.to_asu_id_and_miller_index(
            np.arange(len(F)))
        h, k, l = H.T
        refl_id = BaseModel.get_refl_id(inputs)
        # N counts how many observations contributed to each reflection.
        N = np.bincount(refl_id.flatten(), minlength=len(F)).astype('float32')
        results = ()
        for i, asu in enumerate(self.asu_collection):
            # Select only reflections belonging to this ASU.
            idx = asu_id == i
            idx = idx.flatten()
            output = rs.DataSet(
                {
                    'H': h[idx],
                    'K': k[idx],
                    'L': l[idx],
                    'F': F[idx],
                    'SigF': SigF[idx],
                    'N': N[idx],
                },
                cell=asu.cell,
                spacegroup=asu.spacegroup,
                merged=True,
            ).infer_mtz_dtypes().set_index(['H', 'K', 'L'])
            if params is not None:
                # Sorted key order keeps the column layout deterministic.
                for key in sorted(params.keys()):
                    val = params[key]
                    output[key] = rs.DataSeries(val[idx],
                                                index=output.index,
                                                dtype='R')

            # Remove unobserved refls
            output = output[output.N > 0]

            # Reformat anomalous data
            if asu.anomalous:
                output = output.unstack_anomalous()
                # PHENIX will expect the sf / error keys in a particular order.
                anom_keys = [
                    'F(+)', 'SigF(+)', 'F(-)', 'SigF(-)', 'N(+)', 'N(-)'
                ]
                reorder = anom_keys + [
                    key for key in output if key not in anom_keys
                ]
                output = output[reorder]

            results += (output, )
        return results
Esempio n. 17
0
def test_getters(laue_inputs, mono_inputs):
    """Every BaseModel getter should run without raising on valid inputs."""
    common_getters = (
        BaseModel.get_image_id,
        BaseModel.get_intensities,
        BaseModel.get_metadata,
        BaseModel.get_refl_id,
        BaseModel.get_uncertainties,
    )
    for inputs in (laue_inputs, mono_inputs):
        # Laue-only columns exist only on laue input tuples.
        if BaseModel.is_laue(inputs):
            BaseModel.get_harmonic_id(inputs)
            BaseModel.get_wavelength(inputs)
        for getter in common_getters:
            getter(inputs)
Esempio n. 18
0
    def build_model(self,
                    parser=None,
                    surrogate_posterior=None,
                    prior=None,
                    likelihood=None,
                    scaling_model=None,
                    mc_sample_size=None):
        """
        Build the model specified in parser, a careless.parser.parser.parse_args()
        result. Optionally override any of the parameters taken by the
        VariationalMergingModel constructor.
        The `parser` parameter is required if self.parser is not set.

        Parameters
        ----------
        parser : argparse.Namespace (optional)
            Parsed careless arguments; defaults to self.parser.
        surrogate_posterior, prior, likelihood, scaling_model : (optional)
            Explicit components to use instead of building them from parser.
        mc_sample_size : int (optional)
            Number of Monte Carlo samples; defaults to parser.mc_samples.

        Returns
        -------
        VariationalMergingModel
            A compiled merging model.

        Raises
        ------
        ValueError
            If no parser is supplied and self.parser is unset.
        """
        from careless.models.merging.surrogate_posteriors import TruncatedNormal
        from careless.models.merging.variational import VariationalMergingModel
        from careless.models.scaling.image import HybridImageScaler, ImageScaler
        from careless.models.scaling.nn import MLPScaler
        if parser is None:
            parser = self.parser
        if parser is None:
            raise ValueError("No parser supplied, but self.parser is unset")

        # Laue ('poly') and mono data use different likelihood classes.
        if parser.type == 'poly':
            from careless.models.likelihoods.laue import NormalLikelihood, StudentTLikelihood
        elif parser.type == 'mono':
            from careless.models.likelihoods.mono import NormalLikelihood, StudentTLikelihood

        if prior is None:
            prior = self.get_wilson_prior(parser.wilson_prior_b)
        # Start the surrogate at the prior mean with a ten-fold tighter scale.
        loc, scale = prior.mean(), prior.stddev() / 10.
        # low is 1e-32 where asu_collection.centric is truthy, 0 elsewhere.
        low = (1e-32 * self.asu_collection.centric).astype('float32')
        if surrogate_posterior is None:
            surrogate_posterior = TruncatedNormal.from_loc_and_scale(
                loc, scale, low)

        if likelihood is None:
            dof = parser.studentt_likelihood_dof
            if dof is None:
                likelihood = NormalLikelihood()
            else:
                likelihood = StudentTLikelihood(dof)

        if scaling_model is None:
            mlp_width = parser.mlp_width
            if mlp_width is None:
                # Default the MLP width to the metadata dimensionality.
                mlp_width = BaseModel.get_metadata(self.inputs).shape[-1]

            mlp_scaler = MLPScaler(parser.mlp_layers, mlp_width)
            if parser.use_image_scales:
                n_images = np.max(BaseModel.get_image_id(self.inputs)) + 1
                image_scaler = ImageScaler(n_images)
                scaling_model = HybridImageScaler(mlp_scaler, image_scaler)
            else:
                scaling_model = mlp_scaler

        # Fix: honor the mc_sample_size override. Previously this parameter
        # was accepted but silently ignored in favor of parser.mc_samples.
        if mc_sample_size is None:
            mc_sample_size = parser.mc_samples

        model = VariationalMergingModel(surrogate_posterior, prior, likelihood,
                                        scaling_model, mc_sample_size)

        opt = tf.keras.optimizers.Adam(
            parser.learning_rate,
            parser.beta_1,
            parser.beta_2,
        )

        model.compile(opt)
        return model
Esempio n. 19
0
def test_by_index():
    """get_index_by_name must invert the BaseModel.input_index mapping."""
    for name, index in BaseModel.input_index.items():
        assert BaseModel.get_index_by_name(name) == index