Exemple #1
0
 def __init__(self, root_models, backend, seed=None, discard_too_large_values=True):
     """Store the configuration and build the helper objects used when sampling.

     Parameters
     ----------
     root_models
         Stored as ``self.model`` and handed to ``AcceptedParametersManager``.
     backend
         Stored as ``self.backend``.
     seed
         Passed to ``np.random.RandomState`` to create ``self.rng``.
     discard_too_large_values
         Flag stored unchanged on the instance.
     """
     self.backend = backend
     self.discard_too_large_values = discard_too_large_values
     self.model = root_models
     self.rng = np.random.RandomState(seed)
     # Object in charge of managing the bds objects.
     self.accepted_parameters_manager = AcceptedParametersManager(self.model)
    def test_DefaultKernel(self):
        """Run ``DefaultKernel.update`` with a fixed seed and compare with pinned values.

        The kernel is built over two Normal nodes and one Binomial node; the
        returned ``(model, value)`` pairs are checked against hard-coded numbers.
        """
        binomial = Binomial([10, 0.2])
        inner_normal = Normal([0.1, 0.01])
        outer_normal = Normal([0.3, inner_normal])
        root = Normal([binomial, outer_normal])

        manager = AcceptedParametersManager([root])
        dummy_backend = Backend()
        default_kernel = DefaultKernel([inner_normal, outer_normal, binomial])

        accepted_values = [[2, 0.27, 0.097], [3, 0.32, 0.012]]
        cov_mats = [[[0.01, 0], [0, 0.01]], []]
        manager.update_broadcast(dummy_backend, accepted_values, np.array([1, 1]),
                                 accepted_cov_mats=cov_mats)

        # One entry per sub-kernel, in the order the joint kernel lists them.
        values_per_kernel = [
            manager.get_accepted_parameters_bds_values(sub_kernel.models)
            for sub_kernel in default_kernel.kernels
        ]
        manager.update_kernel_values(dummy_backend, kernel_parameters=values_per_kernel)

        seeded_rng = np.random.RandomState(1)
        perturbed = default_kernel.update(manager, 1, seeded_rng)

        expected = [
            (inner_normal, [0.17443453636632419]),
            (outer_normal, [0.25882435863499248]),
            (binomial, [3]),
        ]
        self.assertEqual(perturbed, expected)
    def test_return_value_Student_T(self):
        """Check that ``pdf`` of a Student-T joint kernel returns a ``float`` instance."""
        N1 = Normal([0.1, 0.01])
        N2 = Normal([0.3, N1])
        graph = Normal([N1, N2])

        Manager = AcceptedParametersManager([graph])
        backend = Backend()
        kernel = JointPerturbationKernel([MultivariateStudentTKernel([N1, N2], df=2)])
        Manager.update_broadcast(backend, [[0.4, 0.09], [0.2, 0.008]], np.array([0.5, 0.2]))
        kernel_parameters = [
            Manager.get_accepted_parameters_bds_values(krnl.models) for krnl in kernel.kernels
        ]
        Manager.update_kernel_values(backend, kernel_parameters)
        # Only the mapping is used below; the second return value is not needed.
        mapping, _ = Manager.get_mapping(Manager.model)
        covs = [[[1, 0], [0, 1]], []]
        Manager.update_broadcast(backend, accepted_cov_mats=covs)
        pdf = kernel.pdf(mapping, Manager, Manager.accepted_parameters_bds.value()[1], [0.3, 0.1])
        # assertIsInstance reports the actual type on failure, unlike assertTrue(isinstance(...)).
        self.assertIsInstance(pdf, float)
    def test_return_value(self):
        """Check that ``pdf`` of the default kernel returns a ``float`` instance."""
        B1 = Binomial([10, 0.2])
        N1 = Normal([0.1, 0.01])
        N2 = Normal([0.3, N1])
        graph = Normal([B1, N2])

        Manager = AcceptedParametersManager([graph])
        backend = Backend()
        kernel = DefaultKernel([N1, N2, B1])
        Manager.update_broadcast(backend, [[2, 0.4, 0.09], [3, 0.2, 0.008]], np.array([0.5, 0.2]))
        kernel_parameters = [
            Manager.get_accepted_parameters_bds_values(krnl.models) for krnl in kernel.kernels
        ]
        Manager.update_kernel_values(backend, kernel_parameters)
        # Only the mapping is used below; the second return value is not needed.
        mapping, _ = Manager.get_mapping(Manager.model)
        covs = [[[1, 0], [0, 1]], []]
        Manager.update_broadcast(backend, accepted_cov_mats=covs)
        pdf = kernel.pdf(mapping, Manager, 1, [2, 0.3, 0.1])
        # assertIsInstance reports the actual type on failure, unlike assertTrue(isinstance(...)).
        self.assertIsInstance(pdf, float)
    def test_Student_T(self):
        """Check the structure returned by ``calculate_cov`` for a single Student-T kernel.

        The joint kernel wraps one kernel over two models, so the result is
        expected to hold one entry whose length is 2.
        """
        N1 = Normal([0.1, 0.01])
        N2 = Normal([0.3, N1])
        graph = Normal([N1, N2])

        Manager = AcceptedParametersManager([graph])
        backend = Backend()
        kernel = JointPerturbationKernel([MultivariateStudentTKernel([N1, N2], df=2)])
        Manager.update_broadcast(backend, [[0.27, 0.097], [0.32, 0.012]], np.array([1, 1]))

        kernel_parameters = [
            Manager.get_accepted_parameters_bds_values(krnl.models) for krnl in kernel.kernels
        ]
        Manager.update_kernel_values(backend, kernel_parameters)

        covs = kernel.calculate_cov(Manager)
        # assertEqual shows both lengths on failure, so no debug print is needed.
        self.assertEqual(len(covs), 1)
        self.assertEqual(len(covs[0]), 2)
    def test(self):
        """Check the structure returned by ``DefaultKernel.calculate_cov``.

        Two entries are expected: the first of length 2 and the second falsy
        (for example an empty container).
        """
        B1 = Binomial([10, 0.2])
        N1 = Normal([0.1, 0.01])
        N2 = Normal([0.3, N1])
        graph = Normal([B1, N2])

        Manager = AcceptedParametersManager([graph])
        backend = Backend()
        kernel = DefaultKernel([N1, N2, B1])
        Manager.update_broadcast(backend, [[2, 0.27, 0.097], [3, 0.32, 0.012]], np.array([1, 1]))

        kernel_parameters = [
            Manager.get_accepted_parameters_bds_values(krnl.models) for krnl in kernel.kernels
        ]
        Manager.update_kernel_values(backend, kernel_parameters)

        covs = kernel.calculate_cov(Manager)
        # Dedicated assertions give informative messages when a check fails.
        self.assertEqual(len(covs), 2)
        self.assertEqual(len(covs[0]), 2)
        self.assertFalse(covs[1])
    def test_Student_T(self):
        """Run the Student-T joint kernel's ``update`` with a fixed seed and compare with pinned values."""
        N1 = Normal([0.1, 0.01])
        N2 = Normal([0.3, N1])
        graph = Normal([N1, N2])

        Manager = AcceptedParametersManager([graph])
        backend = Backend()
        kernel = JointPerturbationKernel([MultivariateStudentTKernel([N1, N2], df=2)])
        Manager.update_broadcast(backend, [[0.27, 0.097], [0.32, 0.012]], np.array([1, 1]),
                                 accepted_cov_mats=[[[0.01, 0], [0, 0.01]], []])

        kernel_parameters = [
            Manager.get_accepted_parameters_bds_values(krnl.models) for krnl in kernel.kernels
        ]
        Manager.update_kernel_values(backend, kernel_parameters=kernel_parameters)

        rng = np.random.RandomState(1)
        perturbed_values_and_models = kernel.update(Manager, 1, rng)
        # assertEqual prints both sides on failure, so the former debug print was redundant.
        self.assertEqual(perturbed_values_and_models,
                         [(N1, [0.2107982411716391]), (N2, [-0.049106838502166614])])
Exemple #8
0
class DrawFromPosterior(InferenceMethod):
    """Simulate from the model at the accepted parameters stored in a journal file.

    ``sample`` loads a journal, broadcasts the parameters and weights of its
    last stored step, and runs one simulation per stored parameter through
    the backend.
    """
    model = None
    rng = None
    n_samples = None
    backend = None

    n_samples_per_param = None  # this needs to be there otherwise it does not instantiate correctly

    def __init__(self, root_models, backend, seed=None, discard_too_large_values=True):
        """Store the configuration and create the random state and parameters manager.

        Parameters
        ----------
        root_models
            Stored as ``self.model`` and handed to ``AcceptedParametersManager``.
        backend
            Backend used to parallelize, map and collect the simulations.
        seed
            Passed to ``np.random.RandomState`` to create ``self.rng``.
        discard_too_large_values
            Stored on the instance; not read by the methods of this class.
        """
        self.model = root_models
        self.backend = backend
        self.rng = np.random.RandomState(seed)
        self.discard_too_large_values = discard_too_large_values
        # An object managing the bds objects
        self.accepted_parameters_manager = AcceptedParametersManager(self.model)
        self.n_samples_per_param = 1

    def sample(self, journal_file):
        """Run one simulation for each accepted parameter of the journal's last step.

        Parameters
        ----------
        journal_file
            Passed to ``Journal.fromFile`` to load the journal.

        Returns
        -------
        tuple
            ``(parameters, simulations)`` as numpy arrays with all length-one
            axes removed by ``np.squeeze``.
        """
        journal = Journal.fromFile(journal_file)
        accepted_parameters = journal.get_accepted_parameters(-1)
        accepted_weights = journal.get_weights(-1)
        n_samples = journal.configuration["n_samples"]

        self.accepted_parameters_manager.broadcast(self.backend, 1)
        # Broadcast accepted parameters and accepted weights
        self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters,
                                                          accepted_weights=accepted_weights)

        # One independently seeded random state per sample, paired with the
        # index of the accepted parameter it will be used with.
        seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=n_samples, dtype=np.uint32)
        rng_list = [np.random.RandomState(seed) for seed in seed_arr]
        data_arr = [[rng, index] for rng, index in zip(rng_list, np.arange(n_samples))]
        data_pds = self.backend.parallelize(data_arr)

        parameters_simulations_pds = self.backend.map(self._sample_parameter, data_pds)
        parameters_simulations = self.backend.collect(parameters_simulations_pds)
        parameters, simulations = [list(t) for t in zip(*parameters_simulations)]

        parameters = np.squeeze(np.array(parameters))
        simulations = np.squeeze(np.array(simulations))

        return parameters, simulations

    def _sample_parameter(self, data, npc=None):
        """Simulate once from the model at one accepted parameter.

        Parameters
        ----------
        data
            Pair ``[rng, index]`` (a list or a numpy array): the random state
            to use and the index of the accepted parameter.
        npc
            Forwarded unchanged to ``self.simulate``.

        Returns
        -------
        tuple
            ``(parameter, y_sim)``: the stored accepted parameter and the
            simulation obtained with it.
        """
        if isinstance(data, np.ndarray):
            data = data.tolist()
        rng, index = data[0], data[1]
        # Re-seed the random state from its own stream before using it.
        rng.seed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32))

        parameter = self.accepted_parameters_manager.accepted_parameters_bds.value()[index]
        # Each stored entry is indexable; its first element is used as the value.
        parameter_list = [x[0] for x in parameter]
        self.set_parameters(parameter_list)
        # Pass the per-sample random state so that the simulation actually
        # depends on the seed given at construction.
        y_sim = self.simulate(n_samples_per_param=1, rng=rng, npc=npc)
        return parameter, y_sim
Exemple #9
0
class DrawFromPrior(InferenceMethod):
    """Draw parameters from the prior and simulate from the model with them.

    ``sample`` generates everything in one backend call; ``sample_in_chunks``
    does the same work in several smaller calls.
    """
    model = None
    rng = None
    n_samples = None
    backend = None

    n_samples_per_param = None  # this needs to be there otherwise it does not instantiate correctly

    def __init__(self, root_models, backend, seed=None, discard_too_large_values=True):
        """Store the configuration and create the random state and parameters manager.

        Parameters
        ----------
        root_models
            Stored as ``self.model`` and handed to ``AcceptedParametersManager``.
        backend
            Backend used to parallelize, map and collect the simulations.
        seed
            Passed to ``np.random.RandomState`` to create ``self.rng``.
        discard_too_large_values
            If true, simulations containing values that become infinite when
            cast to float32 are discarded and drawn again.
        """
        self.model = root_models
        self.backend = backend
        self.rng = np.random.RandomState(seed)
        self.discard_too_large_values = discard_too_large_values
        # An object managing the bds objects
        self.accepted_parameters_manager = AcceptedParametersManager(self.model)

    def sample(self, n_samples, n_samples_per_param):
        """Generate ``n_samples`` parameter/simulation pairs.

        Parameters
        ----------
        n_samples
            Number of parameter draws.
        n_samples_per_param
            Forwarded to ``self.simulate`` for every draw.

        Returns
        -------
        tuple
            ``(parameters, simulations)`` as numpy arrays with all length-one
            axes removed by ``np.squeeze``.
        """
        parameters, simulations = self._sample_unsqueezed(n_samples, n_samples_per_param)
        return np.squeeze(parameters), np.squeeze(simulations)

    def sample_in_chunks(self, n_samples, n_samples_per_param, max_chunk_size=10 ** 4):
        """This splits the data generation in chunks. It is useful when generating large datasets with MPI backend,
        which gives an overflow error due to pickling very large objects.

        Parameters
        ----------
        n_samples
            Total number of parameter draws.
        n_samples_per_param
            Forwarded to ``self.simulate`` for every draw.
        max_chunk_size
            Largest number of draws generated in a single backend call.

        Returns
        -------
        tuple
            ``(parameters, simulations)`` as numpy arrays with all length-one
            axes removed by ``np.squeeze``.

        Raises
        ------
        ValueError
            If ``max_chunk_size`` is smaller than 1.
        """
        if max_chunk_size < 1:
            raise ValueError("max_chunk_size must be a positive integer.")

        parameters_list = []
        simulations_list = []
        samples_to_sample = n_samples
        while samples_to_sample > 0:
            chunk_size = min(samples_to_sample, max_chunk_size)
            # Keep the sample axis of every chunk: squeezing a chunk that holds
            # a single sample would drop that axis and break the concatenation.
            parameters_part, simulations_part = self._sample_unsqueezed(chunk_size, n_samples_per_param)
            samples_to_sample -= chunk_size
            parameters_list.append(parameters_part)
            simulations_list.append(simulations_part)
        # Squeeze once, after all chunks have been joined along the sample axis.
        parameters = np.squeeze(np.concatenate(parameters_list))
        simulations = np.squeeze(np.concatenate(simulations_list))
        return parameters, simulations

    def _sample_unsqueezed(self, n_samples, n_samples_per_param):
        """Generate the samples and return them as arrays whose first axis is the sample axis."""
        self.n_samples = n_samples
        self.n_samples_per_param = n_samples_per_param
        self.accepted_parameters_manager.broadcast(self.backend, 1)

        # Generate one seed per sample. uint32 is used because seeds must fit
        # in 32 bits, so int64 values cannot be used here.
        seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=n_samples, dtype=np.uint32)
        # Sort so that equal seeds are adjacent, then bump the first seed of
        # every equal pair by one. This is best effort only: it does not
        # guarantee unique seeds (e.g. three equal seeds, or a bumped seed that
        # now equals its neighbour).
        sorted_seed_arr = np.sort(seed_arr)
        indices = sorted_seed_arr[:-1] == sorted_seed_arr[1:]
        if np.sum(indices) > 0:
            sorted_seed_arr[:-1][indices] = sorted_seed_arr[:-1][indices] + 1
        rng_arr = np.array([np.random.RandomState(seed) for seed in sorted_seed_arr])
        rng_pds = self.backend.parallelize(rng_arr)

        parameters_simulations_pds = self.backend.map(self._sample_parameter, rng_pds)
        parameters_simulations = self.backend.collect(parameters_simulations_pds)
        parameters, simulations = [list(t) for t in zip(*parameters_simulations)]

        return np.array(parameters), np.array(simulations)

    def _sample_parameter(self, rng, npc=None):
        """Draw one parameter from the prior and simulate with it.

        The draw is repeated while ``discard_too_large_values`` is set and the
        simulation contains values that are infinite once cast to float32.

        Returns
        -------
        tuple
            ``(theta, y_sim)``: the drawn parameters and the simulation.
        """
        while True:
            self.sample_from_prior(rng=rng)
            theta = self.get_parameters(self.model)
            y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc)

            # Only inspect the simulation when the check is enabled. For
            # instance, Lorenz model may give too large values sometimes
            # (quite rarely).
            if self.discard_too_large_values and np.isinf(np.array(y_sim).astype("float32")).any():
                print("y_sim contained too large values for float32; simulating again.")
                continue

            return theta, y_sim
Exemple #10
0
class DrawFromParamValues(InferenceMethod):
    """Simulate from the first root model at parameter values supplied by the caller."""
    model = None
    rng = None
    n_samples = None
    backend = None

    n_samples_per_param = None  # this needs to be there otherwise it does not instantiate correctly

    def __init__(self, root_models, backend, seed=None, discard_too_large_values=True):
        """Store the configuration and create the random state and parameters manager.

        Parameters
        ----------
        root_models
            Stored as ``self.model`` and handed to ``AcceptedParametersManager``.
        backend
            Backend used to parallelize, map and collect the simulations.
        seed
            Passed to ``np.random.RandomState`` to create ``self.rng``.
        discard_too_large_values
            If true, simulations containing values that become infinite when
            cast to float32 are discarded and run again.
        """
        self.model = root_models
        self.backend = backend
        self.rng = np.random.RandomState(seed)
        self.discard_too_large_values = discard_too_large_values
        # Object in charge of managing the bds objects.
        self.accepted_parameters_manager = AcceptedParametersManager(self.model)

    def sample(self, param_values):
        """Run one simulation for every entry of ``param_values``.

        Parameters
        ----------
        param_values
            Sequence of parameter values; its length sets ``self.n_samples``.

        Returns
        -------
        tuple
            ``(parameters, simulations)`` as numpy arrays with all length-one
            axes removed by ``squeeze``.
        """
        self.param_values = param_values  # list of parameter values
        self.n_samples = len(param_values)
        self.accepted_parameters_manager.broadcast(self.backend, 1)

        # Draw one uint32 seed per parameter value (seeds must fit in 32 bits,
        # so int64 cannot be used).
        upper_bound = np.iinfo(np.uint32).max
        seeds = np.sort(self.rng.randint(0, upper_bound, size=self.n_samples, dtype=np.uint32))
        # After sorting, equal seeds are adjacent: bump the first seed of every
        # equal pair by one. This reduces, but does not rule out, repeated seeds.
        equals_next = seeds[:-1] == seeds[1:]
        if np.sum(equals_next) > 0:
            seeds[:-1][equals_next] = seeds[:-1][equals_next] + 1
        generators = np.array([np.random.RandomState(one_seed) for one_seed in seeds])

        # Pair each parameter value with its own random state.
        work_items = list(zip(self.param_values, generators))
        work_pds = self.backend.parallelize(work_items)
        results_pds = self.backend.map(self._sample_parameter, work_pds)
        results = self.backend.collect(results_pds)

        parameters, simulations = [list(column) for column in zip(*results)]
        return np.array(parameters).squeeze(), np.array(simulations).squeeze()

    def _sample_parameter(self, data, npc=None):
        """Simulate once at the given parameter value, repeating on float32 overflow.

        Parameters
        ----------
        data
            Indexable pair ``(theta, rng)``: the parameter value and the random
            state to use.
        npc
            Accepted but not used by this method.

        Returns
        -------
        tuple
            ``(theta, y_sim)``: the parameter value and the simulation.
        """
        theta, rng = data[0], data[1]

        while True:
            # assume that we have one single model
            y_sim = self.model[0].forward_simulate(theta, 1, rng=rng)

            # For instance, Lorenz model may give too large values sometimes
            # (quite rarely); those show up as infinities in float32.
            has_infinite = np.sum(np.isinf(np.array(y_sim).astype("float32"))) > 0
            if not (has_infinite and self.discard_too_large_values):
                return theta, y_sim

            print("y_sim contained too large values for float32; simulating again.")