Example #1
    def infer(self,
              num_samples,
              alpha=0.5,
              R_trial=10,
              c=0.01,
              p_min=0.05,
              batch_size=10,
              chunk_size=1):
        """Performs SMC-ABC.

        Parameters
        ----------
        num_samples : int
            The number of required accepted samples
        alpha : float
            Culling percentage
        R_trial : int
            Number of perturbs per replenishment to estimate probability
        c : float
            Sensitivity for more perturbations
        p_min : float
            Termination condition as a probability of a successul perturbation

        Returns
        -------
        dict
            Keys
            'accepted_samples: The accepted parameter values',
            'distances: Accepted distance values'
        """

        assert hasattr(self, "fixed_mean"), \
            "Please call compute_fixed_mean before infer"

        # Get the dask graph and add another distances task to it
        graph_dict = core.get_graph_chunked(self.prior_function.draw, self.sim,
                                            self.summaries_function,
                                            batch_size, chunk_size)
        dist_func = lambda x: self.distance_function(self.fixed_mean, x)
        graph_dict["distances"] = core.get_distance(dist_func,
                                                    graph_dict["summarystats"],
                                                    chunked=True)

        # Culling cutoff: the n_cull best particles are retained each round
        n_cull = round(alpha * num_samples)

        # Draw the initial population and compute distances
        population, distances = dask.compute(graph_dict['parameters'],
                                             graph_dict['distances'])
        population = core._reshape_chunks(population)
        distances = core._reshape_chunks(distances)

        while population.shape[0] < num_samples:
            params, dists = dask.compute(graph_dict["parameters"],
                                         graph_dict["distances"])
            params = core._reshape_chunks(params)
            dists = core._reshape_chunks(dists)
            population = np.vstack([population, params])
            distances = np.vstack([distances, dists])

        population = population[:num_samples]
        distances = distances[:num_samples, 0]

        terminate = False
        while not terminate:

            try:
                # Sort population by distance
                sorted_idxs = np.argsort(distances)
                population = population[sorted_idxs]
                distances = distances[sorted_idxs]

                # Keep the best n_cull particles and discard the rest; the
                # largest retained distance becomes the tolerance
                population = population[:n_cull]
                distances = distances[:n_cull]
                tol = distances[-1]

                # Resample with replacement to replenish in the population
                resampled_idxs = np.random.choice(n_cull, num_samples - n_cull)
                population = np.vstack(
                    [population, population[resampled_idxs]])
                distances = np.concatenate(
                    [distances, distances[resampled_idxs]])

                # Adapt transition kernel using the new population
                self.perturbation_kernel.adapt(population)

                # For each replenished particle, perturb and resample a few
                # times to estimate how easy it is to move to a lower distance
                perturb_tasks = []
                for i in range(n_cull, num_samples):
                    perturb_tasks.append(
                        self._perturb_resample(population[i, :], distances[i],
                                               R_trial, tol))
                res, = dask.compute(perturb_tasks)

                # Update the population with the perturbed population
                updated_ps, updated_distances, update_p_accs, N_accs = \
                    list(zip(*res))

                population[n_cull:] = np.vstack(updated_ps)
                distances[n_cull:] = np.asarray(updated_distances)

                # Update metrics from the trial to estimate the probability
                # of a move to assess convergence and decide how many more
                # perturbation attempts to make
                p_acc = np.sum(update_p_accs) / (num_samples - n_cull)
                N_acc = np.sum(N_accs)

                # Choose R so that (1 - p_acc)^R <= c, i.e. the probability of
                # a particle never moving in R attempts is at most c; e.g.
                # p_acc = 0.2, c = 0.01 gives R = log(0.01)/log(0.8) ~= 21.
                # Clamp p_acc away from 0 and 1 to avoid dividing by log(1) = 0
                p_clamped = min(max(p_acc, 1e-8), 1.0 - 1e-8)
                R = int(round(np.log(c) / np.log(1.0 - p_clamped)))

                # Perturb again with better estimate
                perturb_tasks = []
                for i in range(n_cull, num_samples):
                    perturb_tasks.append(
                        self._perturb_resample(population[i, :], distances[i],
                                               R - R_trial, tol))
                res, = dask.compute(perturb_tasks)

                updated_ps, updated_distances, update_p_accs, N_accs = \
                    list(zip(*res))

                population[n_cull:] = np.vstack(updated_ps)
                distances[n_cull:] = np.asarray(updated_distances)

                p_acc += np.sum(update_p_accs) / (num_samples - n_cull)
                N_acc += np.sum(N_accs)

                print("Tol : {}, R : {}, p_acc : {}".format(tol, R, p_acc))
                if p_acc < p_min:
                    terminate = True
            except KeyboardInterrupt:
                return {'accepted_samples': population, 'distances': distances}

        return {'accepted_samples': population, 'distances': distances}
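
A minimal, hedged usage sketch for infer() above. The smc instance and its
construction are assumptions (the class, prior, simulator, summaries, distance
function, and perturbation kernel are initialized elsewhere, and the
compute_fixed_mean arguments, if any, are not shown); only the infer signature,
the compute_fixed_mean precondition, and the returned keys come from the code
itself:

    smc.compute_fixed_mean()  # required: see the assert at the top of infer
    result = smc.infer(num_samples=100, alpha=0.5, R_trial=10,
                       c=0.01, p_min=0.05, batch_size=10, chunk_size=1)
    posterior_samples = result['accepted_samples']  # (num_samples, num_params)
    final_distances = result['distances']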
Example #2
    def infer(self,
              num_samples,
              num_rounds,
              chunk_size=10,
              exploit=True,
              seed=None):
        np.random.seed(seed)
        thetas = []
        data_tot = []
        proposal = self.prior_function

        try:
            for i in range(num_rounds):

                graph_dict = core.get_graph_chunked(proposal.draw,
                                                    self.sim,
                                                    batch_size=num_samples,
                                                    chunk_size=chunk_size)

                if self.verbose:
                    print(f"starting round {i}")

                # Simulate data
                samples, data = dask.compute(graph_dict["parameters"],
                                             graph_dict["trajectories"])
                samples = core._reshape_chunks(samples)
                data = np.array(data)
                if self.verbose:
                    print('data shape: ', data.shape)

                # Reshape for the NN: from
                # (num_chunks, chunk_size, ensemble_size, num_species, time_points)
                # to (num_chunks*chunk_size*ensemble_size, time_points, num_species).
                # A plain reshape would scramble the species/time axes, so
                # flatten first and then swap the last two axes
                data = data.reshape((np.prod(data.shape[:3]), ) +
                                    data.shape[-2:]).swapaxes(1, 2)

                # Append data from each round
                thetas.append(samples)
                # data_tot.append(data)

                # Split training and validation data (alternative, commented
                # out: train on data accumulated across rounds)
                # inputs, val_inputs, targets, val_targets = train_test_split(
                #     np.concatenate(data_tot, axis=0),
                #     np.concatenate(thetas, axis=0), train_size=0.95)

                inputs, val_inputs, targets, val_targets = train_test_split(
                    data, samples, train_size=0.8)

                # Construct the BNN model (only once)
                output_dim = targets.shape[-1]
                if not self._bnn_complied:
                    self._construct_bnn(inputs.shape[1:], output_dim,
                                        inputs.shape[0])

                # Re-compile the model against the updated proposal before
                # retraining (proposal_tf is only defined when exploit=True)
                if i > 0 and exploit:
                    self.model._compile_model(prior=self.prior_function,
                                              proposal=proposal_tf,
                                              default=False)

                if self.model.normal:

                    # Start training
                    self._train(inputs, targets, val_inputs, val_targets)

                    # Approximate the mixture of Gaussians by a single Gaussian
                    if exploit:
                        proposal_tf = self.model.model(self.data)
                        # proposal_m, proposal_var = MCinferMOG(self.data, self.model, self.num_monte_carlo, output_dim)
                        proposal_m = proposal_tf.mean()
                        proposal_var = proposal_tf.covariance()
                        proposal = GaussianPrior(proposal_m[0],
                                                 S=proposal_var[0])

                    #TODO: correction
                else:
                    raise ValueError(
                        "Current implementation only supports Gaussian "
                        "proposals; use add_normal=True when constructing "
                        "the BNN")

        except KeyboardInterrupt:
            if self.verbose:
                print(f"Terminating at round {i}")
            return np.array(thetas)
        if self.verbose:
            print(f"Done after {num_rounds} rounds")
        return proposal, np.array(thetas)
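
A hedged usage sketch for the sequential routine above; snl stands for an
already-constructed instance (construction is an assumption), while the
argument names and the (proposal, thetas) return value come from the code:

    proposal, thetas = snl.infer(num_samples=1000, num_rounds=5,
                                 chunk_size=10, exploit=True, seed=42)
    # thetas stacks the parameter draws from every round:
    # shape (num_rounds, num_samples, num_params)
    print(thetas.shape)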
Example #3
def test_uniform_prior():
    lb = np.asarray([1, 1])
    ub = np.asarray([5, 5])
    num_samples = 5
    prior_func = uniform_prior.UniformPrior(lb, ub)

    # Multiprocessing mode
    samples = prior_func.draw(num_samples, chunk_size=1)
    assert len(samples) == 5, \
        "UniformPrior functional test error, expected chunk count mismatch"
    samples, = dask.compute(samples)
    samples = np.asarray(samples)
    assert samples.shape[0] == num_samples, \
        "UniformPrior functional test error, expected sample count mismatch"
    assert samples.shape[1] == 1, \
        "UniformPrior functional test error, expected chunk size mismatch"
    assert samples.shape[2] == len(lb), \
        "UniformPrior functional test error, dimension mismatch"
    samples = samples.reshape(-1, len(lb))
    axis_mins = np.min(samples, 0)
    axis_maxs = np.max(samples, 0)
    assert axis_mins[0] > lb[0] and axis_maxs[0] < ub[0] \
        and axis_mins[1] > lb[1] and axis_maxs[1] < ub[1], \
        "UniformPrior functional test error, drawn samples out of bounds"

    # Cluster mode
    c = Client()
    samples = prior_func.draw(num_samples, chunk_size=1)
    assert len(samples) == 5, \
        "UniformPrior functional test error, expected chunk count mismatch"
    samples, = dask.compute(samples)
    samples = np.asarray(samples)
    assert samples.shape[0] == num_samples, \
        "UniformPrior functional test error, expected sample count mismatch"
    assert samples.shape[1] == 1, \
        "UniformPrior functional test error, expected chunk size mismatch"
    assert samples.shape[2] == len(lb), \
        "UniformPrior functional test error, dimension mismatch"
    samples = samples.reshape(-1, len(lb))
    axis_mins = np.min(samples, 0)
    axis_maxs = np.max(samples, 0)
    assert axis_mins[0] > lb[0] and axis_maxs[0] < ub[0] \
        and axis_mins[1] > lb[1] and axis_maxs[1] < ub[1], \
        "UniformPrior functional test error, drawn samples out of bounds"

    # chunk_size = 2
    samples = prior_func.draw(num_samples, chunk_size=2)
    assert len(samples) == 3, \
        "UniformPrior functional test error, expected chunk count mismatch"
    samples, = dask.compute(samples)
    samples = np.asarray(samples)
    assert samples.shape[0] == 3, \
        "UniformPrior functional test error, expected sample count mismatch"
    assert samples[-1].shape[0] == 2, \
        "UniformPrior functional test error, expected chunk size mismatch"
    assert samples[-1].shape[1] == len(lb), \
        "UniformPrior functional test error, dimension mismatch"
    samples = core._reshape_chunks(samples)
    axis_mins = np.min(samples, 0)
    axis_maxs = np.max(samples, 0)
    assert axis_mins[0] > lb[0] and axis_maxs[0] < ub[0] \
        and axis_mins[1] > lb[1] and axis_maxs[1] < ub[1], \
        "UniformPrior functional test error, drawn samples out of bounds"
    c.close()
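
The assertions above pin down what core._reshape_chunks is expected to do. A
self-contained, illustrative stand-in (behavior inferred from those
assertions, not the library's actual implementation):

    import numpy as np

    def reshape_chunks(chunks):
        # Flatten a list of per-chunk (chunk_len, num_params) arrays into a
        # single (total_samples, num_params) array, tolerating a ragged
        # final chunk.
        return np.vstack([np.atleast_2d(np.asarray(c)) for c in chunks])

    chunks = [np.random.uniform(1, 5, (2, 2)),
              np.random.uniform(1, 5, (2, 2)),
              np.random.uniform(1, 5, (1, 2))]  # last chunk is smaller
    flat = reshape_chunks(chunks)
    assert flat.shape == (5, 2)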
Example #4
    def infer(self, num_samples, num_rounds, chunk_size=10, seed=None):
        np.random.seed(seed)
        theta = []
        local_sampler = CategoricalSampler(num_bins=self.num_bins)

        try:

            graph_dict = core.get_graph_chunked(self.prior_function,
                                                self.sim,
                                                batch_size=num_samples,
                                                chunk_size=chunk_size)

            for i in range(num_rounds):

                samples, data = dask.compute(graph_dict["parameters"],
                                             graph_dict["trajectories"])
                samples = core._reshape_chunks(samples)
                data = np.array(data)
                if self.verbose:
                    print('data shape: ', data.shape)

                # Reshape for the NN: from
                # (num_chunks, chunk_size, ensemble_size, num_species, time_points)
                # to (num_chunks*chunk_size*ensemble_size, time_points, num_species).
                # A plain reshape would scramble the species/time axes, so
                # flatten first and then swap the last two axes
                data = data.reshape((np.prod(data.shape[:3]), ) +
                                    data.shape[-2:]).swapaxes(1, 2)
                theta.append(samples)
                if i > 0:
                    data_, samples_ = _inBin(data, samples, theta[i])
                    data = np.append(data, data_, axis=0)
                    samples = np.append(samples, samples_, axis=0)

                #TODO: for every 2 combinations in parameter space
                #TODO: Change _create_train_val to not depend on self.train_thetas and
                #      self.train_ts
                self.train_thetas = samples
                self.train_ts = data

                # Create bins from continuous data
                train_, val_, bins_ = self._create_train_val(self.num_bins)

                input_shape = (data.shape[-2], data.shape[-1])
                output_shape = len(bins_)

                num_train_examples = len(data)

                bnn = BNNModel(input_shape, output_shape, num_train_examples)
                if self.verbose:
                    print(bnn.model.summary())
                    print('num bins: ', len(bins_))
                    print('input_shape: ', input_shape)
                    print('data shape: ', data.shape)

                bnn.train(self.train_ts, train_, self.val_ts, val_)

                #TODO: adaptive_thresh[i]
                local_sampler.probs = bnn.mc_sampling(self.data,
                                                      self.num_monte_carlo)
                local_sampler.bins = bins_
                self.prior_function = local_sampler.sample

                graph_dict = core.get_graph_chunked(self.prior_function,
                                                    self.sim,
                                                    batch_size=num_samples,
                                                    chunk_size=chunk_size)
        except KeyboardInterrupt:
            return np.array(theta)
        return np.array(theta)
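
A hedged usage sketch; bnn_inf is an assumed, already-constructed instance,
and only the infer signature and return value come from the code above:

    thetas = bnn_inf.infer(num_samples=500, num_rounds=4, chunk_size=10,
                           seed=0)
    # thetas stacks the parameter draws from each round:
    # shape (num_rounds, num_samples, num_params)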
Example #5
    def rejection_sampling(self, num_samples, batch_size, chunk_size,
                           ensemble_size, normalize):
        """
        Perform ABC inference according to initialized configuration.

        Parameters
        ----------
        num_samples : int
            The number of required accepted samples
        batch_size : int
            The batch size of samples for performing rejection sampling
        chunk_size : int
            the partition size when splitting the fixed data. For avoiding many individual tasks
            in dask if the data is large.
        
        Returns
        -------
        dict
            Keys
            'accepted_samples: The accepted parameter values', 
            'distances: Accepted distance values', 
            'accepted_count: Number of accepted samples',
            'trial_count: The number of total trials performed in order to converge',
            'inferred_parameters': The mean of accepted parameter samples
        """
        accepted_count = 0
        trial_count = 0
        accepted_samples = []
        distances = []

        # fixed_mean must be computed before inference
        assert hasattr(self, "fixed_mean"), \
            "Please call compute_fixed_mean before infer"

        # Get dask graph
        graph_dict = core.get_graph_chunked(self.prior_function, self.sim,
                                            self.summaries_function,
                                            batch_size, chunk_size)

        dist_func = lambda x: self.distance_function(self.fixed_mean, x)
        graph_dict["distances"] = core.get_distance(dist_func,
                                                    graph_dict["summarystats"],
                                                    chunked=True)

        cluster_mode = core._cluster_mode()

        # Do rejection sampling

        # If a Dask cluster is used, persist the graph and consume futures as
        # results complete, submitting new chunks to keep the cluster busy
        if cluster_mode:
            if self.use_logger:
                self.logger.info("running in cluster mode")
            res_param, res_dist = dask.persist(graph_dict["parameters"],
                                               graph_dict["distances"])

            futures_dist = core.get_futures(res_dist)
            futures_params = core.get_futures(res_param)

            keep_idx = {f.key: idx for idx, f in enumerate(futures_dist)}

            while accepted_count < num_samples:

                # Iterate over distance futures as they complete; chunks
                # submitted below are registered with ac.add so this
                # iterator also yields them
                ac = as_completed(futures_dist, with_results=True)
                for f, dist in ac:
                    sim_dist_scaled = []
                    params = []
                    dists = []
                    for d in dist:
                        dists.append(d)
                        trial_count += 1
                        if normalize:
                            # Normalize distances between [0,1]
                            sim_dist_scaled.append(self.scale_distance(d))

                    idx = keep_idx[f.key]
                    param = futures_params[idx]
                    params_res = param.result()
                    for p in params_res:
                        params.append(p)

                    accepted_samples, distances, accepted_count = self._scale_reject(
                        sim_dist_scaled, dists, accepted_samples, distances,
                        params, accepted_count, normalize)
                    del dist, param  #TODO: remove all futures including simulation and summarystats
                    if accepted_count < num_samples:
                        new_chunk = core.get_graph_chunked(
                            self.prior_function, self.sim,
                            self.summaries_function, chunk_size, chunk_size)
                        new_chunk["distances"] = core.get_distance(
                            dist_func, new_chunk["summarystats"], chunked=True)

                        c_param, c_dist = dask.persist(new_chunk["parameters"],
                                                       new_chunk["distances"])
                        f_dist = core.get_futures(c_dist)[0]
                        f_param = core.get_futures(c_param)[0]
                        futures_dist.append(f_dist)
                        futures_params.append(f_param)
                        ac.add(f_dist)  # make the new future visible above

                        keep_idx[f_dist.key] = len(keep_idx)

                    else:
                        del futures_dist, futures_params, res_param, res_dist
                        self.results = {
                            'accepted_samples': accepted_samples,
                            'distances': distances,
                            'accepted_count': accepted_count,
                            'trial_count': trial_count,
                            'inferred_parameters': np.mean(accepted_samples,
                                                           axis=0)
                        }
                        return self.results

        # Else use multiprocessing mode
        else:
            if self.use_logger:
                self.logger.info("running in parallel mode")
            while accepted_count < num_samples:
                sim_dist_scaled = []
                params, dists = dask.compute(graph_dict["parameters"],
                                             graph_dict["distances"])
                params = core._reshape_chunks(params)
                dists = core._reshape_chunks(dists)
                if normalize:
                    for d in dists:
                        sim_dist_scaled.append(self.scale_distance(d))

                accepted_samples, distances, accepted_count = self._scale_reject(
                    sim_dist_scaled, dists, accepted_samples, distances,
                    params, accepted_count, normalize)

                trial_count += batch_size

            self.results = {
                'accepted_samples': accepted_samples,
                'distances': distances,
                'accepted_count': accepted_count,
                'trial_count': trial_count,
                'inferred_parameters': np.mean(accepted_samples, axis=0)
            }
            return self.results
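
A hedged usage sketch; abc is an assumed instance on which compute_fixed_mean
has already been called (see the assert above). The argument names and result
keys come from the code itself:

    res = abc.rejection_sampling(num_samples=200, batch_size=100,
                                 chunk_size=10, ensemble_size=1,
                                 normalize=True)
    print(res['accepted_count'], res['trial_count'])
    print(res['inferred_parameters'])  # mean of the accepted samples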