Example #1
def test_param_sim():
    n_points = 10
    #graph_dict = core.get_dask_graph(
    #    param_func=uni_prior.draw, sim_func=simulator2, batch_size=n_points)
    #assert len(graph_dict["parameters"]
    #           ) == 10, "Core test failed, dimensions mismatch"
    #assert len(graph_dict["trajectories"]
    #           ) == 10, "Core test failed, dimensions mismatch"
    #assert graph_dict["summarystats"] is None, "Core test failed, expected None"
    #assert graph_dict["distances"] is None, "Core test failed, expected None"

    lhd = LatinHypercube(dmin, dmax)
    lhd.generate_array(n_points)
    graph_dict = core.get_graph_chunked(param_func=lhd.draw,
                                        sim_func=simulator2,
                                        batch_size=n_points,
                                        chunk_size=2)
    assert len(
        graph_dict["parameters"]) == 5, "Core test failed, dimensions mismatch"
    assert len(graph_dict["trajectories"]
               ) == 5, "Core test failed, dimensions mismatch"
    assert graph_dict["summarystats"] is None, "Core test failed, expected None"

    params, sim = dask.compute(graph_dict["parameters"],
                               graph_dict["trajectories"])

    sim = np.asarray(sim)
    params = np.asarray(params)

    assert params.shape == (5, 2, 5), "Core test failed, dimensions mismatch"
    assert sim.shape == (5, 2, 1, 2,
                         101), "Core test failed, dimensions mismatch"

    # All points have already been drawn from the LHD; with the default auto_redesign=True
    # a new design is generated, so drawing again still works

    graph_dict = core.get_graph_chunked(param_func=lhd.draw,
                                        sim_func=simulator2,
                                        batch_size=n_points,
                                        chunk_size=2)
    assert len(
        graph_dict["parameters"]) == 5, "Core test failed, dimensions mismatch"
    assert len(graph_dict["trajectories"]
               ) == 5, "Core test failed, dimensions mismatch"
    assert graph_dict["summarystats"] is None, "Core test failed, expected None"

    params, sim = dask.compute(graph_dict["parameters"],
                               graph_dict["trajectories"])

    sim = np.asarray(sim)
    params = np.asarray(params)

    assert params.shape == (5, 2, 5), "Core test failed, dimensions mismatch"
    assert sim.shape == (5, 2, 1, 2,
                         101), "Core test failed, dimensions mismatch"
Example #2
def test_simple_chunked():

    graph_dict = core.get_graph_chunked(param_func=simple_sampler_chunked,
                                        sim_func=simple_sim,
                                        summaries_func=simple_summ,
                                        batch_size=10,
                                        chunk_size=2)

    assert len(
        graph_dict["parameters"]) == 5, "Core test failed, dimensions mismatch"
    assert len(graph_dict["trajectories"]
               ) == 5, "Core test failed, dimensions mismatch"
    assert len(graph_dict["summarystats"]
               ) == 5, "Core test failed, dimensions mismatch"

    params, sim, summ = dask.compute(graph_dict["parameters"],
                                     graph_dict["trajectories"],
                                     graph_dict["summarystats"])

    sim = np.asarray(sim)
    summ = np.asarray(summ)
    params = np.asarray(params)

    assert params.shape == (5, 2, 2), "Core test failed, dimensions mismatch"
    assert sim.shape == (5, 2, 2), "Core test failed, dimensions mismatch"
    assert summ.shape == (5, 2, 2), "Core test failed, dimensions mismatch"

    graph_dict = core.get_graph_chunked(param_func=simple_sampler_chunked,
                                        sim_func=simple_sim,
                                        batch_size=10,
                                        chunk_size=2)

    assert len(
        graph_dict["parameters"]) == 5, "Core test failed, dimensions mismatch"
    assert len(graph_dict["trajectories"]
               ) == 5, "Core test failed, dimensions mismatch"
    assert graph_dict[
        "summarystats"] is None, "Core test failed, excpected None"

    params, sim = dask.compute(graph_dict["parameters"],
                               graph_dict["trajectories"])

    sim = np.asarray(sim)
    params = np.asarray(params)

    assert params.shape == (5, 2, 2), "Core test failed, dimensions mismatch"
    assert sim.shape == (5, 2, 2), "Core test failed, dimensions mismatch"
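The helpers simple_sampler_chunked, simple_sim and simple_summ are not shown in this listing; the following is a hypothetical sketch, consistent with the (5, 2, 2) shapes asserted above, of what they could look like (the bodies are assumptions, not the source implementations):

import numpy as np

def simple_sampler_chunked(n):
    # hypothetical: draw n parameter points, each with 2 parameters
    return np.random.uniform(0.0, 1.0, size=(n, 2))

def simple_sim(param_point):
    # hypothetical: trivial "simulation" that echoes the 2-vector parameter point
    return np.asarray(param_point)

def simple_summ(trajectory):
    # hypothetical: trivial summary statistic, the trajectory itself
    return np.asarray(trajectory)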
Example #3
def test_param_sim_summ():
    lhd = LatinHypercube(dmin, dmax)
    n_points = 10
    lhd.generate_array(n_points)
    summ = lambda x: generate_tsfresh_features(x, MinimalFCParameters())
    graph_dict = core.get_graph_chunked(param_func=lhd.draw,
                                        sim_func=simulator2,
                                        summaries_func=summ,
                                        batch_size=n_points,
                                        chunk_size=2)
    assert len(
        graph_dict["parameters"]) == 5, "Core test failed, dimensions mismatch"
    assert len(graph_dict["trajectories"]
               ) == 5, "Core test failed, dimensions mismatch"
    assert len(
        graph_dict["summarystats"]) == 5, "Core test failed, expected None"

    params, sim, summaries = dask.compute(graph_dict["parameters"],
                                          graph_dict["trajectories"],
                                          graph_dict["summarystats"])

    sim = np.asarray(sim)
    params = np.asarray(params)
    summaries = np.asarray(summaries)

    assert params.shape == (5, 2, 5), "Core test failed, dimensions mismatch"
    assert sim.shape == (5, 2, 1, 2,
                         101), "Core test failed, dimensions mismatch"
    assert summaries.shape == (5, 2, 1,
                               16), "Core test failed, dimensions mismatch"

    fixed_data = np.asarray([simulator2(bound) for p in range(10)])
    print(fixed_data.shape)
    fixed_data = fixed_data.reshape(10, 2, 101)

    fixed_mean = core.get_fixed_mean(fixed_data, summ, chunk_size=2)

    m, = dask.compute(fixed_mean)
    m = np.asarray(m)
    assert m.shape == (1, 16), "Core test failed, dimensions mismatch"

    dist_class = ns.NaiveSquaredDistance()

    dist_func = lambda x: dist_class.compute(x, m)

    dist = core.get_distance(dist_func, graph_dict["summarystats"])

    assert len(dist) == 5, "Core test failed, dimension mismatch"

    dist_res, = dask.compute(dist)
    dist_res = np.asarray(dist_res)

    assert dist_res.shape == (5, 2, 1,
                              16), "Core test failed, dimension mismatch"
Example #4
    def infer(self,
              num_samples,
              alpha=0.5,
              R_trial=10,
              c=0.01,
              p_min=0.05,
              batch_size=10,
              chunk_size=1):
        """Performs SMC-ABC.

        Parameters
        ----------
        num_samples : int
            The number of required accepted samples
        alpha : float
            Culling percentage
        R_trial : int
            Number of perturbs per replenishment to estimate probability
        c : float
            Sensitivity for more perturbations
        p_min : float
            Termination condition as a probability of a successful perturbation
        batch_size : int
            The number of parameter points drawn and simulated per batch
        chunk_size : int
            The partition size of each batch

        Returns
        -------
        dict
            Keys:
            'accepted_samples' : the accepted parameter values
            'distances' : the accepted distance values
        """

        assert hasattr(
            self, "fixed_mean"), "Please call compute_fixed_mean before infer"

        # Get the dask graph and add another distances task to it
        graph_dict = core.get_graph_chunked(self.prior_function.draw, self.sim,
                                            self.summaries_function,
                                            batch_size, chunk_size)
        dist_func = lambda x: self.distance_function(self.fixed_mean, x)
        graph_dict["distances"] = core.get_distance(dist_func,
                                                    graph_dict["summarystats"],
                                                    chunked=True)

        # Culling Cutoff
        n_cull = round(alpha * num_samples)

        # Draw the initial population and compute distances
        population, distances = dask.compute(graph_dict['parameters'],
                                             graph_dict['distances'])
        population = core._reshape_chunks(population)
        distances = core._reshape_chunks(distances)

        while population.shape[0] < num_samples:
            params, dists = dask.compute(graph_dict["parameters"],
                                         graph_dict["distances"])
            params = core._reshape_chunks(params)
            dists = core._reshape_chunks(dists)
            population = np.vstack([population, params])
            distances = np.vstack([distances, dists])

        population = population[:num_samples]
        distances = distances[:num_samples, 0]

        terminate = False
        while not terminate:

            try:
                # Sort population by distance
                sorted_idxs = np.argsort(distances)
                population = population[sorted_idxs]
                distances = distances[sorted_idxs]

                # Cull the last Na
                population = population[:n_cull]
                distances = distances[:n_cull]
                tol = distances[-1]

                # Resample with replacement to replenish in the population
                resampled_idxs = np.random.choice(n_cull, num_samples - n_cull)
                population = np.vstack(
                    [population, population[resampled_idxs]])
                distances = np.concatenate(
                    [distances, distances[resampled_idxs]])

                # Adapt transition kernel using the new population
                self.perturbation_kernel.adapt(population)

                # For each replenished value, perturb and resample a few times
                # to get an idea of how easy it is to move to a lower distance
                perturb_tasks = []
                for i in range(n_cull, num_samples):
                    perturb_tasks.append(
                        self._perturb_resample(population[i, :], distances[i],
                                               R_trial, tol))
                res, = dask.compute(perturb_tasks)

                # Update the population with the perturbed population
                updated_ps, updated_distances, update_p_accs, N_accs = list(
                    zip(*res))

                population[n_cull:] = np.vstack(updated_ps)
                distances[n_cull:] = np.asarray(updated_distances)

                # Update metrics from the trial to estimate the probability
                # of a move to assess convergence and decide how many more
                # perturbation attempts to make
                p_acc = np.sum(update_p_accs) / (num_samples - n_cull)
                N_acc = np.sum(N_accs)

                R = int(round(np.log(c) / np.log(1 - p_acc)))

                # Perturb again with better estimate
                perturb_tasks = []
                for i in range(n_cull, num_samples):
                    perturb_tasks.append(
                        self._perturb_resample(population[i, :], distances[i],
                                               R - R_trial, tol))
                res, = dask.compute(perturb_tasks)

                updated_ps, updated_distances, update_p_accs, N_accs = list(
                    zip(*res))

                population[n_cull:] = np.vstack(updated_ps)
                distances[n_cull:] = np.asarray(updated_distances)

                p_acc += np.sum(update_p_accs) / (num_samples - n_cull)
                N_acc += np.sum(N_accs)

                print("Tol : {}, R : {}, p_acc : {}".format(tol, R, p_acc))
                if p_acc < p_min:
                    terminate = True
            except KeyboardInterrupt:
                return {'accepted_samples': population, 'distances': distances}
            except:
                raise

        return {'accepted_samples': population, 'distances': distances}
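The number of extra perturbation attempts R is derived from the trial acceptance probability; a small worked sketch of the formula used above, with illustrative values:

import numpy as np

# illustrative values: sensitivity c = 0.01, trial acceptance probability p_acc = 0.37
c, p_acc = 0.01, 0.37
R = int(round(np.log(c) / np.log(1 - p_acc)))   # log(0.01)/log(0.63) ~ 9.97 -> R = 10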
Example #5
    def compute(self, n_points=None, chunk_size=None, predictor=None):
        """
        Computes a batch of the parameter sweep.

        Parameters
        ----------
        n_points : int, optional. The batch size of the sweep. Defaults to default_batch_size.
        chunk_size : int, sets the chunk size
        predictor : function, optional. Use a model predictor based on the features as input as the 
                    final step of the workflow. The predictor function must take an array with the
                    same length as the joined feature output. 
                    TODO: currently only supports joined features    

        """
        cluster_mode = core._cluster_mode()
        if n_points is None:
            n_points = self.batch_size
        if chunk_size is None:
            chunk_size = self.chunk_size

        graph_dict = core.get_graph_chunked(self.sampling.draw,
                                            self.simulator,
                                            self.summaries.compute,
                                            batch_size=n_points,
                                            chunk_size=chunk_size)
        pred = []
        if predictor is not None:
            if callable(predictor):
                pred = core.get_prediction(predictor,
                                           graph_dict["summarystats"])
            else:
                raise ValueError("The predictor must be a callable function")
            # persist at workers, will run in background
            if cluster_mode:
                params_res, processed_res, result_res, pred_res = persist(
                    graph_dict["parameters"], graph_dict["trajectories"],
                    graph_dict["summarystats"], pred)
                # convert to futures
                futures = core.get_futures(result_res)
                f_pred = core.get_futures(pred_res)
                f_params = core.get_futures(params_res)
                f_ts = core.get_futures(processed_res)

                # keep track of indices...
                f_dict = {f.key: idx for idx, f in enumerate(f_pred)}
                # ...as we collect results on an "as completed" basis
                for f, pred in as_completed(f_pred, with_results=True):
                    idx = f_dict[f.key]
                    # get the parameter point
                    params = f_params[idx].result()
                    # get the trajectories
                    trajs = f_ts[idx].result()
                    # get the summary stats
                    stats = futures[idx].result()
                    # add to data collection
                    param = np.asarray(params)
                    traj = np.asarray(trajs)
                    stats = np.asarray(stats)
                    pred = np.asarray(pred)
                    self.data.add_points(inputs=param,
                                         time_series=traj,
                                         summary_stats=stats,
                                         user_labels=np.ones(len(stats)) * -1,
                                         targets=pred)
            else:
                params_res, processed_res, result_res, pred_res = compute(
                    graph_dict["parameters"], graph_dict["trajectories"],
                    graph_dict["summarystats"], pred)
                for e, pred in enumerate(pred_res):
                    param = np.asarray(params_res[e])
                    ts = np.asarray(processed_res[e])
                    stats = np.asarray(result_res[e])
                    pred = np.asarray(pred)
                    self.data.add_points(inputs=param,
                                         time_series=ts,
                                         summary_stats=stats,
                                         user_labels=np.ones(len(pred)) * -1,
                                         targets=pred)

        else:
            # TODO: avoid redundancy...
            if cluster_mode:
                params_res, processed_res, result_res = persist(
                    graph_dict["parameters"], graph_dict["trajectories"],
                    graph_dict["summarystats"])

                # convert to futures
                futures = core.get_futures(result_res)
                f_params = core.get_futures(params_res)
                f_ts = core.get_futures(processed_res)

                # keep track of indices...
                f_dict = {f.key: idx for idx, f in enumerate(futures)}
                # ...as we collect results on an "as completed" basis
                for f, res in as_completed(futures, with_results=True):
                    idx = f_dict[f.key]
                    # get the parameter point
                    params = f_params[idx].result()
                    # get the trajectories
                    trajs = f_ts[idx].result()
                    # add to data collection
                    param = np.asarray(params)
                    traj = np.asarray(trajs)
                    res = np.asarray(res)
                    self.data.add_points(inputs=param,
                                         time_series=traj,
                                         summary_stats=res,
                                         user_labels=np.ones(len(res)) * -1)
            else:
                params_res, processed_res, result_res = compute(
                    graph_dict["parameters"], graph_dict["trajectories"],
                    graph_dict["summarystats"])
                for e, res in enumerate(result_res):
                    param = np.asarray(params_res[e])
                    ts = np.asarray(processed_res[e])
                    res = np.asarray(res)
                    self.data.add_points(inputs=param,
                                         time_series=ts,
                                         summary_stats=res,
                                         user_labels=np.ones(len(res)) * -1)
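A minimal sketch of the predictor contract described in the docstring above; the trained_model object is a hypothetical placeholder, not part of the source:

import numpy as np

def my_predictor(features):
    # features: the joined summary-statistics vector for one parameter point
    features = np.asarray(features).reshape(1, -1)
    # hypothetical pre-trained model supplying the target label for that point
    return trained_model.predict(features)

# hypothetical usage: sweep.compute(n_points=20, chunk_size=5, predictor=my_predictor)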
Example #6
    def infer(self,
              num_samples,
              num_rounds,
              chunk_size=10,
              exploit=True,
              seed=None):
        np.random.seed(seed)
        thetas = []
        data_tot = []
        proposal = self.prior_function

        try:
            for i in range(num_rounds):

                graph_dict = core.get_graph_chunked(proposal.draw,
                                                    self.sim,
                                                    batch_size=num_samples,
                                                    chunk_size=chunk_size)

                if self.verbose:
                    print(f"starting round {i}")

                #Simulate data
                samples, data = dask.compute(graph_dict["parameters"],
                                             graph_dict["trajectories"])
                samples = core._reshape_chunks(samples)
                data = np.array(data)
                if self.verbose:
                    print('data shape: ', data.shape)

                #Reshaping for NN
                # standard is num_chunks x chunk_size x ensemble_size x num_species x time_points
                # new shape num_chunks*chunk_size*ensemble_size x time points x num_species
                data = data.reshape(
                    (np.prod(data.shape[:3]), data.shape[-1], data.shape[-2]))

                #append data from each round
                thetas.append(samples)
                #data_tot.append(data)

                #Split training and validation data
                #inputs, val_inputs, targets, val_targets = train_test_split(np.concatenate(data_tot, axis=0),
                #                                                                           np.concatenate(thetas, axis=0),
                #                                                                           train_size=0.95)

                inputs, val_inputs, targets, val_targets = train_test_split(
                    data, samples, train_size=0.8)

                #Construct the BNN model
                output_dim = targets.shape[-1]
                if not self._bnn_complied:
                    self._construct_bnn(inputs.shape[1:], output_dim,
                                        inputs.shape[0])

                # After the first round, re-compile the model with the updated proposal
                if i > 0:
                    self.model._compile_model(prior=self.prior_function,
                                              proposal=proposal_tf,
                                              default=False)

                if self.model.normal:

                    #Start training
                    self._train(inputs, targets, val_inputs, val_targets)

                    # Approximate the mixture of Gaussians with a single Gaussian
                    if exploit:
                        proposal_tf = self.model.model(self.data)
                        #proposal_m, proposal_var = MCinferMOG(self.data, self.model, self.num_monte_carlo, output_dim)
                        proposal_m, proposal_var = proposal_tf.mean(
                        ), proposal_tf.covariance()
                        proposal = GaussianPrior(proposal_m[0],
                                                 S=proposal_var[0])

                    #TODO: correction
                else:
                    raise ValueError(
                        "Current implementation only support Gaussian proposals, use add_normal = True when constructing BNN"
                    )

        except KeyboardInterrupt:
            if self.verbose:
                print(f"Terminating at round {i}")
            return np.array(thetas)
        except:
            raise
        if self.verbose:
            print(f"Done after {num_rounds} rounds")
        return proposal, np.array(thetas)
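The reshaping step above turns the chunked simulator output into a flat batch for the neural network; a small sketch with shapes carried over from Example #3 (ensemble_size = 1, 2 species, 101 time points, used here purely for illustration):

import numpy as np

# chunked output: num_chunks x chunk_size x ensemble_size x num_species x time_points
data = np.zeros((5, 2, 1, 2, 101))
flat = data.reshape((np.prod(data.shape[:3]), data.shape[-1], data.shape[-2]))
print(flat.shape)   # (10, 101, 2): one row per simulated trajectory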
Example #7
    def infer(self, num_samples, num_rounds, chunk_size=10, seed=None):
        np.random.seed(seed)
        theta = []
        local_sampler = CategoricalSampler(num_bins=self.num_bins)

        try:

            graph_dict = core.get_graph_chunked(self.prior_function,
                                                self.sim,
                                                batch_size=num_samples,
                                                chunk_size=chunk_size)

            for i in range(num_rounds):

                samples, data = dask.compute(graph_dict["parameters"],
                                             graph_dict["trajectories"])
                samples = core._reshape_chunks(samples)
                data = np.array(data)
                if self.verbose:
                    print('data shape: ', data.shape)

                #Reshaping for NN
                # standard is num_chunks x chunk_size x ensemble_size x num_species x time_points
                # new shape num_chunks*chunk_size*ensemble_size x time points x num_species
                data = data.reshape(
                    (np.prod(data.shape[:3]), data.shape[-1], data.shape[-2]))
                theta.append(samples)
                if i > 0:
                    data_, samples_ = _inBin(data, samples, theta[i])
                    data = np.append(data, data_, axis=0)
                    samples = np.append(samples, samples_, axis=0)

                #TODO: for every 2 combinations in parameter space
                #TODO: Change _create_train_val to not depend on self.train_thetas and
                #      self.train_ts
                self.train_thetas = samples
                self.train_ts = data

                # Create bins from continuous data
                train_, val_, bins_ = self._create_train_val(self.num_bins)

                input_shape = (data.shape[-2], data.shape[-1])
                output_shape = len(bins_)

                num_train_examples = len(data)

                bnn = BNNModel(input_shape, output_shape, num_train_examples)
                if self.verbose:
                    print(bnn.model.summary())
                    print('num bins: ', len(bins_))
                    print('input_shape: ', input_shape)
                    print('data shape: ', data.shape)

                bnn.train(self.train_ts, train_, self.val_ts, val_)

                #TODO: adaptive_thresh[i]
                local_sampler.probs = bnn.mc_sampling(self.data,
                                                      self.num_monte_carlo)
                local_sampler.bins = bins_
                self.prior_function = local_sampler.sample

                graph_dict = core.get_graph_chunked(self.prior_function,
                                                    self.sim,
                                                    batch_size=num_samples,
                                                    chunk_size=chunk_size)
        except KeyboardInterrupt:
            return np.array(theta)
        except:
            raise
        return np.array(theta)
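A minimal sketch of how the per-round parameter draws accumulate into the returned array (the concrete sizes are assumptions used only for illustration):

import numpy as np

num_rounds, num_samples, n_params = 4, 500, 2   # illustrative values
theta = [np.zeros((num_samples, n_params)) for _ in range(num_rounds)]
history = np.array(theta)
print(history.shape)   # (4, 500, 2): one block of parameter draws per round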
Example #8
    def rejection_sampling(self, num_samples, batch_size, chunk_size,
                           ensemble_size, normalize):
        """
        Perform ABC inference according to initialized configuration.

        Parameters
        ----------
        num_samples : int
            The number of required accepted samples
        batch_size : int
            The batch size of samples for performing rejection sampling
        chunk_size : int
            The partition size when splitting the fixed data, to avoid many individual
            dask tasks when the data is large
        normalize : bool
            If True, distances are normalized to [0, 1] before rejection

        Returns
        -------
        dict
            Keys:
            'accepted_samples' : the accepted parameter values
            'distances' : the accepted distance values
            'accepted_count' : the number of accepted samples
            'trial_count' : the total number of trials performed in order to converge
            'inferred_parameters' : the mean of the accepted parameter samples
        """
        accepted_count = 0
        trial_count = 0
        accepted_samples = []
        distances = []

        # if fixed_mean has not been computed
        assert hasattr(
            self, "fixed_mean"), "Please call compute_fixed_mean before infer"

        # Get dask graph
        graph_dict = core.get_graph_chunked(self.prior_function, self.sim,
                                            self.summaries_function,
                                            batch_size, chunk_size)

        dist_func = lambda x: self.distance_function(self.fixed_mean, x)
        graph_dict["distances"] = core.get_distance(dist_func,
                                                    graph_dict["summarystats"],
                                                    chunked=True)

        cluster_mode = core._cluster_mode()

        # do rejection sampling
        #while accepted_count < num_samples:

        #sim_dist_scaled = []
        #params = []
        #dists = []

        # If a dask cluster is used, use persist and futures, and collect results as they complete
        if cluster_mode:
            if self.use_logger:
                self.logger.info("running in cluster mode")
            res_param, res_dist = dask.persist(graph_dict["parameters"],
                                               graph_dict["distances"])

            futures_dist = core.get_futures(res_dist)
            futures_params = core.get_futures(res_param)

            keep_idx = {f.key: idx for idx, f in enumerate(futures_dist)}

            while accepted_count < num_samples:

                for f, dist in as_completed(futures_dist, with_results=True):
                    sim_dist_scaled = []
                    params = []
                    dists = []
                    for d in dist:
                        dists.append(d)
                        trial_count += 1
                        if normalize:
                            # Normalize distances between [0,1]
                            sim_dist_scaled.append(self.scale_distance(d))

                    idx = keep_idx[f.key]
                    param = futures_params[idx]
                    params_res = param.result()
                    for p in params_res:
                        params.append(p)

                    accepted_samples, distances, accepted_count = self._scale_reject(
                        sim_dist_scaled, dists, accepted_samples, distances,
                        params, accepted_count, normalize)
                    del dist, param  #TODO: remove all futures including simulation and summarystats
                    if accepted_count < num_samples:
                        new_chunk = core.get_graph_chunked(
                            self.prior_function, self.sim,
                            self.summaries_function, chunk_size, chunk_size)
                        new_chunk["distances"] = core.get_distance(
                            dist_func, new_chunk["summarystats"], chunked=True)

                        c_param, c_dist = dask.persist(new_chunk["parameters"],
                                                       new_chunk["distances"])
                        f_dist = core.get_futures(c_dist)[0]
                        f_param = core.get_futures(c_param)[0]
                        futures_dist.append(f_dist)
                        futures_params.append(f_param)

                        keep_idx[f_dist.key] = len(keep_idx)

                    else:
                        del futures_dist, futures_params, res_param, res_dist
                        self.results = {
                            'accepted_samples': accepted_samples,
                            'distances': distances,
                            'accepted_count': accepted_count,
                            'trial_count': trial_count,
                            'inferred_parameters': np.mean(accepted_samples,
                                                           axis=0)
                        }
                        return self.results

        # else use multiprocessing mode
        else:
            while accepted_count < num_samples:
                sim_dist_scaled = []
                params = []
                dists = []
                if self.use_logger:
                    self.logger.info("running in parallel mode")
                params, dists = dask.compute(graph_dict["parameters"],
                                             graph_dict["distances"])
                params = core._reshape_chunks(params)
                dists = core._reshape_chunks(dists)
                if normalize:
                    for d in dists:
                        sim_dist_scaled.append(self.scale_distance(d))

                accepted_samples, distances, accepted_count = self._scale_reject(
                    sim_dist_scaled, dists, accepted_samples, distances,
                    params, accepted_count, normalize)

                trial_count += batch_size

            self.results = {
                'accepted_samples': accepted_samples,
                'distances': distances,
                'accepted_count': accepted_count,
                'trial_count': trial_count,
                'inferred_parameters': np.mean(accepted_samples, axis=0)
            }
            return self.results
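A minimal usage sketch of the result dictionary returned above; the abc object and the argument values are assumptions, not from the source:

# hypothetical call:
# results = abc.rejection_sampling(num_samples=100, batch_size=200,
#                                  chunk_size=10, ensemble_size=1, normalize=True)
#
# the returned dictionary can then be summarised, for example:
# posterior_mean = results['inferred_parameters']          # mean of accepted samples
# acceptance_rate = results['accepted_count'] / results['trial_count']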