Example #1
def compare_get_global_values(i, provide_shape):
    """
    Compares the results of get global values for a vector of shape ``(comm.size*2,
    i)``.
    
    :param int i: Dimension of the vector of length ``comm.size*2``

    """
    if comm.rank == 0:
        if i == 0:
            original_array = np.array(np.random.random((comm.size * 2, )))
        else:
            original_array = np.array(np.random.random((comm.size * 2, i)))
    else:
        original_array = None
    original_array = comm.bcast(original_array)
    my_len = original_array.shape[0] / comm.size
    my_index = range(0 + comm.rank * my_len, (comm.rank + 1) * my_len)
    if i == 0:
        my_array = original_array[my_index]
    else:
        my_array = original_array[my_index, :]
    if provide_shape:
        recomposed_array = util.get_global_values(my_array,
                                                  original_array.shape)
    else:
        recomposed_array = util.get_global_values(my_array)
    nptest.assert_array_equal(original_array, recomposed_array)
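A minimal standalone sketch of the gather pattern this test exercises. It assumes, as in the example above, that ``bet.Comm`` exposes the MPI communicator as ``comm`` and that ``bet.util.get_global_values`` concatenates each rank's rows into one global array; run it under ``mpirun`` with more than one process.

# Hypothetical usage sketch (not part of the test above).
import numpy as np
import bet.util as util
from bet.Comm import comm

# Each rank owns two rows; get_global_values is assumed to stack the
# per-rank rows into a single (2*comm.size, 3) array on every rank.
local = np.full((2, 3), float(comm.rank))
global_array = util.get_global_values(local)
assert global_array.shape == (2 * comm.size, 3)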
Example #2
def prob(samples, data, rho_D_M, d_distr_samples, d_Tree=None): 
    r"""
    
    Calculates :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples}})`, the
    probability associated with a set of Voronoi cells defined by the model
    solves at :math:`(\lambda_{samples})`, where the volumes of these Voronoi
    cells are assumed to be equal under the MC assumption.

    :param samples: The samples in parameter space for which the model was run.
    :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim)
    :param data: The data from running the model given the samples.
    :type data: :class:`~numpy.ndarray` of size (num_samples, mdim)
    :param rho_D_M: The simple function approximation of rho_D
    :type rho_D_M: :class:`~numpy.ndarray` of shape  (M,) 
    :param d_distr_samples: The samples in the data space that define a
        partition of D for the simple function approximation
    :type d_distr_samples: :class:`~numpy.ndarray` of shape (M, mdim)
    :param d_Tree: :class:`~scipy.spatial.KDTree` for d_distr_samples
    :rtype: tuple of :class:`~numpy.ndarray`
    :returns: (P, lam_vol, io_ptr) where P is the probability associated with
        the samples, lam_vol the volumes associated with the samples, and
        io_ptr a pointer from data to the M bins.

    """
    if len(samples.shape) == 1:
        samples = np.expand_dims(samples, axis=1) 
    if len(data.shape) == 1:
        data = np.expand_dims(data, axis=1) 
    if len(d_distr_samples.shape) == 1:
        d_distr_samples = np.expand_dims(d_distr_samples, axis=1)
    if d_Tree is None:
        d_Tree = spatial.KDTree(d_distr_samples)

    # Set up local arrays for parallelism
    local_index = range(0+comm.rank, samples.shape[0], comm.size)
    samples_local = samples[local_index, :]
    data_local = data[local_index, :]
    local_array = np.array(local_index, dtype='int64')
        
    # Determine which inputs go to which M bins using the QoI
    (_, io_ptr) = d_Tree.query(data_local)

    # Apply the standard MC approximation and
    # calculate probabilities
    P_local = np.zeros((samples_local.shape[0],))
    for i in range(rho_D_M.shape[0]):
        Itemp = np.equal(io_ptr, i)
        Itemp_sum = np.sum(Itemp)
        Itemp_sum = comm.allreduce(Itemp_sum, op=MPI.SUM)
        if Itemp_sum > 0:
            P_local[Itemp] = rho_D_M[i]/Itemp_sum 
    P_global = util.get_global_values(P_local)
    global_index = util.get_global_values(local_array)
    P = np.zeros(P_global.shape)
    P[global_index] = P_global[:]

    lam_vol = (1.0/float(samples.shape[0]))*np.ones((samples.shape[0],))

    return (P, lam_vol, io_ptr)
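A serial NumPy sketch of the MC-assumption calculation performed above, with the MPI bookkeeping stripped out; this is illustrative only, not the library call. Each sample is assigned to the bin of its nearest ``d_distr_samples`` point and the bin's probability is split evenly among its samples.

# Serial, illustrative version of the probability assignment in prob().
import numpy as np
from scipy import spatial

def prob_serial(data, rho_D_M, d_distr_samples):
    # Assign each model output to its nearest simple-function bin.
    d_Tree = spatial.KDTree(d_distr_samples)
    (_, io_ptr) = d_Tree.query(data)
    P = np.zeros(data.shape[0])
    for i in range(rho_D_M.shape[0]):
        in_bin = np.equal(io_ptr, i)
        count = np.sum(in_bin)
        if count > 0:
            # Under the MC assumption every cell has equal volume, so each
            # sample in bin i receives an equal share of rho_D_M[i].
            P[in_bin] = rho_D_M[i] / float(count)
    return P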
Example #3
    def user_samples(self, samples, savefile, parallel=False):
        """
        Samples the model at ``samples`` and saves the results.

        Note: There are many ways to generate samples on a regular grid in
        Numpy and other Python packages. Instead of reimplementing them here we
        provide a sampler that utilizes user-specified samples.

        :param samples: samples to evaluate the model at
        :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim)
        :param string savefile: filename to save samples and data
        :param bool parallel: Flag for parallel implementation. Uses
            lowercase ``mpi4py`` methods if ``samples.shape[0]`` is not
            divisible by ``size``. Default value is ``False``. 
        :rtype: tuple
        :returns: (``parameter_samples``, ``data_samples``) where
            ``parameter_samples`` is np.ndarray of shape (num_samples, ndim)
            and ``data_samples`` is np.ndarray of shape (num_samples, mdim)

        """

        # Update the number of samples
        self.num_samples = samples.shape[0]

        # Solve the model at the samples
        if not (parallel) or comm.size == 1:
            data = self.lb_model(samples)
        elif parallel:
            my_len = self.num_samples / comm.size
            if comm.rank != comm.size - 1:
                my_index = range(0 + comm.rank * my_len,
                                 (comm.rank + 1) * my_len)
            else:
                my_index = range(0 + comm.rank * my_len, self.num_samples)
            if len(samples.shape) == 1:
                my_samples = samples[my_index]
            else:
                my_samples = samples[my_index, :]
            my_data = self.lb_model(my_samples)
            data = util.get_global_values(my_data)
            samples = util.get_global_values(my_samples)

        # if data or samples are of shape (num_samples,) expand dimensions
        if len(samples.shape) == 1:
            samples = np.expand_dims(samples, axis=1)
        if len(data.shape) == 1:
            data = np.expand_dims(data, axis=1)

        mdat = dict()
        self.update_mdict(mdat)
        mdat['samples'] = samples
        mdat['data'] = data

        if comm.rank == 0:
            self.save(mdat, savefile)

        return (samples, data)
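The parallel branch above splits the sample indices so that every rank except the last receives ``my_len`` samples and the last rank absorbs any remainder. A small serial sketch of that split, with hypothetical ``rank`` and ``size`` values:

# Illustrative index split mirroring the parallel branch of user_samples().
def split_indices(num_samples, rank, size):
    my_len = num_samples // size
    if rank != size - 1:
        return range(rank * my_len, (rank + 1) * my_len)
    # The last rank picks up the remainder when num_samples % size != 0.
    return range(rank * my_len, num_samples)

# e.g. 10 samples over 3 ranks -> [0, 1, 2], [3, 4, 5], [6, 7, 8, 9]
assert list(split_indices(10, 2, 3)) == [6, 7, 8, 9]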
Example #4
    def globalize_ptrs(self):
        r"""
        Globalizes comparison pointers by calling ``get_global_values``
        for both the left and right sample sets.

        """
        if (self._ptr_left_local is not None) and\
                (self._ptr_left is None):
            self._ptr_left = util.get_global_values(
                self._ptr_left_local)
        if (self._ptr_right_local is not None) and\
                (self._ptr_right is None):
            self._ptr_right = util.get_global_values(
                self._ptr_right_local)
Example #5
    def setUp(self):
        """
        Set up problem.
        """
        super(Test_prob_emulated_1to1, self).setUp()
        (self.P_emulate, self.lambda_emulate, _, _) = calcP.prob_emulated(
            samples=self.samples, data=self.data,
            rho_D_M=self.d_distr_prob,
            d_distr_samples=self.d_distr_samples,
            lambda_emulate=self.lambda_emulate, d_Tree=self.d_Tree)
        self.P_emulate = util.get_global_values(self.P_emulate)
Example #6
    def setUp(self):
        """
        Set up problem.
        """
        super(Test_prob_emulated_3to1, self).setUp()
        (self.P_emulate, self.lambda_emulate, _, _) = calcP.prob_emulated(
            samples=self.samples, data=self.data,
            rho_D_M=self.d_distr_prob,
            d_distr_samples=self.d_distr_samples,
            lambda_emulate=self.lambda_emulate, d_Tree=self.d_Tree)
        self.P_emulate_ref = np.loadtxt(data_path + "/3to1_prob_emulated.txt.gz")
        self.P_emulate = util.get_global_values(self.P_emulate)
Example #7
def postprocess(station_nums, ref_num):
    
    filename = 'P_q'+str(station_nums[0]+1)+'_q'+str(station_nums[1]+1)
    if len(station_nums) == 3:
        filename += '_q'+str(station_nums[2]+1)
    filename += '_truth_'+str(ref_num+1)

    data = Q[:, station_nums]
    q_ref = Q_ref[ref_num, station_nums]

    # Create Simple function approximation
    # Save points used to partition D for the simple function approximation and
    # the approximation itself (this can be used to make close comparisons...)
    (rho_D_M, d_distr_samples, d_Tree) = sfun.uniform_hyperrectangle(data,
            q_ref, bin_ratio=0.15,
            center_pts_per_edge=np.ones((data.shape[1],)))

    num_l_emulate = 1e6
    lambda_emulate = calcP.emulate_iid_lebesgue(lam_domain, num_l_emulate)
    print "Finished emulating lambda samples"

    # Calculate P on the actual samples estimating voronoi cell volume with MC
    # integration
    (P3, lam_vol3, lambda_emulate3, io_ptr3, emulate_ptr3) = calcP.prob_mc(samples,
            data, rho_D_M, d_distr_samples, lam_domain, lambda_emulate, d_Tree)
    print "Calculating prob_mc"
    mdict = dict()
    mdict['rho_D_M'] = rho_D_M
    mdict['d_distr_samples'] = d_distr_samples 
    mdict['lambda_emulate'] = util.get_global_values(lambda_emulate)   
    mdict['num_l_emulate'] = mdict['lambda_emulate'].shape[1]
    mdict['P3'] = util.get_global_values(P3)
    mdict['lam_vol3'] = util.get_global_values(lam_vol3)
    mdict['io_ptr3'] = util.get_global_values(io_ptr3)
    mdict['emulate_ptr3'] = emulate_ptr3
        
    if rank == 0:
        # Export P and compare to MATLAB solution visually
        sio.savemat(filename, mdict, do_compression=True)
Example #8
def my_model(io_file_name):
    # read in input from file
    io_mdat = sio.loadmat(io_file_name)
    input = io_mdat['input']
    # localize input
    input_local = np.array_split(input, comm.size)[comm.rank]
    # model is y = x[:, 0:dim/2 ] + x[:, dim/2:]
    output_local = sum(np.split(input_local, 2, 1))
    # save output to file
    io_mdat['output'] = util.get_global_values(output_local)
    comm.barrier()
    if comm.rank == 0:
        sio.savemat(io_file_name, io_mdat)
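The model in this example is simply the first half of the input columns added to the second half. A serial sketch of that single line, assuming an even number of columns:

# Illustrative, serial version of the model y = x[:, 0:dim/2] + x[:, dim/2:].
import numpy as np

x = np.arange(12.0).reshape(3, 4)       # 3 samples, dim = 4
left, right = np.split(x, 2, 1)         # two (3, 2) halves
y = left + right                        # same result as sum(np.split(x, 2, 1))
assert np.array_equal(y, x[:, :2] + x[:, 2:])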
Example #9
    def setUp(self):
        """
        Set up problem.
        """
        super(Test_prob_emulated_1to1, self).setUp()

        (self.P_emulate, self.lambda_emulate, _,
         _) = calcP.prob_emulated(samples=self.samples,
                                  data=self.data,
                                  rho_D_M=self.d_distr_prob,
                                  d_distr_samples=self.d_distr_samples,
                                  lambda_emulate=self.lambda_emulate,
                                  d_Tree=self.d_Tree)
        self.P_emulate = util.get_global_values(self.P_emulate)
Example #10
    def setUp(self):
        """
        Set up problem.
        """
        super(Test_prob_emulated_3to1, self).setUp()
        (self.P_emulate, self.lambda_emulate, _, _) = calcP.prob_emulated(
            samples=self.samples, data=self.data, rho_D_M=self.d_distr_prob,
            d_distr_samples=self.d_distr_samples,
            lambda_emulate=self.lambda_emulate, d_Tree=self.d_Tree)
        self.P_emulate_ref = np.loadtxt(
            data_path + "/3to1_prob_emulated.txt.gz")
        self.P_emulate = util.get_global_values(self.P_emulate)
Example #11
    def set_ptr_right(self, globalize=True):
        """
        Creates the pointer from ``self._comparison_sample_set`` to
        ``self._right_sample_set``

        .. seealso::

            :meth:`scipy.spatial.KDTree.query`

        :param bool globalize: flag whether or not to globalize
            ``self._ptr_right``

        """
        if self._comparison_sample_set._values_local is None:
            self._comparison_sample_set.global_to_local()

        (_, self._ptr_right_local) = self._right_sample_set.query(
            self._comparison_sample_set._values_local)

        if globalize:
            self._ptr_right = util.get_global_values(
                self._ptr_right_local)
        assert self._right_sample_set.check_num() >= max(self._ptr_right_local)
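A standalone sketch of the nearest-neighbor pointer that ``query`` produces, using scipy.spatial.KDTree directly (the sample-set ``query`` above is presumably backed by the same call, per the ``seealso``):

# Illustrative nearest-neighbor pointer with scipy's KDTree.
import numpy as np
from scipy import spatial

right_values = np.array([[0.0], [1.0], [2.0]])        # "right" sample set
comparison_values = np.array([[0.1], [1.6], [1.9]])   # comparison sample set

tree = spatial.KDTree(right_values)
(_, ptr) = tree.query(comparison_values)
# ptr[i] is the index of the right sample nearest to comparison sample i.
assert ptr.tolist() == [0, 2, 2]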
Example #12
    def generalized_chains(self,
                           param_min,
                           param_max,
                           t_set,
                           kern,
                           savefile,
                           initial_sample_type="lhs",
                           criterion='center'):
        """
        Basic adaptive sampling algorithm using generalized chains.
       
        :param string initial_sample_type: type of initial sample: random (or
            'r'), latin hypercube ('lhs'), or space-filling curve (TBD)
        :param param_min: minimum value for each parameter dimension
        :type param_min: :class:`numpy.ndarray` (ndim,)
        :param param_max: maximum value for each parameter dimension
        :type param_max: :class:`numpy.ndarray` (ndim,)
        :param t_set: method for creating new parameter steps of a given step
            size based on the parameter domain size
        :type t_set: :class:`bet.sampling.adaptiveSampling.transition_set`
        :param kern: functional that acts on the data used to
            determine the proposed change to the ``step_size``
        :type kern: :class:`~bet.sampling.adaptiveSampling.kernel`
        :param string savefile: filename to save samples and data
        :param string criterion: latin hypercube criterion see 
            `PyDOE <http://pythonhosted.org/pyDOE/randomized.html>`_
        :rtype: tuple
        :returns: (``parameter_samples``, ``data_samples``, ``all_step_ratios``) where
            ``parameter_samples`` is np.ndarray of shape (num_samples, ndim),
            ``data_samples`` is np.ndarray of shape (num_samples, mdim), and 
            ``all_step_ratios`` is np.ndarray of shape (num_chains,
            chain_length)
        """
        if comm.size > 1:
            psavefile = os.path.join(
                os.path.dirname(savefile),
                "proc{}{}".format(comm.rank, os.path.basename(savefile)))

        # Initialize Nx1 vector Step_size = something reasonable (based on size
        # of domain and transition set type)
        # Calculate domain size
        param_left = np.repeat([param_min], self.num_chains_pproc, 0)
        param_right = np.repeat([param_max], self.num_chains_pproc, 0)

        param_width = param_right - param_left
        # Calculate step_size
        max_ratio = t_set.max_ratio
        min_ratio = t_set.min_ratio
        step_ratio = t_set.init_ratio * np.ones(self.num_chains_pproc)

        # Initialize first batch of N samples (maybe taken from latin
        # hypercube/space-filling curve to fully explore parameter space - not
        # necessarily random). Call these Samples_old.
        (samples_old,
         data_old) = super(sampler,
                           self).random_samples(initial_sample_type, param_min,
                                                param_max, savefile,
                                                self.num_chains, criterion)
        self.num_samples = self.chain_length * self.num_chains
        comm.Barrier()

        # now split it all up
        if comm.size > 1:
            MYsamples_old = np.empty((np.shape(samples_old)[0] / comm.size,
                                      np.shape(samples_old)[1]))
            comm.Scatter([samples_old, MPI.DOUBLE],
                         [MYsamples_old, MPI.DOUBLE])
            MYdata_old = np.empty(
                (np.shape(data_old)[0] / comm.size, np.shape(data_old)[1]))
            comm.Scatter([data_old, MPI.DOUBLE], [MYdata_old, MPI.DOUBLE])
        else:
            MYsamples_old = np.copy(samples_old)
            MYdata_old = np.copy(data_old)

        samples = MYsamples_old
        data = MYdata_old
        all_step_ratios = step_ratio
        (kern_old, proposal) = kern.delta_step(MYdata_old, None)
        mdat = dict()
        self.update_mdict(mdat)

        for batch in xrange(1, self.chain_length):
            # For each of N samples_old, create N new parameter samples using
            # transition set and step_ratio. Call these samples samples_new.
            samples_new = t_set.step(step_ratio, param_width, param_left,
                                     param_right, MYsamples_old)

            # Solve the model for the samples_new.
            data_new = self.lb_model(samples_new)

            # Make some decision about changing step_size(k).  There are
            # multiple ways to do this.
            # Determine step size
            (kern_old, proposal) = kern.delta_step(data_new, kern_old)
            step_ratio = proposal * step_ratio
            # Is the ratio greater than max?
            step_ratio[step_ratio > max_ratio] = max_ratio
            # Is the ratio less than min?
            step_ratio[step_ratio < min_ratio] = min_ratio

            # Save and export concatenated arrays
            if self.chain_length < 4:
                pass
            elif (batch + 1) % (self.chain_length / 4) == 0:
                print "Current chain length: " + str(batch + 1) + "/" + str(
                    self.chain_length)
            samples = np.concatenate((samples, samples_new))
            data = np.concatenate((data, data_new))
            all_step_ratios = np.concatenate((all_step_ratios, step_ratio))
            mdat['step_ratios'] = all_step_ratios
            mdat['samples'] = samples
            mdat['data'] = data
            if comm.size > 1:
                super(sampler, self).save(mdat, psavefile)
            else:
                super(sampler, self).save(mdat, savefile)
            MYsamples_old = samples_new

        # collect everything
        MYsamples = np.copy(samples)
        MYdata = np.copy(data)
        MYall_step_ratios = np.copy(all_step_ratios)
        # ``parameter_samples`` is np.ndarray of shape (num_samples, ndim)
        samples = util.get_global_values(MYsamples,
                                         shape=(self.num_samples,
                                                np.shape(MYsamples)[1]))
        # and ``data_samples`` is np.ndarray of shape (num_samples, mdim)
        data = util.get_global_values(MYdata,
                                      shape=(self.num_samples,
                                             np.shape(MYdata)[1]))
        # ``all_step_ratios`` is np.ndarray of shape (num_chains,
        # chain_length)
        all_step_ratios = util.get_global_values(MYall_step_ratios,
                                                 shape=(self.num_samples, ))
        all_step_ratios = np.reshape(all_step_ratios,
                                     (self.num_chains, self.chain_length))

        # save everything
        mdat['step_ratios'] = all_step_ratios
        mdat['samples'] = samples
        mdat['data'] = data
        super(sampler, self).save(mdat, savefile)

        return (samples, data, all_step_ratios)
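The core of the adaptive step update above is: scale each chain's step ratio by the kernel's proposal, then clamp to the transition set's bounds. A minimal sketch of that update in isolation, with made-up numbers:

# Illustrative step-ratio update used inside the chain loop above.
import numpy as np

def update_step_ratio(step_ratio, proposal, min_ratio, max_ratio):
    step_ratio = proposal * step_ratio
    step_ratio[step_ratio > max_ratio] = max_ratio   # cap overly large steps
    step_ratio[step_ratio < min_ratio] = min_ratio   # keep steps from vanishing
    return step_ratio

ratios = update_step_ratio(np.array([0.5, 0.5, 0.5]),
                           np.array([2.0, 0.1, 1.0]),
                           min_ratio=0.1, max_ratio=0.75)
# ratios -> [0.75, 0.1, 0.5]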
Example #13
    def generalized_chains(self, param_min, param_max, t_set, kern,
            savefile, initial_sample_type="lhs", criterion='center'):
        """
        Basic adaptive sampling algorithm using generalized chains.
       
        :param string initial_sample_type: type of initial sample: random (or
            'r'), latin hypercube ('lhs'), or space-filling curve (TBD)
        :param param_min: minimum value for each parameter dimension
        :type param_min: :class:`numpy.ndarray` (ndim,)
        :param param_max: maximum value for each parameter dimension
        :type param_max: :class:`numpy.ndarray` (ndim,)
        :param t_set: method for creating new parameter steps of a given step
            size based on the parameter domain size
        :type t_set: :class:`bet.sampling.adaptiveSampling.transition_set`
        :param kern: functional that acts on the data used to
            determine the proposed change to the ``step_size``
        :type kern: :class:`~bet.sampling.adaptiveSampling.kernel`
        :param string savefile: filename to save samples and data
        :param string criterion: latin hypercube criterion see 
            `PyDOE <http://pythonhosted.org/pyDOE/randomized.html>`_
        :rtype: tuple
        :returns: (``parameter_samples``, ``data_samples``, ``all_step_ratios``) where
            ``parameter_samples`` is np.ndarray of shape (num_samples, ndim),
            ``data_samples`` is np.ndarray of shape (num_samples, mdim), and 
            ``all_step_ratios`` is np.ndarray of shape (num_chains,
            chain_length)
        """
        if comm.size > 1:
            psavefile = os.path.join(os.path.dirname(savefile),
                    "proc{}{}".format(comm.rank, os.path.basename(savefile)))

        # Initialize Nx1 vector Step_size = something reasonable (based on size
        # of domain and transition set type)
        # Calculate domain size
        param_left = np.repeat([param_min], self.num_chains_pproc, 0)
        param_right = np.repeat([param_max], self.num_chains_pproc, 0)

        param_width = param_right - param_left
        # Calculate step_size
        max_ratio = t_set.max_ratio
        min_ratio = t_set.min_ratio
        step_ratio = t_set.init_ratio*np.ones(self.num_chains_pproc)
       
        # Initialize first batch of N samples (maybe taken from latin
        # hypercube/space-filling curve to fully explore parameter space - not
        # necessarily random). Call these Samples_old.
        (samples_old, data_old) = super(sampler, self).random_samples(
                initial_sample_type, param_min, param_max, savefile,
                self.num_chains, criterion)
        self.num_samples = self.chain_length * self.num_chains
        comm.Barrier()
        
        # now split it all up
        if comm.size > 1:
            MYsamples_old = np.empty((np.shape(samples_old)[0]/comm.size, np.shape(samples_old)[1]))
            comm.Scatter([samples_old, MPI.DOUBLE], [MYsamples_old, MPI.DOUBLE])
            MYdata_old = np.empty((np.shape(data_old)[0]/comm.size, np.shape(data_old)[1]))
            comm.Scatter([data_old, MPI.DOUBLE], [MYdata_old,
                                                  MPI.DOUBLE])
        else:
            MYsamples_old = np.copy(samples_old)
            MYdata_old = np.copy(data_old)

        samples = MYsamples_old
        data = MYdata_old
        all_step_ratios = step_ratio
        (kern_old, proposal) = kern.delta_step(MYdata_old, None)
        mdat = dict()
        self.update_mdict(mdat)

        for batch in xrange(1, self.chain_length):
            # For each of N samples_old, create N new parameter samples using
            # transition set and step_ratio. Call these samples samples_new.
            samples_new = t_set.step(step_ratio, param_width,
                    param_left, param_right, MYsamples_old)
            
            # Solve the model for the samples_new.
            data_new = self.lb_model(samples_new)
            
            # Make some decision about changing step_size(k).  There are
            # multiple ways to do this.
            # Determine step size
            (kern_old, proposal) = kern.delta_step(data_new, kern_old)
            step_ratio = proposal*step_ratio
            # Is the ratio greater than max?
            step_ratio[step_ratio > max_ratio] = max_ratio
            # Is the ratio less than min?
            step_ratio[step_ratio < min_ratio] = min_ratio

            # Save and export concatenated arrays
            if self.chain_length < 4:
                pass
            elif (batch+1)%(self.chain_length/4) == 0:
                print "Current chain length: "+str(batch+1)+"/"+str(self.chain_length)
            samples = np.concatenate((samples, samples_new))
            data = np.concatenate((data, data_new))
            all_step_ratios = np.concatenate((all_step_ratios, step_ratio))
            mdat['step_ratios'] = all_step_ratios
            mdat['samples'] = samples
            mdat['data'] = data
            if comm.size > 1:
                super(sampler, self).save(mdat, psavefile)
            else:
                super(sampler, self).save(mdat, savefile)
            MYsamples_old = samples_new

        # collect everything
        MYsamples = np.copy(samples)
        MYdata = np.copy(data)
        MYall_step_ratios = np.copy(all_step_ratios)
        # ``parameter_samples`` is np.ndarray of shape (num_samples, ndim)
        samples = util.get_global_values(MYsamples,
                shape=(self.num_samples, np.shape(MYsamples)[1]))           
        # and ``data_samples`` is np.ndarray of shape (num_samples, mdim)
        data = util.get_global_values(MYdata, shape=(self.num_samples,
            np.shape(MYdata)[1]))
        # ``all_step_ratios`` is np.ndarray of shape (num_chains,
        # chain_length)
        all_step_ratios = util.get_global_values(MYall_step_ratios,
                shape=(self.num_samples,))
        all_step_ratios = np.reshape(all_step_ratios, (self.num_chains, self.chain_length))

        # save everything
        mdat['step_ratios'] = all_step_ratios
        mdat['samples'] = samples
        mdat['data'] = data
        super(sampler, self).save(mdat, savefile)

        return (samples, data, all_step_ratios)
Example #14
    def generalized_chains(self, param_min, param_max, t_set, kern,
            savefile, initial_sample_type="random", criterion='center',
            hot_start=0): 
        """
        Basic adaptive sampling algorithm using generalized chains.
       
        :param string initial_sample_type: type of initial sample: random (or
            'r'), latin hypercube ('lhs'), or space-filling curve (TBD)
        :param param_min: minimum value for each parameter dimension
        :type param_min: :class:`numpy.ndarray` (ndim,)
        :param param_max: maximum value for each parameter dimension
        :type param_max: :class:`numpy.ndarray` (ndim,)
        :param t_set: method for creating new parameter steps of a given step
            size based on the parameter domain size
        :type t_set: :class:`bet.sampling.adaptiveSampling.transition_set`
        :param kern: functional that acts on the data used to
            determine the proposed change to the ``step_size``
        :type kern: :class:`~bet.sampling.adaptiveSampling.kernel`
        :param string savefile: filename to save samples and data
        :param int hot_start: Flag for whether or not to hot start the sampling
            chains from a previous set of chains. Note that ``num_chains`` must
            be the same, but ``num_chains_pproc`` need not be the same. 0 -
            cold start, 1 - hot start from uncompleted run, 2 - hot
            start from finished run
        :param string criterion: latin hypercube criterion see 
            `PyDOE <http://pythonhosted.org/pyDOE/randomized.html>`_
        
        :rtype: tuple
        :returns: (``parameter_samples``, ``data_samples``,
            ``all_step_ratios``) where ``parameter_samples`` is np.ndarray of
            shape (num_samples, ndim), ``data_samples`` is np.ndarray of shape
            (num_samples, mdim), and ``all_step_ratios`` is np.ndarray of shape
            (num_chains, chain_length)
        
        """
        if comm.size > 1:
            psavefile = os.path.join(os.path.dirname(savefile),
                    "proc{}_{}".format(comm.rank, os.path.basename(savefile)))

        # Initialize Nx1 vector Step_size = something reasonable (based on size
        # of domain and transition set type)
        # Calculate domain size
        param_left = np.repeat([param_min], self.num_chains_pproc, 0)
        param_right = np.repeat([param_max], self.num_chains_pproc, 0)

        param_width = param_right - param_left
        # Calculate step_size
        max_ratio = t_set.max_ratio
        min_ratio = t_set.min_ratio

        if not hot_start:
            step_ratio = t_set.init_ratio*np.ones(self.num_chains_pproc)
           
            # Initialize first batch of N samples (maybe taken from latin
            # hypercube/space-filling curve to fully explore parameter space -
            # not necessarily random). Call these Samples_old.
            (samples_old, data_old) = super(sampler, self).random_samples(
                    initial_sample_type, param_min, param_max, savefile,
                    self.num_chains, criterion)
            self.num_samples = self.chain_length * self.num_chains
            comm.Barrier()
            
            # now split it all up
            if comm.size > 1:
                MYsamples_old = np.empty((np.shape(samples_old)[0]/comm.size,
                    np.shape(samples_old)[1])) 
                comm.Scatter([samples_old, MPI.DOUBLE], [MYsamples_old,
                    MPI.DOUBLE])
                MYdata_old = np.empty((np.shape(data_old)[0]/comm.size,
                    np.shape(data_old)[1])) 
                comm.Scatter([data_old, MPI.DOUBLE], [MYdata_old, MPI.DOUBLE])
            else:
                MYsamples_old = np.copy(samples_old)
                MYdata_old = np.copy(data_old)

            samples = MYsamples_old
            data = MYdata_old
            all_step_ratios = step_ratio
            (kern_old, proposal) = kern.delta_step(MYdata_old, None)
            start_ind = 1
        if hot_start:
            # LOAD FILES
            if hot_start == 1: # HOT START FROM PARTIAL RUN
                if comm.rank == 0:
                    print "HOT START from partial run"
                # Find and open save files
                save_dir = os.path.dirname(savefile)
                base_name = os.path.basename(savefile)
                mdat_files = glob.glob(os.path.join(save_dir,
                        "proc*_{}".format(base_name)))
                if len(mdat_files) == 0:
                    print "HOT START using serial file"
                    mdat = sio.loadmat(savefile)
                    samples = mdat['samples']
                    data = mdat['data']
                    kern_old = np.squeeze(mdat['kern_old'])
                    all_step_ratios = np.squeeze(mdat['step_ratios'])
                    chain_length = samples.shape[0]/self.num_chains
                    if all_step_ratios.shape == (self.num_chains,
                            chain_length):
                        print "Serial file, from completed run updating hot_start"
                        hot_start = 2
                    # reshape if parallel
                    if comm.size > 1:
                        samples = np.reshape(samples, (self.num_chains,
                            chain_length, -1), 'F')
                        data = np.reshape(data, (self.num_chains,
                            chain_length, -1), 'F')
                        all_step_ratios = np.reshape(all_step_ratios,
                                (self.num_chains, -1), 'F')
                elif hot_start == 1 and len(mdat_files) == comm.size:
                    print "HOT START using parallel files (same nproc)"
                    # if the number of processors is the same then set mdat to
                    # be the one with the matching processor number (doesn't
                    # really matter)
                    mdat = sio.loadmat(mdat_files[comm.rank])
                    samples = mdat['samples']
                    data = mdat['data']
                    kern_old = np.squeeze(mdat['kern_old'])
                    all_step_ratios = np.squeeze(mdat['step_ratios'])
                elif hot_start == 1 and len(mdat_files) != comm.size:
                    print "HOT START using parallel files (diff nproc)"
                    # Determine how many processors the previous data used
                    # otherwise gather the data from mdat and then scatter
                    # among the processors and update mdat
                    mdat_files_local = comm.scatter(mdat_files)
                    mdat_local = [sio.loadmat(m) for m in mdat_files_local]
                    mdat_list = comm.allgather(mdat_local)
                    mdat_global = []
                    # instead of a list of lists, create a list of mdat
                    for mlist in mdat_list:
                        mdat_global.extend(mlist)
                    # get num_proc and num_chains_pproc for previous run
                    old_num_proc = max((len(mdat_list), 1))
                    old_num_chains_pproc = self.num_chains/old_num_proc
                    # get batch size and/or number of dimensions
                    chain_length = mdat_global[0]['samples'].shape[0]/\
                            old_num_chains_pproc
                    # create lists of local data
                    samples = []
                    data = []
                    all_step_ratios = []
                    kern_old = []
                    # RESHAPE old_num_chains_pproc, chain_length(or batch), dim
                    for mdat in mdat_global:
                        samples.append(np.reshape(mdat['samples'],
                            (old_num_chains_pproc, chain_length, -1), 'F'))
                        data.append(np.reshape(mdat['data'],
                            (old_num_chains_pproc, chain_length, -1), 'F'))
                        all_step_ratios.append(np.reshape(mdat['step_ratios'],
                            (old_num_chains_pproc, chain_length, -1), 'F'))
                        kern_old.append(np.reshape(mdat['kern_old'],
                            (old_num_chains_pproc,), 'F'))
                    # turn into arrays
                    samples = np.concatenate(samples)
                    data = np.concatenate(data)
                    all_step_ratios = np.concatenate(all_step_ratios)
                    kern_old = np.concatenate(kern_old)
            if hot_start == 2: # HOT START FROM COMPLETED RUN:
                if comm.rank == 0:
                    print "HOT START from completed run"
                mdat = sio.loadmat(savefile)
                samples = mdat['samples']
                data = mdat['data']
                kern_old = np.squeeze(mdat['kern_old'])
                all_step_ratios = np.squeeze(mdat['step_ratios'])
                chain_length = samples.shape[0]/self.num_chains
                mdat_files = []
                # reshape if parallel
                if comm.size > 1:
                    samples = np.reshape(samples, (self.num_chains,
                        chain_length, -1), 'F')
                    data = np.reshape(data, (self.num_chains,
                        chain_length, -1), 'F')
                    all_step_ratios = np.reshape(all_step_ratios,
                            (self.num_chains, chain_length), 'F')
            # SPLIT DATA IF NECESSARY
            if comm.size > 1 and (hot_start == 2 or (hot_start == 1 and \
                    len(mdat_files) != comm.size)):
                # Use split to split along num_chains
                samples = np.reshape(np.split(samples, comm.size,
                    0)[comm.rank], (self.num_chains_pproc*chain_length, -1),
                    'F')
                data = np.reshape(np.split(data, comm.size, 0)[comm.rank],
                        (self.num_chains_pproc*chain_length, -1), 'F')
                all_step_ratios = np.reshape(np.split(all_step_ratios,
                    comm.size, 0)[comm.rank],
                    (self.num_chains_pproc*chain_length,), 'F')
                kern_old = np.reshape(np.split(kern_old, comm.size,
                    0)[comm.rank], (self.num_chains_pproc,), 'F')
            else:
                all_step_ratios = np.reshape(all_step_ratios, (-1,), 'F')
            # Set samples, data, all_step_ratios, mdat, step_ratio,
            # MYsamples_old, and kern_old accordingly
            step_ratio = all_step_ratios[-self.num_chains_pproc:]
            MYsamples_old = samples[-self.num_chains_pproc:, :]
            # Determine how many batches have been run
            start_ind = samples.shape[0]/self.num_chains_pproc
        
        mdat = dict()
        self.update_mdict(mdat)
        for batch in xrange(start_ind, self.chain_length):
            # For each of N samples_old, create N new parameter samples using
            # transition set and step_ratio. Call these samples samples_new.
            samples_new = t_set.step(step_ratio, param_width,
                    param_left, param_right, MYsamples_old)
        
            # Solve the model for the samples_new.
            data_new = self.lb_model(samples_new)
            
            # Make some decision about changing step_size(k).  There are
            # multiple ways to do this.
            # Determine step size
            (kern_old, proposal) = kern.delta_step(data_new, kern_old)
            step_ratio = proposal*step_ratio
            # Is the ratio greater than max?
            step_ratio[step_ratio > max_ratio] = max_ratio
            # Is the ratio less than min?
            step_ratio[step_ratio < min_ratio] = min_ratio

            # Save and export concatenated arrays
            if self.chain_length < 4:
                pass
            elif comm.rank == 0 and (batch+1)%(self.chain_length/4) == 0:
                print "Current chain length: "+\
                            str(batch+1)+"/"+str(self.chain_length)
            samples = np.concatenate((samples, samples_new))
            data = np.concatenate((data, data_new))
            all_step_ratios = np.concatenate((all_step_ratios, step_ratio))
            mdat['step_ratios'] = all_step_ratios
            mdat['samples'] = samples
            mdat['data'] = data
            mdat['kern_old'] = kern_old
            if comm.size > 1:
                super(sampler, self).save(mdat, psavefile)
            else:
                super(sampler, self).save(mdat, savefile)
            MYsamples_old = samples_new

        # collect everything
        MYsamples = np.copy(samples)
        MYdata = np.copy(data)
        MYall_step_ratios = np.copy(all_step_ratios)
        # ``parameter_samples`` is np.ndarray of shape (num_samples, ndim)
        samples = util.get_global_values(MYsamples,
                shape=(self.num_samples, np.shape(MYsamples)[1]))           
        # and ``data_samples`` is np.ndarray of shape (num_samples, mdim)
        data = util.get_global_values(MYdata, shape=(self.num_samples,
            np.shape(MYdata)[1]))
        # ``all_step_ratios`` is np.ndarray of shape (num_chains,
        # chain_length)
        all_step_ratios = util.get_global_values(MYall_step_ratios,
                shape=(self.num_samples,))
        all_step_ratios = np.reshape(all_step_ratios, (self.num_chains,
            self.chain_length), 'F')

        # save everything
        mdat['step_ratios'] = all_step_ratios
        mdat['samples'] = samples
        mdat['data'] = data
        mdat['kern_old'] = util.get_global_values(kern_old,
                shape=(self.num_chains,))
        super(sampler, self).save(mdat, savefile)

        return (samples, data, all_step_ratios)
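The hot start above looks for the per-processor save files written at the top of the routine. A small sketch of the naming pattern and the glob used to recover them (hypothetical file names):

# Illustrative per-rank save-file naming and the matching glob pattern.
import glob
import os

savefile = "chains.mat"                 # hypothetical serial save name
rank = 3
psavefile = os.path.join(os.path.dirname(savefile),
                         "proc{}_{}".format(rank, os.path.basename(savefile)))
# psavefile -> "proc3_chains.mat"

# A hot start gathers all per-rank files back with the same pattern:
mdat_files = glob.glob(os.path.join(
    os.path.dirname(savefile),
    "proc*_{}".format(os.path.basename(savefile))))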
Example #15
def prob_mc(samples, data, rho_D_M, d_distr_samples,
            lambda_emulate=None, d_Tree=None): 
    r"""
    Calculates :math:`P_{\Lambda}(\mathcal{V}_{\lambda_{samples}})`, the
    probability associated with a set of Voronoi cells defined by the model
    solves at :math:`(\lambda_{samples})`, where the volumes of these Voronoi
    cells are approximated using MC integration.

    :param samples: The samples in parameter space for which the model was run.
    :type samples: :class:`~numpy.ndarray` of shape (num_samples, ndim)
    :param data: The data from running the model given the samples.
    :type data: :class:`~numpy.ndarray` of size (num_samples, mdim)
    :param rho_D_M: The simple function approximation of rho_D
    :type rho_D_M: :class:`~numpy.ndarray` of shape  (M,) 
    :param d_distr_samples: The samples in the data space that define a
        partition of D for the simple function approximation
    :type d_distr_samples: :class:`~numpy.ndarray` of shape (M, mdim)
    :param d_Tree: :class:`~scipy.spatial.KDTree` for d_distr_samples
    :param lambda_emulate: Samples used to partition the parameter space

    :rtype: tuple of :class:`~numpy.ndarray` of shapes (num_samples,),
        (num_samples,), (num_l_emulate, ndim), (num_samples,), (num_l_emulate,)
    :returns: (P, lam_vol, lambda_emulate, io_ptr, emulate_ptr) where P is the
        probability associated with samples, lam_vol the volumes associated
        with the samples, io_ptr a pointer from data to M bins, and emulate_ptr
        a pointer from emulated samples to samples (in parameter space)

    """
    if len(samples.shape) == 1:
        samples = np.expand_dims(samples, axis=1) 
    if len(data.shape) == 1:
        data = np.expand_dims(data, axis=1) 
    if lambda_emulate is None:
        lambda_emulate = samples
    if len(d_distr_samples.shape) == 1:
        d_distr_samples = np.expand_dims(d_distr_samples, axis=1)
    if d_Tree is None:
        d_Tree = spatial.KDTree(d_distr_samples)
        
    # Determine which inputs go to which M bins using the QoI
    (_, io_ptr) = d_Tree.query(data)
    
    # Determine which emulated samples match with which model run samples
    l_Tree = spatial.KDTree(samples)
    (_, emulate_ptr) = l_Tree.query(lambda_emulate)

    # Apply the standard MC approximation to determine the number of emulated
    # samples per model run sample. This is for approximating 
    # \mu_Lambda(A_i \intersect b_j)
    lam_vol = np.zeros((samples.shape[0],)) 
    for i in range(samples.shape[0]):
        lam_vol[i] = np.sum(np.equal(emulate_ptr, i))
    clam_vol = np.copy(lam_vol) 
    comm.Allreduce([lam_vol, MPI.DOUBLE], [clam_vol, MPI.DOUBLE], op=MPI.SUM)
    lam_vol = clam_vol
    num_emulated = lambda_emulate.shape[0]
    num_emulated = comm.allreduce(num_emulated, op=MPI.SUM)
    lam_vol = lam_vol/(num_emulated)

    # Set up local arrays for parallelism
    local_index = range(0+comm.rank, samples.shape[0], comm.size)
    samples_local = samples[local_index, :]
    data_local = data[local_index, :]
    lam_vol_local = lam_vol[local_index]
    local_array = np.array(local_index, dtype='int64')
        
    # Determine which inputs go to which M bins using the QoI
    (_, io_ptr_local) = d_Tree.query(data_local)

    # Calculate Probabilities
    P_local = np.zeros((samples_local.shape[0],))
    for i in range(rho_D_M.shape[0]):
        Itemp = np.equal(io_ptr_local, i)
        Itemp_sum = np.sum(lam_vol_local[Itemp])
        Itemp_sum = comm.allreduce(Itemp_sum, op=MPI.SUM)
        if Itemp_sum > 0:
            P_local[Itemp] = rho_D_M[i]*lam_vol_local[Itemp]/Itemp_sum 
    P_global = util.get_global_values(P_local)
    global_index = util.get_global_values(local_array)
    P = np.zeros(P_global.shape)
    P[global_index] = P_global[:]
    return (P, lam_vol, lambda_emulate, io_ptr, emulate_ptr)
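A serial sketch of the volume estimate used above: each sample's Voronoi cell volume is approximated by the fraction of emulated points whose nearest sample it is (illustrative only, no MPI reduction):

# Serial, illustrative version of the MC Voronoi-volume estimate in prob_mc().
import numpy as np
from scipy import spatial

def mc_cell_volumes(samples, lambda_emulate):
    # Count how many emulated points land in each sample's Voronoi cell.
    l_Tree = spatial.KDTree(samples)
    (_, emulate_ptr) = l_Tree.query(lambda_emulate)
    lam_vol = np.zeros(samples.shape[0])
    for i in range(samples.shape[0]):
        lam_vol[i] = np.sum(np.equal(emulate_ptr, i))
    # Normalize so the volumes sum to one.
    return lam_vol / lambda_emulate.shape[0]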
Example #16
    def generalized_chains(self,
                           input_obj,
                           t_set,
                           kern,
                           savefile,
                           initial_sample_type="random",
                           criterion='center',
                           hot_start=0):
        """
        Basic adaptive sampling algorithm using generalized chains.

        .. todo::

            Test HOTSTART from parallel files using different num proc

        :param string initial_sample_type: type of initial sample: random (or
            'r'), latin hypercube ('lhs'), or space-filling curve (TBD)
        :param input_obj: Either a :class:`bet.sample.sample_set` object for an
            input space, an array of min and max bounds for the input values
            with ``min = input_domain[:, 0]`` and ``max = input_domain[:, 1]``,
            or the dimension of an input space
        :type input_obj: :class:`~bet.sample.sample_set`,
            :class:`numpy.ndarray` of shape (ndim, 2), or :class:`int`
        :param t_set: method for creating new parameter steps of a given step
            size based on the parameter domain size
        :type t_set: :class:`bet.sampling.adaptiveSampling.transition_set`
        :param kern: functional that acts on the data used to
            determine the proposed change to the ``step_size``
        :type kern: :class:`~bet.sampling.adaptiveSampling.kernel`
        :param string savefile: filename to save samples and data
        :param int hot_start: Flag for whether or not to hot start the sampling
            chains from a previous set of chains. Note that ``num_chains`` must
            be the same, but ``num_chains_pproc`` need not be the same. 0 -
            cold start, 1 - hot start from uncompleted run, 2 - hot
            start from finished run
        :param string criterion: latin hypercube criterion see 
            `PyDOE <http://pythonhosted.org/pyDOE/randomized.html>`_
        
        :rtype: tuple
        :returns: (``discretization``, ``all_step_ratios``) where
            ``discretization`` is a :class:`~bet.sample.discretization` object
            containing ``num_samples``  and  ``all_step_ratios`` is np.ndarray
            of shape ``(num_chains, chain_length)``
        
        """

        # Calculate step_size
        max_ratio = t_set.max_ratio
        min_ratio = t_set.min_ratio

        if not hot_start:
            logging.info("COLD START")
            step_ratio = t_set.init_ratio * np.ones(self.num_chains_pproc)

            # Initialize first batch of N samples (maybe taken from latin
            # hypercube/space-filling curve to fully explore parameter space -
            # not necessarily random). Call these Samples_old.
            disc_old = super(sampler, self).create_random_discretization(
                initial_sample_type,
                input_obj,
                savefile,
                self.num_chains,
                criterion,
                globalize=False)
            self.num_samples = self.chain_length * self.num_chains
            comm.Barrier()

            # populate local values
            #disc_old._input_sample_set.global_to_local()
            #disc_old._output_sample_set.global_to_local()
            input_old = disc_old._input_sample_set.copy()

            disc = disc_old.copy()
            all_step_ratios = step_ratio

            (kern_old, proposal) = kern.delta_step(disc_old.\
                    _output_sample_set.get_values_local(), None)

            start_ind = 1

        if hot_start:
            # LOAD FILES
            _, disc, all_step_ratios, kern_old = loadmat(
                savefile,
                lb_model=None,
                hot_start=hot_start,
                num_chains=self.num_chains)
            # MAKE SURE ARRAYS ARE LOCALIZED FROM HERE ON OUT WILL ONLY
            # OPERATE ON _local_values
            # Set mdat, step_ratio, input_old, start_ind appropriately
            step_ratio = all_step_ratios[-self.num_chains_pproc:]
            input_old = sample.sample_set(disc._input_sample_set.get_dim())
            input_old.set_domain(disc._input_sample_set.get_domain())
            input_old.set_values_local(disc._input_sample_set.\
                    get_values_local()[-self.num_chains_pproc:, :])

            # Determine how many batches have been run
            start_ind = disc._input_sample_set.get_values_local().\
                    shape[0]/self.num_chains_pproc

        mdat = dict()
        self.update_mdict(mdat)
        input_old.update_bounds_local()

        for batch in xrange(start_ind, self.chain_length):
            # For each of N samples_old, create N new parameter samples using
            # transition set and step_ratio. Call these samples input_new.
            input_new = t_set.step(step_ratio, input_old)

            # Solve the model for the input_new.
            output_new_values = self.lb_model(input_new.get_values_local())

            # Make some decision about changing step_size(k).  There are
            # multiple ways to do this.
            # Determine step size
            (kern_old, proposal) = kern.delta_step(output_new_values, kern_old)
            step_ratio = proposal * step_ratio
            # Is the ratio greater than max?
            step_ratio[step_ratio > max_ratio] = max_ratio
            # Is the ratio less than min?
            step_ratio[step_ratio < min_ratio] = min_ratio

            # Save and export concatenated arrays
            if self.chain_length < 4:
                pass
            elif comm.rank == 0 and (batch + 1) % (self.chain_length / 4) == 0:
                logging.info("Current chain length: "+\
                            str(batch+1)+"/"+str(self.chain_length))
            disc._input_sample_set.append_values_local(input_new.\
                    get_values_local())
            disc._output_sample_set.append_values_local(output_new_values)
            all_step_ratios = np.concatenate((all_step_ratios, step_ratio))
            mdat['step_ratios'] = all_step_ratios
            mdat['kern_old'] = kern_old

            super(sampler, self).save(mdat, savefile, disc, globalize=False)
            input_old = input_new

        # collect everything
        disc._input_sample_set.update_bounds_local()
        #disc._input_sample_set.local_to_global()
        #disc._output_sample_set.local_to_global()

        MYall_step_ratios = np.copy(all_step_ratios)
        # ``all_step_ratios`` is np.ndarray of shape (num_chains,
        # chain_length)
        all_step_ratios = util.get_global_values(MYall_step_ratios,
                                                 shape=(self.num_samples, ))
        all_step_ratios = np.reshape(all_step_ratios,
                                     (self.num_chains, self.chain_length), 'F')

        # save everything
        mdat['step_ratios'] = all_step_ratios
        mdat['kern_old'] = util.get_global_values(kern_old,
                                                  shape=(self.num_chains, ))
        super(sampler, self).save(mdat, savefile, disc, globalize=True)

        return (disc, all_step_ratios)
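The final reshape above uses Fortran order, which treats the flat gathered array as batch-major (all chains for batch 0, then all chains for batch 1, and so on). A tiny standalone check of that layout:

# Illustrative: order 'F' groups every num_chains-th entry into one chain.
import numpy as np

num_chains, chain_length = 3, 4
flat = np.arange(num_chains * chain_length)   # batch-major step ratios
per_chain = np.reshape(flat, (num_chains, chain_length), 'F')
assert per_chain[0].tolist() == [0, 3, 6, 9]  # chain 0 across the 4 batches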
Example #17
    def generalized_chains(self, input_obj, t_set, kern,
            savefile, initial_sample_type="random", criterion='center',
            hot_start=0): 
        """
        Basic adaptive sampling algorithm using generalized chains.

        .. todo::

            Test HOTSTART from parallel files using different num proc

        :param string initial_sample_type: type of initial sample: random (or
            'r'), latin hypercube ('lhs'), or space-filling curve (TBD)
        :param input_obj: Either a :class:`bet.sample.sample_set` object for an
            input space, an array of min and max bounds for the input values
            with ``min = input_domain[:, 0]`` and ``max = input_domain[:, 1]``,
            or the dimension of an input space
        :type input_obj: :class:`~bet.sample.sample_set`,
            :class:`numpy.ndarray` of shape (ndim, 2), or :class:`int`
        :param t_set: method for creating new parameter steps of a given step
            size based on the parameter domain size
        :type t_set: :class:`bet.sampling.adaptiveSampling.transition_set`
        :param kern: functional that acts on the data used to
            determine the proposed change to the ``step_size``
        :type kern: :class:`~bet.sampling.adaptiveSampling.kernel`
        :param string savefile: filename to save samples and data
        :param int hot_start: Flag for whether or not to hot start the sampling
            chains from a previous set of chains. Note that ``num_chains`` must
            be the same, but ``num_chains_pproc`` need not be the same. 0 -
            cold start, 1 - hot start from uncompleted run, 2 - hot
            start from finished run
        :param string criterion: latin hypercube criterion see 
            `PyDOE <http://pythonhosted.org/pyDOE/randomized.html>`_
        
        :rtype: tuple
        :returns: (``discretization``, ``all_step_ratios``) where
            ``discretization`` is a :class:`~bet.sample.discretization` object
            containing ``num_samples``  and  ``all_step_ratios`` is np.ndarray
            of shape ``(num_chains, chain_length)``
        
        """

        # Calculate step_size
        max_ratio = t_set.max_ratio
        min_ratio = t_set.min_ratio

        if not hot_start:
            logging.info("COLD START")
            step_ratio = t_set.init_ratio*np.ones(self.num_chains_pproc)
           
            # Initialize first batch of N samples (maybe taken from latin
            # hypercube/space-filling curve to fully explore parameter space -
            # not necessarily random). Call these Samples_old.
            disc_old = super(sampler, self).create_random_discretization(
                    initial_sample_type, input_obj, savefile,
                    self.num_chains, criterion, globalize=False)
            self.num_samples = self.chain_length * self.num_chains
            comm.Barrier()
            
            # populate local values 
            #disc_old._input_sample_set.global_to_local()
            #disc_old._output_sample_set.global_to_local()
            input_old = disc_old._input_sample_set.copy()
            
            disc = disc_old.copy()
            all_step_ratios = step_ratio 

            (kern_old, proposal) = kern.delta_step(disc_old.\
                    _output_sample_set.get_values_local(), None)

            start_ind = 1

        if hot_start:
            # LOAD FILES
            _, disc, all_step_ratios, kern_old = loadmat(savefile,
                    lb_model=None, hot_start=hot_start,
                    num_chains=self.num_chains)
            # MAKE SURE ARRAYS ARE LOCALIZED FROM HERE ON OUT WILL ONLY
            # OPERATE ON _local_values
            # Set mdat, step_ratio, input_old, start_ind appropriately
            step_ratio = all_step_ratios[-self.num_chains_pproc:]
            input_old = sample.sample_set(disc._input_sample_set.get_dim())
            input_old.set_domain(disc._input_sample_set.get_domain())
            input_old.set_values_local(disc._input_sample_set.\
                    get_values_local()[-self.num_chains_pproc:, :])

            # Determine how many batches have been run
            start_ind = disc._input_sample_set.get_values_local().\
                    shape[0]/self.num_chains_pproc
        
        mdat = dict()
        self.update_mdict(mdat)
        input_old.update_bounds_local()

        for batch in xrange(start_ind, self.chain_length):
            # For each of N samples_old, create N new parameter samples using
            # transition set and step_ratio. Call these samples input_new.
            input_new = t_set.step(step_ratio, input_old)
        
            # Solve the model for the input_new.
            output_new_values = self.lb_model(input_new.get_values_local())
            
            # Make some decision about changing step_size(k).  There are
            # multiple ways to do this.
            # Determine step size
            (kern_old, proposal) = kern.delta_step(output_new_values, kern_old)
            step_ratio = proposal*step_ratio
            # Is the ratio greater than max?
            step_ratio[step_ratio > max_ratio] = max_ratio
            # Is the ratio less than min?
            step_ratio[step_ratio < min_ratio] = min_ratio

            # Save and export concatenated arrays
            if self.chain_length < 4:
                pass
            elif comm.rank == 0 and (batch+1)%(self.chain_length/4) == 0:
                logging.info("Current chain length: "+\
                            str(batch+1)+"/"+str(self.chain_length))
            disc._input_sample_set.append_values_local(input_new.\
                    get_values_local())
            disc._output_sample_set.append_values_local(output_new_values)
            all_step_ratios = np.concatenate((all_step_ratios, step_ratio))
            mdat['step_ratios'] = all_step_ratios
            mdat['kern_old'] = kern_old
            
            super(sampler, self).save(mdat, savefile, disc, globalize=False)
            input_old = input_new

        # collect everything
        disc._input_sample_set.update_bounds_local() 
        #disc._input_sample_set.local_to_global()
        #disc._output_sample_set.local_to_global()

        MYall_step_ratios = np.copy(all_step_ratios) 
        # ``all_step_ratios`` is np.ndarray of shape (num_chains,
        # chain_length)
        all_step_ratios = util.get_global_values(MYall_step_ratios,
                shape=(self.num_samples,))
        all_step_ratios = np.reshape(all_step_ratios, (self.num_chains,
            self.chain_length), 'F')

        # save everything
        mdat['step_ratios'] = all_step_ratios
        mdat['kern_old'] = util.get_global_values(kern_old,
                shape=(self.num_chains,))
        super(sampler, self).save(mdat, savefile, disc, globalize=True)

        return (disc, all_step_ratios)