Example #1
def get_global_values(array, shape=None):
    """
    Concatenates local arrays into a global array using
    :func:`numpy.vstack` (or :func:`numpy.hstack` for 1-D arrays).

    :param array: local array to gather
    :type array: :class:`~numpy.ndarray`
    :param shape: shape of the global array; enables the buffer-based
        ``Allgather`` path for supported dtypes
    :type shape: tuple or None
    :rtype: :class:`~numpy.ndarray`
    :returns: global array
    """
    if comm.size == 1:
        return array
    else:
        # Figure out the subtype of the elements of the array
        dtype = array.dtype
        mpi_dtype = False
        for ptype in possible_types:
            if np.issubdtype(dtype, ptype):
                mpi_dtype = True
                dtype = ptype

        if shape is None or not mpi_dtype:
            # do a lowercase allgather
            a_shape = len(array.shape)
            array = comm.allgather(array)
            if a_shape == 1:
                return np.hstack(array)
            else:
                return np.vstack(array)
        else:
            # do an uppercase Allgather
            whole_a = np.empty(shape, dtype=dtype)
            comm.Allgather([array.ravel(), possible_types[dtype]],
                           [whole_a, possible_types[dtype]])
            return whole_a
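
This helper depends on BET module-level names (``comm`` and
``possible_types``). A minimal sketch of how it might be driven under MPI,
with hypothetical stand-ins for those names (in BET itself ``comm`` comes
from ``bet.Comm`` and ``possible_types`` maps numpy dtypes to MPI
datatypes):

from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
# Hypothetical stand-in for BET's dtype -> MPI datatype map.
possible_types = {np.float64: MPI.DOUBLE, np.int32: MPI.INT}

local = comm.rank * np.ones((5, 3))
# Buffer-based path: every rank passes the global shape.
glob_a = get_global_values(local, shape=(5 * comm.size, 3))
# Pickle-based fallback: no shape given; works for any dtype.
glob_b = get_global_values(local)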
Example #2
def get_global_values(array, shape=None):
    """
    Concatenates local arrays into a global array using
    :func:`numpy.vstack` (or :func:`numpy.hstack` for 1-D arrays).

    :param array: local array to gather
    :type array: :class:`~numpy.ndarray`
    :param shape: shape of the global array; enables the buffer-based
        ``Allgather`` path for supported dtypes
    :type shape: tuple or None
    :rtype: :class:`~numpy.ndarray`
    :returns: global array
    """
    if comm.size == 1:
        return array
    else:
        # Figure out the subtype of the elements of the array
        dtype = array.dtype
        mpi_dtype = False
        for ptype in possible_types:
            if np.issubdtype(dtype, ptype):
                mpi_dtype = True
                dtype = ptype

        if shape is None or not mpi_dtype:
            # do a lowercase allgather
            a_shape = len(array.shape)
            array = comm.allgather(array)
            if a_shape <= 1:
                return np.hstack(array)
            else:
                return np.vstack(array)
        else:
            # do an uppercase Allgather
            whole_a = np.empty(shape, dtype=dtype)
            comm.Allgather([array.ravel(), possible_types[dtype]],
                           [whole_a, possible_types[dtype]])
            return whole_a
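
The branch on the dimension of the local array exists because
``comm.allgather`` returns a list of per-rank pieces: 1-D pieces have to be
joined end-to-end, while 2-D pieces are stacked along the row axis. A small
serial numpy illustration of the two joins:

import numpy as np

pieces_1d = [np.array([0., 1.]), np.array([2., 3.])]
print(np.hstack(pieces_1d).shape)   # (4,)

pieces_2d = [np.zeros((2, 3)), np.ones((2, 3))]
print(np.vstack(pieces_2d).shape)   # (4, 3)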
Example #3
def loadmat(save_file, lb_model=None, hot_start=None, num_chains=None):
    """
    Loads data from ``save_file`` into a
    :class:`~bet.sampling.adaptiveSampling.sampler` object.
    
    :param string save_file: file name
    :param callable lb_model: runs the model at a given set of parameter
        samples, (N, ndim), and returns data (N, mdim)
    :param int hot_start: flag for hot starting the sampling chains from a
        previous set of chains. Note that ``num_chains`` must be the same,
        but ``num_chains_pproc`` need not be the same. 0 - cold start,
        1 - hot start from uncompleted run, 2 - hot start from finished run
    :param int num_chains: total number of chains of samples

    :rtype: tuple of (:class:`bet.sampling.adaptiveSampling.sampler`,
        :class:`bet.sample.discretization`, :class:`numpy.ndarray`,
        :class:`numpy.ndarray`)
    :returns: (``sampler``, ``discretization``, ``all_step_ratios``,
        ``kern_old``)
    
    """
    logging.info("hot_start: %s", hot_start)
    if hot_start is None:
        hot_start = 1

    # LOAD FILES
    if hot_start == 1:  # HOT START FROM PARTIAL RUN
        if comm.rank == 0:
            logging.info("HOT START from partial run")
        # Find and open save files
        save_dir = os.path.dirname(save_file)
        base_name = os.path.basename(save_file)
        mdat_files = glob.glob(
            os.path.join(save_dir, "proc*_{}".format(base_name)))
        if len(mdat_files) > 0:
            tmp_mdat = sio.loadmat(mdat_files[0])
        else:
            tmp_mdat = sio.loadmat(save_file)
        if num_chains is None:
            num_chains = np.squeeze(tmp_mdat['num_chains'])
        num_chains_pproc = num_chains / comm.size
        if len(mdat_files) == 0:
            logging.info("HOT START using serial file")
            mdat = sio.loadmat(save_file)
            if num_chains is None:
                num_chains = np.squeeze(mdat['num_chains'])
            num_chains_pproc = num_chains / comm.size
            disc = sample.load_discretization(save_file)
            kern_old = np.squeeze(mdat['kern_old'])
            all_step_ratios = np.squeeze(mdat['step_ratios'])
            chain_length = disc.check_nums() / num_chains
            if all_step_ratios.shape == (num_chains, chain_length):
                msg = "Serial file, from completed"
                msg += " run updating hot_start"
                hot_start = 2
            # reshape if parallel
            if comm.size > 1:
                temp_input = np.reshape(
                    disc._input_sample_set.get_values(),
                    (num_chains, chain_length, -1), 'F')
                temp_output = np.reshape(
                    disc._output_sample_set.get_values(),
                    (num_chains, chain_length, -1), 'F')
                all_step_ratios = np.reshape(
                    all_step_ratios, (num_chains, -1), 'F')
        elif hot_start == 1 and len(mdat_files) == comm.size:
            logging.info("HOT START using parallel files (same nproc)")
            # if the number of processors is the same then set mdat to
            # be the one with the matching processor number (doesn't
            # really matter)
            disc = sample.load_discretization(mdat_files[comm.rank])
            kern_old = np.squeeze(tmp_mdat['kern_old'])
            all_step_ratios = np.squeeze(tmp_mdat['step_ratios'])
        elif hot_start == 1 and len(mdat_files) != comm.size:
            logging.info("HOT START using parallel files (diff nproc)")
            # Determine how many processors the previous data used
            # otherwise gather the data from mdat and then scatter
            # among the processors and update mdat
            mdat_files_local = comm.scatter(mdat_files)
            mdat_local = [sio.loadmat(m) for m in mdat_files_local]
            disc_local = [sample.load_discretization(m)
                          for m in mdat_files_local]
            mdat_list = comm.allgather(mdat_local)
            disc_list = comm.allgather(disc_local)
            mdat_global = []
            disc_global = []
            # instead of a list of lists, create a list of mdat
            for mlist, dlist in zip(mdat_list, disc_list):
                mdat_global.extend(mlist)
                disc_global.extend(dlist)
            # get num_proc and num_chains_pproc for previous run
            old_num_proc = max((len(mdat_list), 1))
            old_num_chains_pproc = num_chains / old_num_proc
            # get batch size and/or number of dimensions
            chain_length = (disc_global[0].check_nums() /
                            old_num_chains_pproc)
            disc = disc_global[0].copy()
            # create lists of local data
            temp_input = []
            temp_output = []
            all_step_ratios = []
            kern_old = []
            # RESHAPE old_num_chains_pproc, chain_length(or batch), dim
            for mdat, disc_i in zip(mdat_global, disc_global):
                temp_input.append(np.reshape(
                    disc_i._input_sample_set.get_values_local(),
                    (old_num_chains_pproc, chain_length, -1), 'F'))
                temp_output.append(np.reshape(
                    disc_i._output_sample_set.get_values_local(),
                    (old_num_chains_pproc, chain_length, -1), 'F'))
                all_step_ratios.append(np.reshape(
                    mdat['step_ratios'],
                    (old_num_chains_pproc, chain_length, -1), 'F'))
                kern_old.append(np.reshape(
                    mdat['kern_old'], (old_num_chains_pproc, ), 'F'))
            # turn into arrays
            temp_input = np.concatenate(temp_input)
            temp_output = np.concatenate(temp_output)
            all_step_ratios = np.concatenate(all_step_ratios)
            kern_old = np.concatenate(kern_old)
    if hot_start == 2:  # HOT START FROM COMPLETED RUN:
        if comm.rank == 0:
            logging.info("HOT START from completed run")
        mdat = sio.loadmat(save_file)
        if num_chains is None:
            num_chains = np.squeeze(mdat['num_chains'])
        num_chains_pproc = num_chains / comm.size
        disc = sample.load_discretization(save_file)
        kern_old = np.squeeze(mdat['kern_old'])
        all_step_ratios = np.squeeze(mdat['step_ratios'])
        chain_length = disc.check_nums() / num_chains
        # reshape if parallel
        if comm.size > 1:
            temp_input = np.reshape(
                disc._input_sample_set.get_values(),
                (num_chains, chain_length, -1), 'F')
            temp_output = np.reshape(
                disc._output_sample_set.get_values(),
                (num_chains, chain_length, -1), 'F')
            all_step_ratios = np.reshape(
                all_step_ratios, (num_chains, chain_length), 'F')
    # SPLIT DATA IF NECESSARY
    if comm.size > 1 and (hot_start == 2 or
                          (hot_start == 1 and len(mdat_files) != comm.size)):
        # Use split to split along num_chains and set *._values_local
        disc._input_sample_set.set_values_local(np.reshape(
            np.split(temp_input, comm.size, 0)[comm.rank],
            (num_chains_pproc * chain_length, -1), 'F'))
        disc._output_sample_set.set_values_local(np.reshape(
            np.split(temp_output, comm.size, 0)[comm.rank],
            (num_chains_pproc * chain_length, -1), 'F'))
        all_step_ratios = np.reshape(
            np.split(all_step_ratios, comm.size, 0)[comm.rank],
            (num_chains_pproc * chain_length, ), 'F')
        kern_old = np.reshape(
            np.split(kern_old, comm.size, 0)[comm.rank],
            (num_chains_pproc, ), 'F')
    else:
        all_step_ratios = np.reshape(all_step_ratios, (-1, ), 'F')
    logging.info("num_samples: %s, chain_length: %s, lb_model: %s",
                 chain_length * num_chains, chain_length, lb_model)
    new_sampler = sampler(chain_length * num_chains, chain_length, lb_model)
    return (new_sampler, disc, all_step_ratios, kern_old)
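
The redistribution above leans on one recurring trick: samples are stored
batch-interleaved as a flat (num_chains * chain_length, dim) array, so a
Fortran-order reshape regroups each chain's history contiguously, and
``np.split`` along axis 0 then deals whole chains out to each processor. A
serial sketch of that round trip (all names below are local stand-ins):

import numpy as np

num_chains, chain_length, dim, nproc = 4, 3, 2, 2
flat = np.arange(num_chains * chain_length * dim,
                 dtype=float).reshape((num_chains * chain_length, dim))
# Group by chain: entry (c, l, :) is batch l of chain c.
by_chain = np.reshape(flat, (num_chains, chain_length, -1), 'F')
my_rank = 0  # stand-in for comm.rank
mine = np.split(by_chain, nproc, 0)[my_rank]  # this rank's chains
# Flatten back to the batch-interleaved local layout.
local = np.reshape(mine, (num_chains // nproc * chain_length, -1), 'F')
print(local.shape)   # (6, 2)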
Example #4
def loadmat(save_file, lb_model=None, hot_start=None, num_chains=None):
    """
    Loads data from ``save_file`` into a
    :class:`~bet.sampling.adaptiveSampling.sampler` object.
    
    :param string save_file: file name
    :param callable lb_model: runs the model at a given set of parameter
        samples, (N, ndim), and returns data (N, mdim)
    :param int hot_start: flag for hot starting the sampling chains from a
        previous set of chains. Note that ``num_chains`` must be the same,
        but ``num_chains_pproc`` need not be the same. 0 - cold start,
        1 - hot start from uncompleted run, 2 - hot start from finished run
    :param int num_chains: total number of chains of samples

    :rtype: tuple of (:class:`bet.sampling.adaptiveSampling.sampler`,
        :class:`bet.sample.discretization`, :class:`numpy.ndarray`,
        :class:`numpy.ndarray`)
    :returns: (``sampler``, ``discretization``, ``all_step_ratios``,
        ``kern_old``)
    
    """
    logging.info("hot_start: %s", hot_start)
    if hot_start is None:
        hot_start = 1
    # LOAD FILES
    if hot_start == 1:  # HOT START FROM PARTIAL RUN
        if comm.rank == 0:
            logging.info("HOT START from partial run")
        # Find and open save files
        save_dir = os.path.dirname(save_file)
        base_name = os.path.basename(save_file)
        mdat_files = glob.glob(
            os.path.join(save_dir, "proc*_{}".format(base_name)))
        if len(mdat_files) > 0:
            tmp_mdat = sio.loadmat(mdat_files[0])
        else:
            tmp_mdat = sio.loadmat(save_file)
        if num_chains is None: 
            num_chains = np.squeeze(tmp_mdat['num_chains'])
        num_chains_pproc = num_chains / comm.size
        if len(mdat_files) == 0:
            logging.info("HOT START using serial file")
            mdat = sio.loadmat(save_file)
            if num_chains is None: 
                num_chains = np.squeeze(mdat['num_chains'])
            num_chains_pproc = num_chains / comm.size
            disc = sample.load_discretization(save_file)
            kern_old = np.squeeze(mdat['kern_old'])
            all_step_ratios = np.squeeze(mdat['step_ratios'])
            chain_length = disc.check_nums() / num_chains
            if all_step_ratios.shape == (num_chains, chain_length):
                msg = "Serial file, from completed"
                msg += " run updating hot_start"
                hot_start = 2
            # reshape if parallel
            if comm.size > 1:
                temp_input = np.reshape(
                    disc._input_sample_set.get_values(),
                    (num_chains, chain_length, -1), 'F')
                temp_output = np.reshape(
                    disc._output_sample_set.get_values(),
                    (num_chains, chain_length, -1), 'F')
                all_step_ratios = np.reshape(
                    all_step_ratios, (num_chains, -1), 'F')
        elif hot_start == 1 and len(mdat_files) == comm.size:
            logging.info("HOT START using parallel files (same nproc)")
            # if the number of processors is the same then set mdat to
            # be the one with the matching processor number (doesn't
            # really matter)
            disc = sample.load_discretization(mdat_files[comm.rank])
            kern_old = np.squeeze(tmp_mdat['kern_old'])
            all_step_ratios = np.squeeze(tmp_mdat['step_ratios'])
        elif hot_start == 1 and len(mdat_files) != comm.size:
            logging.info("HOT START using parallel files (diff nproc)")
            # Determine how many processors the previous data used
            # otherwise gather the data from mdat and then scatter
            # among the processors and update mdat
            mdat_files_local = comm.scatter(mdat_files)
            mdat_local = [sio.loadmat(m) for m in mdat_files_local]
            disc_local = [sample.load_discretization(m)
                          for m in mdat_files_local]
            mdat_list = comm.allgather(mdat_local)
            disc_list = comm.allgather(disc_local)
            mdat_global = []
            disc_global = []
            # instead of a list of lists, create a list of mdat
            for mlist, dlist in zip(mdat_list, disc_list): 
                mdat_global.extend(mlist)
                disc_global.extend(dlist)
            # get num_proc and num_chains_pproc for previous run
            old_num_proc = max((len(mdat_list), 1))
            old_num_chains_pproc = num_chains / old_num_proc
            # get batch size and/or number of dimensions
            chain_length = (disc_global[0].check_nums() /
                            old_num_chains_pproc)
            disc = disc_global[0].copy()
            # create lists of local data
            temp_input = []
            temp_output = []
            all_step_ratios = []
            kern_old = []
            # RESHAPE old_num_chains_pproc, chain_length(or batch), dim
            for mdat, disc_i in zip(mdat_global, disc_global):
                temp_input.append(np.reshape(
                    disc_i._input_sample_set.get_values_local(),
                    (old_num_chains_pproc, chain_length, -1), 'F'))
                temp_output.append(np.reshape(
                    disc_i._output_sample_set.get_values_local(),
                    (old_num_chains_pproc, chain_length, -1), 'F'))
                all_step_ratios.append(np.reshape(
                    mdat['step_ratios'],
                    (old_num_chains_pproc, chain_length, -1), 'F'))
                kern_old.append(np.reshape(
                    mdat['kern_old'], (old_num_chains_pproc, ), 'F'))
            # turn into arrays
            temp_input = np.concatenate(temp_input)
            temp_output = np.concatenate(temp_output)
            all_step_ratios = np.concatenate(all_step_ratios)
            kern_old = np.concatenate(kern_old)
    if hot_start == 2: # HOT START FROM COMPLETED RUN:
        if comm.rank == 0:
            logging.info("HOT START from completed run")
        mdat = sio.loadmat(save_file)
        if num_chains is None: 
            num_chains = np.squeeze(mdat['num_chains'])
        num_chains_pproc = num_chains / comm.size
        disc = sample.load_discretization(save_file)
        kern_old = np.squeeze(mdat['kern_old'])
        all_step_ratios = np.squeeze(mdat['step_ratios'])
        chain_length = disc.check_nums() / num_chains
        # reshape if parallel
        if comm.size > 1:
            temp_input = np.reshape(
                disc._input_sample_set.get_values(),
                (num_chains, chain_length, -1), 'F')
            temp_output = np.reshape(
                disc._output_sample_set.get_values(),
                (num_chains, chain_length, -1), 'F')
            all_step_ratios = np.reshape(
                all_step_ratios, (num_chains, chain_length), 'F')
    # SPLIT DATA IF NECESSARY
    if comm.size > 1 and (hot_start == 2 or
                          (hot_start == 1 and len(mdat_files) != comm.size)):
        # Use split to split along num_chains and set *._values_local
        disc._input_sample_set.set_values_local(np.reshape(
            np.split(temp_input, comm.size, 0)[comm.rank],
            (num_chains_pproc * chain_length, -1), 'F'))
        disc._output_sample_set.set_values_local(np.reshape(
            np.split(temp_output, comm.size, 0)[comm.rank],
            (num_chains_pproc * chain_length, -1), 'F'))
        all_step_ratios = np.reshape(
            np.split(all_step_ratios, comm.size, 0)[comm.rank],
            (num_chains_pproc * chain_length, ), 'F')
        kern_old = np.reshape(
            np.split(kern_old, comm.size, 0)[comm.rank],
            (num_chains_pproc, ), 'F')
    else:
        all_step_ratios = np.reshape(all_step_ratios, (-1,), 'F')
    logging.info("num_samples: %s, chain_length: %s, lb_model: %s",
                 chain_length * num_chains, chain_length, lb_model)
    new_sampler = sampler(chain_length * num_chains, chain_length, lb_model)
    return (new_sampler, disc, all_step_ratios, kern_old)
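
A hedged usage sketch for either version of ``loadmat``: resume an adaptive
run from its save file. The file name and ``my_model`` are placeholders for
whatever was used in the original run:

# 'sampler_run.mat' and my_model are hypothetical placeholders.
(new_sampler, disc, all_step_ratios, kern_old) = loadmat(
    'sampler_run.mat', lb_model=my_model, hot_start=1, num_chains=8)
# hot_start=1 resumes a partial run, hot_start=2 restarts from a
# completed one; num_chains must match the original run.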
Example #5
    def generalized_chains(self, param_min, param_max, t_set, kern,
                           savefile, initial_sample_type="random",
                           criterion='center', hot_start=0):
        """
        Basic adaptive sampling algorithm using generalized chains.
       
        :param string initial_sample_type: type of initial sample: random (or
            r), latin hypercube (lhs), or space-filling curve (TBD)
        :param param_min: minimum value for each parameter dimension
        :type param_min: :class:`numpy.ndarray` (ndim,)
        :param param_max: maximum value for each parameter dimension
        :type param_max: :class:`numpy.ndarray` (ndim,)
        :param t_set: method for creating new parameter steps of a given step
            size based on the parameter domain size
        :type t_set: :class:`bet.sampling.adaptiveSampling.transition_set`
        :param kern: functional that acts on the data used to determine the
            proposed change to the ``step_size``
        :type kern: :class:`~bet.sampling.adaptiveSampling.kernel`
        :param string savefile: filename to save samples and data
        :param int hot_start: flag for hot starting the sampling chains from a
            previous set of chains. Note that ``num_chains`` must be the same,
            but ``num_chains_pproc`` need not be the same. 0 - cold start,
            1 - hot start from uncompleted run, 2 - hot start from finished run
        :param string criterion: latin hypercube criterion, see
            `PyDOE <http://pythonhosted.org/pyDOE/randomized.html>`_
        
        :rtype: tuple
        :returns: (``parameter_samples``, ``data_samples``,
            ``all_step_ratios``) where ``parameter_samples`` is np.ndarray of
            shape (num_samples, ndim), ``data_samples`` is np.ndarray of shape
            (num_samples, mdim), and ``all_step_ratios`` is np.ndarray of shape
            (num_chains, chain_length)
        
        """
        if comm.size > 1:
            psavefile = os.path.join(os.path.dirname(savefile),
                    "proc{}_{}".format(comm.rank, os.path.basename(savefile)))

        # Initialize Nx1 vector Step_size = something reasonable (based on size
        # of domain and transition set type)
        # Calculate domain size
        param_left = np.repeat([param_min], self.num_chains_pproc, 0)
        param_right = np.repeat([param_max], self.num_chains_pproc, 0)

        param_width = param_right - param_left
        # Calculate step_size
        max_ratio = t_set.max_ratio
        min_ratio = t_set.min_ratio

        if not hot_start:
            step_ratio = t_set.init_ratio*np.ones(self.num_chains_pproc)
           
            # Initialize the first batch of N samples (maybe taken from latin
            # hypercube/space-filling curve to fully explore parameter space -
            # not necessarily random). Call these Samples_old.
            (samples_old, data_old) = super(sampler, self).random_samples(
                    initial_sample_type, param_min, param_max, savefile,
                    self.num_chains, criterion)
            self.num_samples = self.chain_length * self.num_chains
            comm.Barrier()
            
            # now split it all up
            if comm.size > 1:
                MYsamples_old = np.empty((np.shape(samples_old)[0] / comm.size,
                                          np.shape(samples_old)[1]))
                comm.Scatter([samples_old, MPI.DOUBLE],
                             [MYsamples_old, MPI.DOUBLE])
                MYdata_old = np.empty((np.shape(data_old)[0] / comm.size,
                                       np.shape(data_old)[1]))
                comm.Scatter([data_old, MPI.DOUBLE], [MYdata_old, MPI.DOUBLE])
            else:
                MYsamples_old = np.copy(samples_old)
                MYdata_old = np.copy(data_old)

            samples = MYsamples_old
            data = MYdata_old
            all_step_ratios = step_ratio
            (kern_old, proposal) = kern.delta_step(MYdata_old, None)
            start_ind = 1
        if hot_start:
            # LOAD FILES
            if hot_start == 1: # HOT START FROM PARTIAL RUN
                if comm.rank == 0:
                    print "HOT START from partial run"
                # Find and open save files
                save_dir = os.path.dirname(savefile)
                base_name = os.path.basename(savefile)
                mdat_files = glob.glob(os.path.join(save_dir,
                        "proc*_{}".format(base_name)))
                if len(mdat_files) == 0:
                    print "HOT START using serial file"
                    mdat = sio.loadmat(savefile)
                    samples = mdat['samples']
                    data = mdat['data']
                    kern_old = np.squeeze(mdat['kern_old'])
                    all_step_ratios = np.squeeze(mdat['step_ratios'])
                    chain_length = samples.shape[0]/self.num_chains
                    if all_step_ratios.shape == (self.num_chains,
                            chain_length):
                        print "Serial file, from completed run updating hot_start"
                        hot_start = 2
                    # reshape if parallel
                    if comm.size > 1:
                        samples = np.reshape(samples, (self.num_chains,
                            chain_length, -1), 'F')
                        data = np.reshape(data, (self.num_chains,
                            chain_length, -1), 'F')
                        all_step_ratios = np.reshape(all_step_ratios,
                                (self.num_chains, -1), 'F')
                elif hot_start == 1 and len(mdat_files) == comm.size:
                    print "HOT START using parallel files (same nproc)"
                    # if the number of processors is the same then set mdat to
                    # be the one with the matching processor number (doesn't
                    # really matter)
                    mdat = sio.loadmat(mdat_files[comm.rank])
                    samples = mdat['samples']
                    data = mdat['data']
                    kern_old = np.squeeze(mdat['kern_old'])
                    all_step_ratios = np.squeeze(mdat['step_ratios'])
                elif hot_start == 1 and len(mdat_files) != comm.size:
                    print "HOT START using parallel files (diff nproc)"
                    # Determine how many processors the previous data used
                    # otherwise gather the data from mdat and then scatter
                    # among the processors and update mdat
                    mdat_files_local = comm.scatter(mdat_files)
                    mdat_local = [sio.loadmat(m) for m in mdat_files_local]
                    mdat_list = comm.allgather(mdat_local)
                    mdat_global = []
                    # instead of a list of lists, create a list of mdat
                    for mlist in mdat_list:
                        mdat_global.extend(mlist)
                    # get num_proc and num_chains_pproc for previous run
                    old_num_proc = max((len(mdat_list), 1))
                    old_num_chains_pproc = self.num_chains/old_num_proc
                    # get batch size and/or number of dimensions
                    chain_length = (mdat_global[0]['samples'].shape[0] /
                                    old_num_chains_pproc)
                    # create lists of local data
                    samples = []
                    data = []
                    all_step_ratios = []
                    kern_old = []
                    # RESHAPE old_num_chains_pproc, chain_length(or batch), dim
                    for mdat in mdat_global:
                        samples.append(np.reshape(mdat['samples'],
                            (old_num_chains_pproc, chain_length, -1), 'F'))
                        data.append(np.reshape(mdat['data'],
                            (old_num_chains_pproc, chain_length, -1), 'F'))
                        all_step_ratios.append(np.reshape(mdat['step_ratios'],
                            (old_num_chains_pproc, chain_length, -1), 'F'))
                        kern_old.append(np.reshape(mdat['kern_old'],
                            (old_num_chains_pproc,), 'F'))
                    # turn into arrays
                    samples = np.concatenate(samples)
                    data = np.concatenate(data)
                    all_step_ratios = np.concatenate(all_step_ratios)
                    kern_old = np.concatenate(kern_old)
            if hot_start == 2: # HOT START FROM COMPLETED RUN:
                if comm.rank == 0:
                    print "HOT START from completed run"
                mdat = sio.loadmat(savefile)
                samples = mdat['samples']
                data = mdat['data']
                kern_old = np.squeeze(mdat['kern_old'])
                all_step_ratios = np.squeeze(mdat['step_ratios'])
                chain_length = samples.shape[0]/self.num_chains
                mdat_files = []
                # reshape if parallel
                if comm.size > 1:
                    samples = np.reshape(samples, (self.num_chains,
                        chain_length, -1), 'F')
                    data = np.reshape(data, (self.num_chains,
                        chain_length, -1), 'F')
                    all_step_ratios = np.reshape(all_step_ratios,
                            (self.num_chains, chain_length), 'F')
            # SPLIT DATA IF NECESSARY
            if comm.size > 1 and (hot_start == 2 or
                    (hot_start == 1 and len(mdat_files) != comm.size)):
                # Use split to split along num_chains
                samples = np.reshape(np.split(samples, comm.size,
                    0)[comm.rank], (self.num_chains_pproc*chain_length, -1),
                    'F')
                data = np.reshape(np.split(data, comm.size, 0)[comm.rank],
                        (self.num_chains_pproc*chain_length, -1), 'F')
                all_step_ratios = np.reshape(np.split(all_step_ratios,
                    comm.size, 0)[comm.rank],
                    (self.num_chains_pproc*chain_length,), 'F')
                kern_old = np.reshape(np.split(kern_old, comm.size,
                    0)[comm.rank], (self.num_chains_pproc,), 'F')
            else:
                all_step_ratios = np.reshape(all_step_ratios, (-1,), 'F')
            # Set samples, data, all_step_ratios, mdat, step_ratio,
            # MYsamples_old, and kern_old accordingly
            step_ratio = all_step_ratios[-self.num_chains_pproc:]
            MYsamples_old = samples[-self.num_chains_pproc:, :]
            # Determine how many batches have been run
            start_ind = samples.shape[0]/self.num_chains_pproc
        
        mdat = dict()
        self.update_mdict(mdat)
        for batch in xrange(start_ind, self.chain_length):
            # For each of N samples_old, create N new parameter samples using
            # transition set and step_ratio. Call these samples samples_new.
            samples_new = t_set.step(step_ratio, param_width,
                    param_left, param_right, MYsamples_old)
        
            # Solve the model for the samples_new.
            data_new = self.lb_model(samples_new)
            
            # Make some decision about changing step_size(k).  There are
            # multiple ways to do this.
            # Determine step size
            (kern_old, proposal) = kern.delta_step(data_new, kern_old)
            step_ratio = proposal*step_ratio
            # Is the ratio greater than max?
            step_ratio[step_ratio > max_ratio] = max_ratio
            # Is the ratio less than min?
            step_ratio[step_ratio < min_ratio] = min_ratio
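            # (A more compact, equivalent form of the two clamps above:
            # step_ratio = np.clip(step_ratio, min_ratio, max_ratio).)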

            # Save and export concatenated arrays
            if (self.chain_length >= 4 and comm.rank == 0 and
                    (batch + 1) % (self.chain_length / 4) == 0):
                print "Current chain length: " + str(batch + 1) + "/" + \
                        str(self.chain_length)
            samples = np.concatenate((samples, samples_new))
            data = np.concatenate((data, data_new))
            all_step_ratios = np.concatenate((all_step_ratios, step_ratio))
            mdat['step_ratios'] = all_step_ratios
            mdat['samples'] = samples
            mdat['data'] = data
            mdat['kern_old'] = kern_old
            if comm.size > 1:
                super(sampler, self).save(mdat, psavefile)
            else:
                super(sampler, self).save(mdat, savefile)
            MYsamples_old = samples_new

        # collect everything
        MYsamples = np.copy(samples)
        MYdata = np.copy(data)
        MYall_step_ratios = np.copy(all_step_ratios)
        # ``parameter_samples`` is np.ndarray of shape (num_samples, ndim)
        samples = util.get_global_values(
            MYsamples, shape=(self.num_samples, np.shape(MYsamples)[1]))
        # and ``data_samples`` is np.ndarray of shape (num_samples, mdim)
        data = util.get_global_values(MYdata, shape=(self.num_samples,
            np.shape(MYdata)[1]))
        # ``all_step_ratios`` is np.ndarray of shape (num_chains,
        # chain_length)
        all_step_ratios = util.get_global_values(MYall_step_ratios,
                shape=(self.num_samples,))
        all_step_ratios = np.reshape(all_step_ratios, (self.num_chains,
            self.chain_length), 'F')

        # save everything
        mdat['step_ratios'] = all_step_ratios
        mdat['samples'] = samples
        mdat['data'] = data
        mdat['kern_old'] = util.get_global_values(kern_old,
                shape=(self.num_chains,))
        super(sampler, self).save(mdat, savefile)

        return (samples, data, all_step_ratios)
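
A hedged usage sketch for ``generalized_chains``: the sampler, transition
set, and kernel instances are placeholders for objects built elsewhere with
``bet.sampling.adaptiveSampling``:

import numpy as np

# my_sampler, my_transition_set, and my_kernel are hypothetical
# placeholders for previously constructed BET objects.
param_min = np.zeros(2)
param_max = np.ones(2)
(samples, data, all_step_ratios) = my_sampler.generalized_chains(
    param_min, param_max, my_transition_set, my_kernel,
    'sampler_run.mat', initial_sample_type='lhs', hot_start=0)
# hot_start=1 or 2 would instead resume from 'sampler_run.mat' and its
# per-processor proc*_ companion files.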