Example 1
def _create_cache(t0, t1, w0, w1, entropy, pool_size, k):
    ts = [t0, t1]
    ws = [w0, w1]

    parent = SeedSequence(entropy=entropy, pool_size=pool_size)
    seeds = [parent]

    for level in range(1, k + 1):
        new_ts = []
        new_ws = []
        new_seeds = []
        for i, parent in enumerate(seeds):
            seedv, seedl, seedr = parent.spawn(3)
            new_seeds.extend([seedl, seedr])

            t0, t1 = ts[i], ts[i + 1]
            w0, w1 = ws[i], ws[i + 1]
            t = (t0 + t1) / 2
            w = utils.brownian_bridge(t0=t0,
                                      t1=t1,
                                      w0=w0,
                                      w1=w1,
                                      t=t,
                                      seed=seedv)
            new_ts.extend([ts[i], t])
            new_ws.extend([ws[i], w])

        new_ts.append(ts[-1])
        new_ws.append(ws[-1])
        ts = new_ts
        ws = new_ws
        seeds = new_seeds

    return ts, ws, seeds
Example 2
    def reseed(self, seed_seq=None):
        """
        Get new random number generator.

        Parameters
        ----------
        seed_seq : np.random.SeedSequence, rlberry.seeding.Seeder or int, default : None
            Seed sequence from which to spawn the random number generator.
            If None, generate random seed.
            If int, use as entropy for SeedSequence.
            If seeder, use seeder.seed_seq
        """
        # if None, new seed sequence
        if seed_seq is None:
            seed_seq = SeedSequence()
        # if SeedSequence, do nothing
        elif isinstance(seed_seq, SeedSequence):
            seed_seq = seed_seq
        # if Seeder, get Seeder.seed_seq
        elif isinstance(seed_seq, Seeder):
            seed_seq = seed_seq.seed_seq
        # if integer, new SeedSequence
        else:
            seed_seq = SeedSequence(seed_seq)

        # spawn
        seed_seq = seed_seq.spawn(1)[0]

        self.seed_seq = seed_seq
        self.rng = default_rng(self.seed_seq)
Example 3
def retry(store,
          prob,
          algo,
          num_retries,
          value_limit=math.inf,
          popsize=1,
          workers=mp.cpu_count()):
    try:
        import pygmo as pg
    except ImportError as e:
        raise ImportError(
            "Please install PYGMO (pip install pygmo) to use PAGMO optimizers"
        ) from e
    sg = SeedSequence()
    rgs = [Generator(MT19937(s)) for s in sg.spawn(workers)]
    proc = [
        Process(target=_retry_loop,
                args=(pid, rgs, store, prob, algo, num_retries, value_limit,
                      popsize, pg)) for pid in range(workers)
    ]
    [p.start() for p in proc]
    [p.join() for p in proc]
    store.sort()
    store.dump()
    return OptimizeResult(x=store.get_x_best(),
                          fun=store.get_y_best(),
                          nfev=store.get_count_evals(),
                          success=True)
Example 4
    def __init__(self, seed=None, comm=MPI.COMM_WORLD):
        """Create independent random number generators in parallel

        Optional keyword arguments:
        seed=None: seed the Generator to get a reproducible stream.
        comm=MPI.COMM_WORLD: The MPI communicator

        Creates an independent np.random.Generator in each MPI process. This
        generator can be retrieved with the __call__ method, e.g.

        from KSFD import Generator
        ...
        kgen = Generator(seed)
        rng = kgen()

        Also, the class method get_rng() will retrieve the process-wide
        np.random.Generator, so that you don't need to carry the Generator
        instance around with you:

        rng = Generator.get_rng()
        """
        if seed is None and self._rng is not None:
            #
            # already set -- nothing to do
            #
            return
        size = comm.size
        rank = comm.rank
        ss = SeedSequence(seed)
        seeds = ss.spawn(size)
        type(self)._seeds = seeds
        type(self)._rng = default_rng(seeds[rank])
        return
Example 5
def generate_random_configurations(box_length, n_part, n_dim, n_ensemble,
                                   seed_entropy):
    """ Generates a set of configurations in an ensemble

    Parameters
    ----------
    box_length : float
        length of the box
    n_part : int
        number of particles
    n_dim : int
        dimension of the system
    n_ensemble : int
        number of configurations to generate
    seed_entropy : int
        entropy used to seed the SeedSequence from which one child seed
        per configuration is spawned

    Returns
    -------
    list of configurations : list of numpy arrays
        list of configurations in the ensemble
    seeds : list of numpy.random.SeedSequence
        spawned child seed sequences, one per configuration,
        for the random number generator
    """
    sq = SeedSequence(seed_entropy)
    seeds = sq.spawn(n_ensemble)
    coords_list = []
    for seed in seeds:
        initial_coords = generate_random_configuration_single(
            box_length, n_part, n_dim, seed)
        coords_list.append(initial_coords)
    return (coords_list, seeds)
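The helper generate_random_configuration_single is not shown above; a minimal sketch of what such a helper could look like, assuming it simply draws uniform particle coordinates inside the box from the child SeedSequence it receives:

from numpy.random import default_rng

def generate_random_configuration_single(box_length, n_part, n_dim, seed):
    # `seed` is a child SeedSequence spawned by the caller; default_rng accepts
    # it directly and builds an independent stream from it.
    rng = default_rng(seed)
    # Hypothetical body: uniform particle coordinates inside a cubic box.
    return rng.uniform(0.0, box_length, size=(n_part, n_dim))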
Example 6
    def __init__(self, seed_seq=None, spawn_seed_seq=True):
        """
        Parameters
        ----------
        seed_seq : np.random.SeedSequence, rlberry.seeding.Seeder or int, default : None
            Seed sequence from which to spawn the random number generator.
            If None, generate random seed.
            If int, use as entropy for SeedSequence.
            If seeder, use seeder.seed_seq
        spawn_seed_seq : bool, default : True
            If True, uses seed_seq to spawn a new seed sequence (strongly recommended) for the Seeder.
            If False, uses the input seed_seq to define the Seeder.
            Warning: Setting to false can lead to unexpected behavior. This argument is only used internally
            in rlberry, in Seeder.spawn(), to avoid unnecessary spawning.
        """
        super().__init__()
        if seed_seq is None:
            seed_seq = SeedSequence()
        elif isinstance(seed_seq, SeedSequence):
            seed_seq = seed_seq
        elif isinstance(seed_seq, Seeder):
            seed_seq = seed_seq.seed_seq
        else:  # integer
            seed_seq = SeedSequence(seed_seq)

        if spawn_seed_seq:
            seed_seq = seed_seq.spawn(1)[0]

        self.seed_seq = seed_seq
        self.rng = default_rng(self.seed_seq)
Example 7
    def run(self):

        ### 1.- Specify maximum and minimum limits
        self.valid_args_limit()

        chunk = int(np.ceil(self.maxiter / self.workers))
        seq = SeedSequence()
        random = seq.spawn(self.workers)

        if self.debug:
            print()
            print('maxiter {} chunk {} workers {} threads {} cats {}'.format(
                self.maxiter, chunk, self.workers, self.n_threads,
                self.n_cats))
            print()
            print('exec pstree -p', os.getpid())

        best = None

        if self.workers == 1:
            best = self.__worker__(0, default_rng(random[0]), self.maxiter,
                                   None)
        else:

            best = [[]] * 4
            best[self.BEST_FUNC_TEST_VALUE] = np.inf

            shared_best = Array('d', [0.0] * (self.dimension + 4), lock=True)
            shared_best[self.BEST_FUNC_TEST_VALUE] = np.inf

            jobs = []

            for pid in range(self.workers):
                p = Process(target=self.__worker__,
                            args=(pid, default_rng(random[pid]), chunk,
                                  shared_best))
                jobs.append(p)

                p.start()

            for job in jobs:
                job.join()

            best[self.BEST_PROCESS_ID] = shared_best[self.BEST_PROCESS_ID]
            best[self.BEST_CAT_INDEX] = shared_best[self.BEST_CAT_INDEX]
            best[self.BEST_FUNC_TEST_VALUE] = shared_best[
                self.BEST_FUNC_TEST_VALUE]

            for i in range(self.dimension):
                best[self.BEST_CAT_POSITION].append(
                    shared_best[self.BEST_CAT_POSITION + i])

        # Patch the output when maximizing to recover the real value: -f(x)
        if self.maximize:
            best[self.BEST_FUNC_TEST_VALUE] = -best[self.BEST_FUNC_TEST_VALUE]

        if self.debug:
            print()

        return best
Example 8
def test_seedsequence():
    from numpy.random.bit_generator import (ISeedSequence,
                                            ISpawnableSeedSequence,
                                            SeedlessSeedSequence)

    s1 = SeedSequence(range(10), spawn_key=(1, 2), pool_size=6)
    s1.spawn(10)
    s2 = SeedSequence(**s1.state)
    assert_equal(s1.state, s2.state)
    assert_equal(s1.n_children_spawned, s2.n_children_spawned)

    # The interfaces cannot be instantiated themselves.
    assert_raises(TypeError, ISeedSequence)
    assert_raises(TypeError, ISpawnableSeedSequence)
    dummy = SeedlessSeedSequence()
    assert_raises(NotImplementedError, dummy.generate_state, 10)
    assert len(dummy.spawn(10)) == 10
Example 9
def _retry(minimizer):
    sg = SeedSequence()
    rgs = [Generator(MT19937(s)) for s in sg.spawn(minimizer.workers)]
    procs = [
        Process(target=_retry_loop, args=(pid, rgs, minimizer))
        for pid in range(minimizer.workers)
    ]
    [p.start() for p in procs]
    return procs
Example 10
    def __init__(self, n, seed=None, threads=None):
        if threads is None:
            threads = multiprocessing.cpu_count()
        self.threads = threads

        seq = SeedSequence(seed)
        self._random_generators = [default_rng(s) for s in seq.spawn(threads)]

        self.n = n
        self.executor = concurrent.futures.ThreadPoolExecutor(threads)
        self.values = np.empty(n)
        self.step = np.ceil(n / threads).astype(np.int_)
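Only the constructor is shown; a minimal sketch of the companion fill method this constructor is usually paired with, assuming the goal is to fill self.values in parallel with one generator per thread:

    def fill(self):
        # Sketch: each thread fills its own slice of self.values with its own
        # generator, so no bit-generator state is ever shared between threads.
        def _fill(rng, out, first, last):
            rng.standard_normal(out=out[first:last])

        futures = [
            self.executor.submit(_fill, self._random_generators[i],
                                 self.values, i * self.step,
                                 min((i + 1) * self.step, self.n))
            for i in range(self.threads)
        ]
        for future in futures:
            future.result()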
Example 11
def retry(fun, store, optimize, num_retries, value_limit = math.inf, 
          workers=mp.cpu_count(), stop_fitness = -math.inf):
    sg = SeedSequence()
    rgs = [Generator(MT19937(s)) for s in sg.spawn(workers)]
    proc=[Process(target=_retry_loop,
            args=(pid, rgs, fun, store, optimize, num_retries, value_limit, stop_fitness)) for pid in range(workers)]
    [p.start() for p in proc]
    [p.join() for p in proc]
    store.sort()
    store.dump()
    return OptimizeResult(x=store.get_x_best(), fun=store.get_y_best(), 
                          nfev=store.get_count_evals(), success=True)
Example 12
def mo_retry(fun, weight_bounds, ncon, y_exp, store, optimize, num_retries, value_limits, 
          workers=mp.cpu_count()):
    sg = SeedSequence()
    rgs = [Generator(MT19937(s)) for s in sg.spawn(workers)]
    proc=[Process(target=_retry_loop,
            args=(pid, rgs, fun, weight_bounds, ncon, y_exp, 
                  store, optimize, num_retries, value_limits)) for pid in range(workers)]
    [p.start() for p in proc]
    [p.join() for p in proc]
    store.sort()
    store.dump()
    return store.get_xs()
Example 13
def _generate_args(data_dir):
    vehicle_data = osp.join(data_dir, "Vehicle Data", "Simulation Snapshot")
    ss = SeedSequence(SEED)
    seeds = ss.spawn(N_FILES)

    for chunk in range(N_FILES):
        fname = osp.join(vehicle_data,
                         "Snapshot_" + str(chunk * 1000000) + ".csv")

        sys.stderr.write("Processing chunk {}...\n".format(chunk))
        sys.stderr.flush()

        yield (seeds[chunk], fname)
Example 14
def calculate_variable_importance(explainer, type, loss_function, variables, N,
                                  B, label, processes, keep_raw_permutations,
                                  random_state):
    if processes == 1:
        result = [None] * B
        for i in range(B):
            result[i] = loss_after_permutation(explainer.data, explainer.y,
                                               explainer.model,
                                               explainer.predict_function,
                                               loss_function, variables, N,
                                               np.random)
    else:
        # Create number generator for each iteration
        ss = SeedSequence(random_state)
        generators = [default_rng(s) for s in ss.spawn(B)]
        pool = mp.Pool(processes)
        result = pool.starmap_async(
            loss_after_permutation,
            [(explainer.data, explainer.y, explainer.model,
              explainer.predict_function, loss_function, variables, N,
              generators[i]) for i in range(B)]).get()
        pool.close()

    raw = pd.concat(result, sort=True)
    result = raw.mean().sort_values().reset_index()
    result['label'] = label

    result.rename(columns={
        0: 'dropout_loss',
        'index': 'variable'
    },
                  inplace=True)

    if type == "ratio":
        result.loc[:,
                   'dropout_loss'] = result.loc[:, 'dropout_loss'] / result.loc[
                       result.variable == '_full_model_',
                       'dropout_loss'].values

    if type == "difference":
        result.loc[:,
                   'dropout_loss'] = result.loc[:,
                                                'dropout_loss'] - result.loc[
                                                    result.variable ==
                                                    '_full_model_',
                                                    'dropout_loss'].values

    raw_permutations = raw.reset_index(
        drop=True) if keep_raw_permutations else None

    return result, raw_permutations
Example 15
    def _set_rng(self):
        """
        Initialize random generator stream. For seeded runs, sets the state reproducibly.
        """
        # TODO: checkpointing save of self._rng.bit_generator.state per process
        if mpi.is_main_process():
            seed = getattr(self, "seed", None)
            if seed is not None:
                self.mpi_warning("This run has been SEEDED with seed %s", seed)
            ss = SeedSequence(seed)
            child_seeds = ss.spawn(mpi.size())
        else:
            child_seeds = None
        ss = mpi.scatter(child_seeds)
        self._entropy = ss.entropy  # store entropy for reproducibility/debugging
        self._rng = default_rng(ss)
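A self-contained sketch of the same spawn-and-scatter pattern written directly against mpi4py (hypothetical standalone script, not part of the class above):

from mpi4py import MPI
from numpy.random import SeedSequence, default_rng

comm = MPI.COMM_WORLD
if comm.rank == 0:
    # Rank 0 spawns one child SeedSequence per process ...
    child_seeds = SeedSequence(12345).spawn(comm.size)
else:
    child_seeds = None
# ... and scatters them, so every rank ends up with an independent stream.
rng = default_rng(comm.scatter(child_seeds, root=0))
print(comm.rank, rng.random())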
Example 16
def run_experiment(c):
    # timestamp to identify the experiment
    timestamp = datetime.now()

    # multiprocessing initialization
    manager = mp.Manager()
    queue = manager.Queue()
    pool = mp.Pool(mp.cpu_count() + 1)

    # calculate number of total iterations to show progress
    num_iterations_total = len(c['DATASETS']) * len(
        c['OPTIMIZERS']) * c['NUM_ITERATIONS']

    # start process which listens to the queue and writes new results to file
    result_writer = pool.apply_async(
        queue_listener, (queue, c, timestamp, num_iterations_total))

    # create independent random generator objects (streams) for every iteration
    seed_sequence = SeedSequence(12345)
    child_seeds = seed_sequence.spawn(num_iterations_total)
    random_streams = iter([default_rng(s) for s in child_seeds])

    # create all the workers which each compute one iteration
    iterations = []
    for dataset in c['DATASETS']:
        for opt_name, opt_params in c['OPTIMIZERS'].items():
            for iteration_idx in range(c['NUM_ITERATIONS']):
                rng = next(random_streams)
                iteration = pool.apply_async(
                    run_iteration, (c, dataset, iteration_idx, opt_name,
                                    opt_params, timestamp, queue, rng))
                iterations.append(iteration)

    # collect results from the workers through the pool result queue
    for iteration in iterations:
        iteration.get()

    # now we are done, kill the listener
    queue.put('kill')
    pool.close()
    pool.join()
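run_iteration is defined elsewhere; a minimal sketch of what such a worker might do with the stream it receives (hypothetical body, only to illustrate the one-generator-per-iteration pattern):

def run_iteration(c, dataset, iteration_idx, opt_name, opt_params,
                  timestamp, queue, rng):
    # The config, dataset and optimizer arguments are ignored in this sketch;
    # the point is that each iteration owns an independent Generator, so its
    # results are reproducible regardless of scheduling order.
    score = rng.random()  # stand-in for the real optimizer run
    queue.put((dataset, opt_name, iteration_idx, score))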
Example 17
def create_bit_generator(seed=None, stream=0):
    """Creates an instance of a ``BIT_GENERATOR``.

    Parameters
    ----------
    seed : int, optional
        The seed to use. If seed is None (the default), will create a seed
        using :py:func:`create_seed`.
    stream : int, optional
        The stream to create the bit generator for. This allows multiple
        generators to exist with the same seed, but that produce different sets
        of random numbers. Default is 0.

    Returns
    -------
    BIT_GENERATOR :
        The bit generator initialized with the given seed and stream.
    """
    # create the seed sequence
    seedseq = SeedSequence(create_seed(seed))
    if stream > 0:
        seedseq = seedseq.spawn(stream + 1)[stream]
    return BIT_GENERATOR(seedseq)
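BIT_GENERATOR and create_seed are defined elsewhere in that module; a standalone sketch of the same seed/stream idea using NumPy's PCG64 directly (names here are illustrative only):

from numpy.random import Generator, PCG64, SeedSequence

def make_generator(seed, stream=0):
    # Same entropy, different spawn index: reproducible but distinct streams.
    seedseq = SeedSequence(seed)
    if stream > 0:
        seedseq = seedseq.spawn(stream + 1)[stream]
    return Generator(PCG64(seedseq))

# Streams 0 and 1 share the seed 42 but produce unrelated numbers.
g0, g1 = make_generator(42, 0), make_generator(42, 1)
print(g0.random(), g1.random())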
Example 18
def _main_parallel(args):
    """Spawn child processes after global setup to speed up rendering"""
    model_dirs = get_split(args)
    end_idx = args.end_idx if args.end_idx > 0 else len(model_dirs)
    n_instances = end_idx - args.start_idx

    # need to pass in separate RNGs into the child processes.
    seed_gen = SeedSequence(9)
    rngs = [Generator(MT19937(sg)) for sg in seed_gen.spawn(n_instances)]

    futures = []
    with ProcessPoolExecutor(max_workers=16) as executor:
        for model_dir, rng in zip(model_dirs[args.start_idx:end_idx], rngs):
            sel_dirs = [model_dir]
            for _ in range(args.n_objects - 1):
                sel_dirs.append(rng.choice(model_dirs))
            futures.append(executor.submit(
                render_views,
                args,
                sel_dirs,
                rng,
            ))
        for future in futures:
            _ = future.result()
Example 19
def generate_save_random_configurations(foldpath,
                                        ensemble_size,
                                        seed_entropy=None,
                                        seed_radii=0,
                                        subfoldname=SUB_FOLD_NAME):
    """ generates and saves random configurations 
        for particles given a folder with particular configurations
        each random configuration is stored in its own file that defines the ensemble
    
    Args:
       foldname ${1:arg1}
       seed_radii: seed for radii
       seed_coords_list: seeds list for random configuration if none generates from seedsequence
       subfoldname ${2:arg2} (default 'random_configurations_run')
    """

    # print(foldpath)
    # print(str(foldpath)+'/systemdata.json')
    foldpath = str(foldpath)
    params = load_params(foldpath)
    radii = generate_random_radii_inverse_power(params,
                                                seed_radii,
                                                foldpath,
                                                subfoldname=SUB_FOLD_NAME)
    box_length = get_box_length(radii, params.ndim.value, params.phi.value)

    run_foldpath = foldpath + "/" + subfoldname + "/"
    os.makedirs(run_foldpath, exist_ok=True)
    entropy_file = run_foldpath + "/" + "seed_entropy.txt"

    if seed_entropy is None:
        # check whether entropy already exists in the folder
        previous_entropy_exists = os.path.isfile(entropy_file)

        if previous_entropy_exists:
            # hacky way to deal with large integer issues with seeds
            seed_entropy = np.loadtxt(entropy_file, delimiter=",", dtype=str)
            seed_entropy = int(seed_entropy)
        else:
            # generate entropy and save to file
            sq = SeedSequence()
            seed_entropy = sq.entropy
            np.savetxt(
                run_foldpath + "/" + "seed_entropy.txt",
                [seed_entropy],
                delimiter=",",
                fmt="%1.1i",
            )
    else:
        # save user defined entropy/seed to file any ways
        np.savetxt(
            run_foldpath + "/" + "seed_entropy_user_defined.txt",
            [seed_entropy],
            delimiter=",",
            fmt="%1.64i",
        )

    print(seed_entropy)
    sq1 = SeedSequence(seed_entropy)

    seeds = sq1.spawn(ensemble_size)
    assert len(seeds) == ensemble_size
    print(seed_entropy)

    ensemble_path = run_foldpath + "ensemble"
    os.makedirs(ensemble_path, exist_ok=True)

    for seed_coords in seeds:
        coords = generate_random_configuration_single(box_length,
                                                      params.n_part.value,
                                                      params.ndim.value,
                                                      seed_coords)
        # save according to the spawn key
        np.savetxt(ensemble_path + "/" + str(seed_coords.spawn_key[0]), coords)
        print(ensemble_path + "/" + str(seed_coords.spawn_key[0]))
    return 0
Example 20
    parser.add_argument(
        "--ncpu",
        type=int,
        action="store",
        help="Number of CPUs to use. If not specified, uses cpu_count() - 1",
    )
    parser.add_argument(
        "--z_only",
        action="store_true",
        help="Only execute Z-type tests",
    )
    args = parser.parse_args()
    njobs = getattr(args, "ncpu", None)
    njobs = psutil.cpu_count(logical=False) - 1 if njobs is None else njobs
    njobs = max(njobs, 1)

    ss = SeedSequence(ENTROPY)
    children = ss.spawn(len(TRENDS) * EX_NUM)
    generators = [Generator(PCG64(child)) for child in children]
    jobs = []
    count = 0
    for tr in TRENDS:
        for i in range(EX_NUM):
            file_name = os.path.join(OUTPUT_PATH, f"adf_z_{tr}-{i:04d}.npz")
            jobs.append((tr, generators[count], file_name))
            count += 1
    jobs = [job for job in jobs if not os.path.exists(job[-1])]
    random.shuffle(jobs)
    nremconfig = len(jobs)
    nconfig = len(children)
    print(f"Total configurations: {BLUE}{nconfig}{RESET}, "
          f"Remaining: {RED}{nremconfig}{RESET}")
    print(f"Running on {BLUE}{njobs}{RESET} CPUs")
Example 21
    def __init__(self,
                 t0: Union[float, torch.Tensor],
                 w0: torch.Tensor,
                 t1: Optional[Union[float, torch.Tensor]] = None,
                 w1: Optional[torch.Tensor] = None,
                 entropy: Optional[int] = None,
                 tol: float = 1e-6,
                 pool_size: int = 24,
                 cache_depth: int = 9,
                 safety: Optional[float] = None):
        """Initialize the Brownian tree.

        The random value generation process exploits the parallel random number paradigm and uses
        `numpy.random.SeedSequence`. The default generator is PCG64 (used by `default_rng`).

        Args:
            t0: Initial time.
            w0: Initial state.
            t1: Terminal time.
            w1: Terminal state.
            entropy: Global seed, defaults to `None` for random entropy.
            tol: Error tolerance before the binary search is terminated; the search depth ~ log2(tol).
            pool_size: Size of the pooled entropy; should be larger than max depth of queries.
                This parameter affects the query speed significantly.
            cache_depth: Depth of the tree to cache values. This parameter affects the query speed significantly.
            safety: Small float representing some time increment before t0 and after t1.
                In practice, we don't let t0 and t1 of the Brownian tree be the start and terminal times of the
                solutions. This is to avoid issues related to 1) finite precision, and 2) adaptive solver querying time
                points beyond initial and terminal times.
        """
        super(BrownianTree, self).__init__()
        if not utils.is_scalar(t0):
            raise ValueError(
                'Initial time t0 should be a float or 0-d torch.Tensor.')
        if t1 is None:
            t1 = t0 + 1.0
        if not utils.is_scalar(t1):
            raise ValueError(
                'Terminal time t1 should be a float or 0-d torch.Tensor.')
        if t0 > t1:
            raise ValueError(
                f'Initial time {t0} should be less than terminal time {t1}.')
        t0, t1 = float(t0), float(t1)

        parent = SeedSequence(entropy=entropy, pool_size=pool_size)
        w1_seed, w00_seed, w11_seed, parent = parent.spawn(4)

        if w1 is None:
            w1 = w0 + utils.normal_like(w1_seed, w0) * math.sqrt(t1 - t0)

        self._t0 = t0
        self._t1 = t1

        self._entropy = entropy
        self._tol = tol
        self._pool_size = pool_size
        self._cache_depth = cache_depth

        # Boundary guards.
        if safety is None:
            safety = 0.1 * (t1 - t0)
        t00 = t0 - safety
        t11 = t1 + safety

        self._ts_prev = blist.blist()
        self._ws_prev = blist.blist()
        self._ts_prev.extend([t00, t0])
        self._ws_prev.extend(
            [w0 + utils.normal_like(w00_seed, w0) * math.sqrt(t0 - t00), w0])

        self._ts_post = blist.blist()
        self._ws_post = blist.blist()
        self._ts_post.extend([t1, t11])
        self._ws_post.extend(
            [w1, w1 + utils.normal_like(w11_seed, w1) * math.sqrt(t11 - t1)])

        # Cache.
        ts, ws, seeds = _create_cache(t0=t0,
                                      t1=t1,
                                      w0=w0,
                                      w1=w1,
                                      parent=parent,
                                      k=cache_depth)
        self._ts = ts
        self._ws = ws
        self._seeds = seeds

        self._last_depth = None
Example 22
    def compute_all_states(
        self,
        inputs: Sequence[np.ndarray],
        forced_teachers: Sequence[np.ndarray] = None,
        init_state: np.ndarray = None,
        init_fb: np.ndarray = None,
        wash_nr_time_step: int = 0,
        workers: int = -1,
        backend: str = "threading",
        seed: int = None,
        verbose: bool = True,
        memmap: np.memmap = None,
    ) -> Sequence[np.ndarray]:
        """Compute all states generated from sequences of inputs.

        Parameters
        ----------
            inputs: list or array of numpy.array
                All sequences of inputs used for internal state computation.
                Note that it should always be a list of sequences, i.e. if
                only one sequence of inputs is used, it should be alone in a
                list

            forced_teachers: list or array of numpy.array, optional
                Sequence of ground truths, for computation with feedback without
                any trained readout. Note that it should always be a list of
                sequences of the same length as the `inputs`, i.e. if
                only one sequence of inputs is used, it should be alone in a
                list.

            init_state: np.ndarray, optional
                State initialization vector for all inputs. By default, state
                is initialized at 0.

            init_fb: np.ndarray, optional
                Feedback initialization vector for all inputs, if feedback is
                enabled. By default, feedback is initialized at 0.

            wash_nr_time_step: int, optional
                Number of states to consider as transient when training, and to
                remove when computing the readout weights. By default, no states are
                removed.

            workers: int, optional
                If n >= 1, will enable parallelization of states computation with
                n threads/processes, if possible. If n = -1, will use all available
                resources for parallelization. By default, -1.

            backend: {"threadings", "multiprocessing", "loki"}, optional
                Backend used for parallelization of states computations.
                By default, "threading".

            verbose: bool, optional

        Returns:
            list of np.ndarray
                All computed states.
        """

        # initialization of workers
        loop = joblib.Parallel(n_jobs=workers, backend=backend)
        delayed_states = joblib.delayed(self._compute_states)

        # generation of seed sequence
        # each seed in the sequence is independent from the others
        # i.e. this is thread safe random generation.
        # Used for noisy training and running of reservoirs
        ss = SeedSequence(seed)
        # one independent seed per sequence
        child_seeds = ss.spawn(len(inputs))

        # progress bar if needed
        if verbose:
            track = tqdm
        else:

            def track(x, text):
                return x

        inputs_ends = np.cumsum([i.shape[0] for i in inputs])
        inputs_starts = [
            end - i.shape[0] for i, end in zip(inputs, inputs_ends)
        ]

        # no feedback training or running
        if forced_teachers is None:
            all_states = loop(
                delayed_states(inputs[i],
                               wash_nr_time_step=wash_nr_time_step,
                               input_id=i,
                               init_state=init_state,
                               init_fb=init_fb,
                               memmap=memmap,
                               input_pos=(inputs_starts[i], inputs_ends[i]),
                               seed=child_seeds[i],
                               verbose=verbose)
                for i in track(range(len(inputs)), "Computing states"))
        # feedback training
        else:
            all_states = loop(
                delayed_states(inputs[i],
                               forced_teachers[i],
                               wash_nr_time_step=wash_nr_time_step,
                               input_id=i,
                               init_state=init_state,
                               init_fb=init_fb,
                               memmap=memmap,
                               input_pos=(inputs_starts[i], inputs_ends[i]),
                               seed=child_seeds[i],
                               verbose=verbose)
                for i in track(range(len(inputs)), "Computing states"))

        # input ids are used to make sure that the returned states are in the same order
        # as inputs, because parallelization can change this order.
        return [s[1] for s in sorted(all_states, key=lambda x: x[0])]
Example 23
    56208,
    23325,
    29606,
    40099,
    9776,
    46303,
    6333,
    15881,
    63110,
    6022,
    61267,
    56526,
]
entropy = sum([bits << (16 * i) for i, bits in enumerate(entropy_bits)])
seq = SeedSequence(entropy)
gen = [Generator(PCG64(child)) for child in seq.spawn(EX_NUM)]
sample_sizes = (
    20,
    25,
    30,
    35,
    40,
    45,
    50,
    60,
    70,
    80,
    90,
    100,
    120,
    140,
Example 24
def RunTMCMC(N, AllPars, Nm_steps_max, Nm_steps_maxmax, log_likelihood,
             variables, resultsLocation, seed):
    """ Runs TMCMC Algorithm """

    # Initialize (beta, effective sample size)
    beta = 0
    ESS = N
    mytrace = []

    # Initialize other TMCMC variables
    Nm_steps = Nm_steps_max
    parallelize_MCMC = 'yes'  # yes or no
    Adap_calc_Nsteps = 'yes'  # yes or no
    Adap_scale_cov = 'yes'  # yes or no
    scalem = 1  # cov scale factor
    evidence = 1  # model evidence

    # initial samples
    Sm = tmcmcFunctions.initial_population(N, AllPars)

    # Evaluate posterior at Sm
    Priorm = np.array([tmcmcFunctions.log_prior(s, AllPars)
                       for s in Sm]).squeeze()
    Postm = Priorm  # prior = post for beta = 0

    # Evaluate log-likelihood at current samples Sm
    if parallelize_MCMC == 'yes':
        pool = Pool(processes=mp.cpu_count())
        Lmt = pool.starmap(
            runFEM,
            [(ind, Sm[ind], variables, resultsLocation, log_likelihood)
             for ind in range(N)],
        )
        pool.close()
        Lm = np.array(Lmt).squeeze()
    else:
        Lm = np.array([
            runFEM(ind, Sm[ind], variables, resultsLocation, log_likelihood)
            for ind in range(N)
        ]).squeeze()

    while beta < 1:
        # adaptively compute beta s.t. ESS = N/2 or ESS = 0.95*prev_ESS
        # plausible weights of Sm corresponding to new beta
        beta, Wm, ESS = tmcmcFunctions.compute_beta(beta,
                                                    Lm,
                                                    ESS,
                                                    threshold=0.95)

        # update model evidence
        evidence = evidence * (sum(Wm) / N)

        # Calculate covariance matrix using Wm_n
        Cm = np.cov(Sm, aweights=Wm / sum(Wm), rowvar=0)

        # Resample ###################################################
        # Resampling using plausible weights
        SmcapIDs = np.random.choice(range(N), N, p=Wm / sum(Wm))
        # SmcapIDs = resampling.stratified_resample(Wm_n)
        Smcap = Sm[SmcapIDs]
        Lmcap = Lm[SmcapIDs]
        Postmcap = Postm[SmcapIDs]

        # save to trace
        # stage m: samples, likelihood, weights, next stage ESS, next stage beta, resampled samples
        mytrace.append([Sm, Lm, Wm, ESS, beta, Smcap])

        # print
        print("beta = %.5f" % beta)
        print("ESS = %d" % ESS)
        print("scalem = %.2f" % scalem)

        # Perturb ###################################################
        # perform MCMC starting at each Smcap (total: N) for Nm_steps
        Em = (scalem**2) * Cm  # Proposal dist covariance matrix

        numProposals = N * Nm_steps
        numAccepts = 0

        # seed to reproduce results
        ss = SeedSequence(seed)
        child_seeds = ss.spawn(N)

        if parallelize_MCMC == 'yes':
            pool = Pool(processes=mp.cpu_count())
            results = pool.starmap(
                tmcmcFunctions.MCMC_MH,
                [(j1, Em, Nm_steps, Smcap[j1], Lmcap[j1], Postmcap[j1], beta,
                  numAccepts, AllPars, log_likelihood, variables,
                  resultsLocation, default_rng(child_seeds[j1]))
                 for j1 in range(N)],
            )
            pool.close()
        else:
            results = [
                tmcmcFunctions.MCMC_MH(j1, Em, Nm_steps, Smcap[j1], Lmcap[j1],
                                       Postmcap[j1], beta, numAccepts, AllPars,
                                       log_likelihood, variables,
                                       resultsLocation,
                                       default_rng(child_seeds[j1]))
                for j1 in range(N)
            ]

        Sm1, Lm1, Postm1, numAcceptsS, all_proposals, all_PLP = zip(*results)
        Sm1 = np.asarray(Sm1)
        Lm1 = np.asarray(Lm1)
        Postm1 = np.asarray(Postm1)
        numAcceptsS = np.asarray(numAcceptsS)
        numAccepts = sum(numAcceptsS)
        all_proposals = np.asarray(all_proposals)
        all_PLP = np.asarray(all_PLP)

        # total observed acceptance rate
        R = numAccepts / numProposals
        print("acceptance rate = %.2f" % R)

        # Calculate Nm_steps based on observed acceptance rate
        if Adap_calc_Nsteps == 'yes':
            # increase max Nmcmc with stage number
            Nm_steps_max = min(Nm_steps_max + 1, Nm_steps_maxmax)
            print("adapted max MCMC steps = %d" % Nm_steps_max)

            acc_rate = max(1. / numProposals, R)
            Nm_steps = min(Nm_steps_max,
                           1 + int(np.log(1 - 0.99) / np.log(1 - acc_rate)))
            print("next MCMC Nsteps = %d" % Nm_steps)

        print('========================')

        # scale factor based on observed acceptance ratio
        if Adap_scale_cov == 'yes':
            scalem = (1 / 9) + ((8 / 9) * R)

        # for next beta
        Sm, Postm, Lm = Sm1, Postm1, Lm1

    # save to trace
    mytrace.append([Sm, Lm, np.ones(len(Wm)), 'notValid', 1, 'notValid'])

    print("evidence = %.10f" % evidence)

    return mytrace
Example 25
def shap(explainer, new_observation, path, keep_distributions, B, processes,
         random_state):
    # Now we know the path, so we can calculate contributions
    # set variable indicators
    # start random path
    p = new_observation.shape[1]

    if processes == 1:
        result_list = [
            iterate_paths(explainer.predict_function, explainer.model,
                          explainer.data, explainer.label, new_observation, p,
                          b + 1, np.random) for b in range(B)
        ]
    else:
        # Create number generator for each iteration
        ss = SeedSequence(random_state)
        generators = [default_rng(s) for s in ss.spawn(B)]
        pool = mp.get_context('spawn').Pool(processes)
        result_list = pool.starmap_async(
            iterate_paths,
            [(explainer.predict_function, explainer.model, explainer.data,
              explainer.label, new_observation, p, b + 1, generators[b])
             for b in range(B)]).get()
        pool.close()

    result = pd.concat(result_list)

    if path is not None:
        if isinstance(path, str) and path == 'average':
            # average over all of the paths
            variable_average = result.pivot(index='variable',
                                            columns='B',
                                            values='contribution').mean(axis=1)
            # sort pd.Series by index of abs-sorted pd.Series
            variable_average_sorted = \
                variable_average.reindex(variable_average.abs().sort_values(ascending=False).index)
            # make the final result - sort and fill with values
            result_average = result_list[0].set_index('variable').reindex(
                variable_average_sorted.index).reset_index()

            result_average = result_average.assign(
                contribution=variable_average_sorted.values,
                B=0,
                sign=np.sign(variable_average_sorted.values))

            result = pd.concat((result, result_average))
        else:
            tmp = get_single_random_path(explainer.predict_function,
                                         explainer.model, explainer.data,
                                         explainer.label, new_observation,
                                         path, 0)

            result = pd.concat((result, tmp))

    if keep_distributions:
        yhats_distributions = calculate_yhats_distributions(explainer)
    else:
        yhats_distributions = None

    target_yhat = explainer.predict(new_observation)[
        0]  # only one new_observation allowed
    data_yhat = explainer.predict(explainer.data)
    baseline_yhat = data_yhat.mean()

    return result, target_yhat, baseline_yhat, yhats_distributions
Example 26
    def generate(self,
                 nb_timesteps: int,
                 warming_inputs: np.ndarray = None,
                 init_state: np.ndarray = None,
                 init_fb: np.ndarray = None,
                 verbose: bool = False,
                 init_inputs: np.ndarray = None,
                 seed: int = None,
                 return_init: bool = None) -> Tuple[np.ndarray, np.ndarray]:
        """Run the ESN on generative mode.

        After the `warming_inputs` are consumed, new outputs are
        used as inputs for the next nb_timesteps, i.e. the
        ESN is feeding itself with its own outputs.

        Note that this mode can only work if the ESN is trained
        on a regression task. The outputs of the ESN must be
        the same kind of data as its input.

        To train an ESN on generative mode, use the :py:func:`ESN.train`
        method to train the ESN on a regression task (for
        instance, predict the future data point t+1 of a timeseries
            given the data at time t).

        Parameters
        ----------
            nb_timesteps: int
                Number of timesteps of data to generate
                from the initial input.
            warming_inputs: numpy.ndarray
                Input data used to initiate generative mode.
                This data is meant to "seed" the ESN internal
                states with some real information, before it runs
                on its own created outputs.
            init_state: numpy.ndarray, optional:
                State initialization vector for the reservoir.
                By default, internal state of the reservoir is initialized to 0.
            init_fb: numpy.ndarray, optional
                Feedback initialization vector for the reservoir, if feedback is
                enabled. By default, feedback is initialized to 0.
            verbose: bool, optional
            init_inputs: list of numpy.ndarray, optional
                Same as ``warming_inputs``.
                Kept for compatibility with previous version. Deprecated
                since 0.2.2, will be removed soon.
            return_init: bool, optional
                Kept for compatibility with previous version. Deprecated
                since 0.2.2, will be removed soon.

        Returns
        -------
            tuple of numpy.ndarray
                Generated outputs, generated states, warming outputs, warming states

                Generated outputs are the timeseries predicted by the ESN from
                its own predictions over time. Generated states are the
                corresponding internal states.

                Warming outputs are the predictions made by the ESN based on the
                warming inputs passed as parameters. These predictions are prior
                to the generated outputs. Warming states are the corresponding
                internal states. In the case no warming inputs are provided, warming
                outputs and warming states are None.

        """
        if warming_inputs is None and init_state is None and init_inputs is None:
            raise ValueError("at least one of the parameter 'warming_input' "
                             "or 'init_state' must not be None. Impossible "
                             "to generate from scratch.")

        if return_init is not None:
            warnings.warn("Deprecation warning : return_init parameter "
                          "is deprecated since 0.2.2 and will be removed.")

        # for additive noise in the reservoir
        # 2 separate seeds made from one: one for the warming
        # (if needed), one for the generation
        seed = seed if seed is not None else self.seed
        ss = SeedSequence(seed)
        child_seeds = ss.spawn(2)

        if warming_inputs is not None or init_inputs is not None:
            if init_inputs is not None:
                warnings.warn("Deprecation warning : init_inputs parameter "
                              "is deprecated since 0.2.2 and will be removed. "
                              "Please use warming_inputs instead.")
                warming_inputs = init_inputs

            if verbose:
                print(f"Generating {nb_timesteps} timesteps from "
                      f"{warming_inputs.shape[0]} inputs.")
                print("Computing initial states...")

            _, warming_states = self._compute_states(warming_inputs,
                                                     init_state=init_state,
                                                     init_fb=init_fb,
                                                     seed=child_seeds[0])

            # initial state (at beginning of generation)
            s0 = warming_states[:, -1].reshape(-1, 1)
            warming_outputs = self.compute_outputs([warming_states])[0]
            # initial input (at beginning of generation)
            u1 = warming_outputs[:, -1].reshape(1, -1)

            if init_fb is not None:
                # initial feedback (at beginning of generation)
                fb0 = warming_outputs[:, -2].reshape(1, -1)
            else:
                fb0 = None
            warming_outputs = warming_outputs.T
            warming_states = warming_states.T
        else:
            warming_outputs, warming_states = None, None
            # time is often the first axis, but compute_outputs expects
            # time on the second axis, hence the reshape:
            s0 = init_state.reshape(-1, 1)

            if init_fb is not None:
                fb0 = init_fb.reshape(-1, 1)
            else:
                fb0 = None

            u1 = self.compute_outputs([s0])[0][:, -1].reshape(1, -1)

        states = np.zeros((nb_timesteps, self.N))
        outputs = np.zeros((nb_timesteps, self.dim_out))

        if verbose:
            track = tqdm
        else:

            def track(x, text):
                return x

        # for additive noise in the reservoir
        rg = default_rng(child_seeds[1])

        for i in track(range(nb_timesteps), "Generating"):
            # from new input u1 and previous state s0
            # compute next state s1 -> s0
            s1 = self._get_next_state(single_input=u1,
                                      feedback=fb0,
                                      last_state=s0,
                                      noise_generator=rg)

            s0 = s1[:, -1].reshape(-1, 1)
            states[i, :] = s0.flatten()

            if fb0 is not None:
                fb0 = u1.copy()

            # from new state s1 compute next input u2 -> u1
            u1 = self.compute_outputs([s0])[0][:, -1].reshape(1, -1)
            outputs[i, :] = u1.flatten()

        return outputs, states, warming_outputs, warming_states
Example 27
        return data


if os.path.exists("bench.pkl"):
    with open("bench.pkl", "rb") as f:
        results = pickle.load(f)
else:
    npars = (1, 2, 3, 4, 6, 10, 20, 30, 40, 60, 100)

    from numpy.random import SeedSequence
    from concurrent.futures import ProcessPoolExecutor as Pool

    sg = SeedSequence(1)
    with Pool() as p:
        results = tuple(p.map(Runner(npars), sg.spawn(16)))

    with open("bench.pkl", "wb") as f:
        pickle.dump(results, f)


# plt.figure()
# f = TrackingFcn(default_rng(), 2)
# x = np.linspace(-10, 10)
# X, Y = np.meshgrid(x, x)
# F = np.empty_like(X)
# for i, xi in enumerate(x):
#     for j, yi in enumerate(x):
#         F[i, j] = f((xi, yi))
# plt.pcolormesh(X, Y, F.T)
# plt.colorbar()
Example 28
    def run(self, data, init_location=0, n_jobs=1, verbose=False):
        """Gets and prints the spreadsheet's header columns

        Parameters
        ----------
        data : array_like or list
            Square matrix of the input data, e.g. a distance matrix.
        init_location : int, optional
            the index of the starting point, e.g. the first node of a path
            (default is 0).
        n_jobs : int, optional
            The number of parallel processes. Use None or a negative number to
            automatically determine the maximum number of cores available
            (default is 1).
        verbose : bool, optional
            for additional output information during runtime
            (default is False).

        Returns
        -------
        (array_like, int)
            The indices of the best solution (excluding the starting
            position) and its cost.
        """

        # initialize cost matrix and pheromone matrix
        self._cost = np.array(data, dtype=np.float64)
        np.fill_diagonal(self._cost, np.inf)
        self._pheromone = np.full_like(data,
                                       self.init_pheromone,
                                       dtype=np.float64)

        # set constants
        self._SIZE = len(data)
        self._INDICES = np.arange(self._SIZE)
        self.init_location = init_location

        # number of processes
        if n_jobs is None or n_jobs < 0:
            n_jobs = cpu_count()
        n_jobs = max(1, round(n_jobs))

        # instantiate random number generators
        ss = SeedSequence()
        if n_jobs > 1:
            seeds = ss.spawn(self.n_ants)
            streams = [default_rng(s) for s in seeds]
        else:
            rng = default_rng()

        # main loop
        all_time_shortest_path = None
        all_time_lowest_cost = np.inf
        last_iteration_lowest_cost = np.inf
        convergence = 0
        for i in tqdm(range(self.max_iterations)):
            # generate a solution for each ant
            if n_jobs > 1:
                with Pool(n_jobs) as p:
                    solutions = p.map(self._generate_solution, streams)
            else:
                solutions = [
                    self._generate_solution(rng) for _ in range(self.n_ants)
                ]

            # sort solutions ascending by cost
            routes, costs = zip(*sorted(solutions, key=lambda x: x[1]))
            lowest_cost_in_iter = costs[0]

            # update and check convergence criterion
            if lowest_cost_in_iter == last_iteration_lowest_cost:
                convergence += 1
                if convergence >= 10:  # *** TODO: find better way ***
                    break
            else:
                convergence = 0
                last_iteration_lowest_cost = lowest_cost_in_iter

            # update cost and variables if necessary
            if lowest_cost_in_iter < all_time_lowest_cost:
                all_time_lowest_cost = lowest_cost_in_iter
                all_time_shortest_path = routes[0]

            # update pheromone matrix based on best solutions
            self._pheromone_update(routes[:self.n_best], costs[:self.n_best])

            # misc
            if verbose:
                print(f'EPOCH {i + 1}: {all_time_lowest_cost}')

            # end of main loop

        print(
            f'converged after {i + 1} iterations with result: {all_time_lowest_cost} \n'
        )
        return all_time_shortest_path, all_time_lowest_cost
Example 29
from numpy.random import Generator, PCG64, SeedSequence
sg = SeedSequence(1234)
rg = [Generator(PCG64(s)) for s in sg.spawn(10)]
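A sketch of how spawned generators like these are typically handed out to workers, e.g. with multiprocessing (the work function is hypothetical, shown only for illustration):

from multiprocessing import Pool
from numpy.random import Generator, PCG64, SeedSequence

def work(rg):
    # Each worker receives its own independent, picklable Generator.
    return rg.standard_normal(1_000_000).mean()

if __name__ == "__main__":
    sg = SeedSequence(1234)
    rgs = [Generator(PCG64(s)) for s in sg.spawn(10)]
    with Pool(4) as pool:
        print(pool.map(work, rgs))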
Example 30
def RunTMCMC(N,
             AllPars,
             Nm_steps_max,
             Nm_steps_maxmax,
             log_likelihood,
             variables,
             workdirMain,
             seed,
             calibrationData,
             numExperiments,
             covarianceMatrixList,
             edpNamesList,
             edpLengthsList,
             scaleFactors,
             shiftFactors,
             run_type,
             logFile,
             MPI_size,
             parallelizeMCMC=True):
    """ Runs TMCMC Algorithm """

    # Initialize (beta, effective sample size)
    beta = 0
    ESS = N
    mytrace = []

    totalNumberOfModelEvaluations = N

    # Initialize other TMCMC variables
    Nm_steps = Nm_steps_max
    Adap_calc_Nsteps = 'yes'  # yes or no
    Adap_scale_cov = 'yes'  # yes or no
    scalem = 1  # cov scale factor
    evidence = 1  # model evidence
    stageNum = 0  # stage number of TMCMC

    logFile.write('\n\n\t\t==========================')
    logFile.write("\n\t\tStage number: {}".format(stageNum))
    logFile.write("\n\t\tSampling from prior")
    logFile.write("\n\t\tbeta = 0")
    logFile.write("\n\t\tESS = %d" % ESS)
    logFile.write("\n\t\tscalem = %.2f" % scalem)
    logFile.write(
        "\n\n\t\tNumber of model evaluations in this stage: {}".format(N))
    logFile.flush()
    os.fsync(logFile.fileno())

    # initial samples
    Sm = tmcmcFunctions.initial_population(N, AllPars)

    # Evaluate posterior at Sm
    Priorm = np.array([tmcmcFunctions.log_prior(s, AllPars)
                       for s in Sm]).squeeze()
    Postm = Priorm  # prior = post for beta = 0

    # Evaluate log-likelihood at current samples Sm
    logFile.write("\n\n\t\tRun type: {}".format(run_type))
    if parallelizeMCMC:
        if run_type == "runningLocal":
            procCount = mp.cpu_count()
            pool = Pool(processes=procCount)
            logFile.write(
                "\n\n\t\tCreated multiprocessing pool for runType: {}".format(
                    run_type))
            logFile.write("\n\t\t\tNumber of processors being used: {}".format(
                procCount))
            Lmt = pool.starmap(
                runFEM,
                [(ind, Sm[ind], variables, workdirMain, log_likelihood,
                  calibrationData, numExperiments, covarianceMatrixList,
                  edpNamesList, edpLengthsList, scaleFactors, shiftFactors)
                 for ind in range(N)],
            )
        else:
            from mpi4py.futures import MPIPoolExecutor
            executor = MPIPoolExecutor(max_workers=MPI_size)
            logFile.write(
                "\n\n\t\tCreated mpi4py executor pool for runType: {}".format(
                    run_type))
            logFile.write("\n\t\t\tmax_workers: {}".format(MPI_size))
            iterables = [
                (ind, Sm[ind], variables, workdirMain, log_likelihood,
                 calibrationData, numExperiments, covarianceMatrixList,
                 edpNamesList, edpLengthsList, scaleFactors, shiftFactors)
                for ind in range(N)
            ]
            Lmt = list(executor.starmap(runFEM, iterables))
        Lm = np.array(Lmt).squeeze()
    else:
        logFile.write("\n\n\t\tNot parallelized")
        logFile.write("\n\t\t\tNumber of processors being used: {}".format(1))
        Lm = np.array([
            runFEM(ind, Sm[ind], variables, workdirMain, log_likelihood,
                   calibrationData, numExperiments, covarianceMatrixList,
                   edpNamesList, edpLengthsList, scaleFactors, shiftFactors)
            for ind in range(N)
        ]).squeeze()

    logFile.write(
        "\n\n\t\tTotal number of model evaluations so far: {}".format(
            totalNumberOfModelEvaluations))

    # Write the results of the first stage to a file named dakotaTabPrior.out for quoFEM to be able to read the results
    logFile.write(
        "\n\n\t\tWriting prior samples to 'dakotaTabPrior.out' for quoFEM to read the results"
    )
    tabFilePath = os.path.join(workdirMain, "dakotaTabPrior.out")

    writeOutputs = True
    # Create the headings, which will be the first line of the file
    logFile.write("\n\t\t\tCreating headings")
    headings = 'eval_id\tinterface\t'
    for v in variables['names']:
        headings += '{}\t'.format(v)
    if writeOutputs:  # create headings for outputs
        for i, edp in enumerate(edpNamesList):
            if edpLengthsList[i] == 1:
                headings += '{}\t'.format(edp)
            else:
                for comp in range(edpLengthsList[i]):
                    headings += '{}_{}\t'.format(edp, comp + 1)
    headings += '\n'

    # Get the data from the first stage
    logFile.write("\n\t\t\tGetting data from first stage")
    dataToWrite = Sm

    logFile.write("\n\t\t\tWriting to file {}".format(tabFilePath))
    with open(tabFilePath, "w") as f:
        f.write(headings)
        for i in range(N):
            string = "{}\t{}\t".format(i + 1, 1)
            for j in range(len(variables['names'])):
                string += "{}\t".format(dataToWrite[i, j])
            if writeOutputs:  # write the output data
                workdirString = ("workdir." + str(i + 1))
                prediction = np.atleast_2d(
                    np.genfromtxt(
                        os.path.join(workdirMain, workdirString,
                                     'results.out'))).reshape((1, -1))
                for predNum in range(np.shape(prediction)[1]):
                    string += "{}\t".format(prediction[0, predNum])
            string += "\n"
            f.write(string)

    logFile.write('\n\t\t==========================')
    logFile.flush()
    os.fsync(logFile.fileno())

    while beta < 1:
        # adaptively compute beta s.t. ESS = N/2 or ESS = 0.95*prev_ESS
        # plausible weights of Sm corresponding to new beta
        beta, Wm, ESS = tmcmcFunctions.compute_beta(beta,
                                                    Lm,
                                                    ESS,
                                                    threshold=0.95)
        # beta, Wm, ESS = tmcmcFunctions.compute_beta(beta, Lm, ESS, threshold=0.5)

        stageNum += 1

        # seed to reproduce results
        ss = SeedSequence(seed)
        child_seeds = ss.spawn(N + 1)

        # update model evidence
        evidence = evidence * (sum(Wm) / N)

        # Calculate covariance matrix using Wm_n
        Cm = np.cov(Sm, aweights=Wm / sum(Wm), rowvar=False)
        # logFile.write("\nCovariance matrix: {}".format(Cm))

        # Resample ###################################################
        # Resampling using plausible weights
        # SmcapIDs = np.random.choice(range(N), N, p=Wm / sum(Wm))
        rng = default_rng(child_seeds[-1])
        SmcapIDs = rng.choice(range(N), N, p=Wm / sum(Wm))
        # SmcapIDs = resampling.stratified_resample(Wm_n)
        Smcap = Sm[SmcapIDs]
        Lmcap = Lm[SmcapIDs]
        Postmcap = Postm[SmcapIDs]

        # save to trace
        # stage m: samples, likelihood, weights, next stage ESS, next stage beta, resampled samples
        mytrace.append([Sm, Lm, Wm, ESS, beta, Smcap])

        # Write Data to '.csv' files
        dataToWrite = mytrace[stageNum - 1][0]
        logFile.write(
            "\n\n\t\tWriting samples from stage {} to csv file".format(
                stageNum - 1))

        stringToAppend = 'resultsStage{}.csv'.format(stageNum - 1)
        resultsFilePath = os.path.join(os.path.abspath(workdirMain),
                                       stringToAppend)

        with open(resultsFilePath, 'w', newline='') as csvfile:
            csvWriter = csv.writer(csvfile)
            csvWriter.writerows(dataToWrite)
        logFile.write("\n\t\t\tWrote to file {}".format(resultsFilePath))
        # Finished writing data

        logFile.write('\n\n\t\t==========================')
        logFile.write("\n\t\tStage number: {}".format(stageNum))
        if beta < 1e-7:
            logFile.write("\n\t\tbeta = %9.6e" % beta)
        else:
            logFile.write("\n\t\tbeta = %9.8f" % beta)
        logFile.write("\n\t\tESS = %d" % ESS)
        logFile.write("\n\t\tscalem = %.2f" % scalem)

        # Perturb ###################################################
        # perform MCMC starting at each Smcap (total: N) for Nm_steps
        Em = (scalem**2) * Cm  # Proposal dist covariance matrix

        numProposals = N * Nm_steps
        totalNumberOfModelEvaluations += numProposals
        logFile.write(
            "\n\n\t\tNumber of model evaluations in this stage: {}".format(
                numProposals))
        logFile.flush()
        os.fsync(logFile.fileno())

        numAccepts = 0
        if parallelizeMCMC:
            if run_type == "runningLocal":
                logFile.write("\n\n\t\tLocal run - MCMC steps")
                logFile.write(
                    "\n\t\t\tNumber of processors being used: {}".format(
                        procCount))
                results = pool.starmap(
                    tmcmcFunctions.MCMC_MH,
                    [(j1, Em, Nm_steps, Smcap[j1], Lmcap[j1], Postmcap[j1],
                      beta, numAccepts, AllPars, log_likelihood, variables,
                      workdirMain, default_rng(child_seeds[j1]),
                      calibrationData, numExperiments, covarianceMatrixList,
                      edpNamesList, edpLengthsList, scaleFactors, shiftFactors)
                     for j1 in range(N)],
                )
            else:
                logFile.write("\n\n\t\tRemote run - MCMC steps")
                logFile.write("\n\t\t\tmax_workers: {}".format(MPI_size))
                iterables = [
                    (j1, Em, Nm_steps, Smcap[j1], Lmcap[j1], Postmcap[j1],
                     beta, numAccepts, AllPars, log_likelihood, variables,
                     workdirMain, default_rng(child_seeds[j1]),
                     calibrationData, numExperiments, covarianceMatrixList,
                     edpNamesList, edpLengthsList, scaleFactors, shiftFactors)
                    for j1 in range(N)
                ]
                results = list(
                    executor.starmap(tmcmcFunctions.MCMC_MH, iterables))
        else:
            logFile.write("\n\n\t\tLocal run - MCMC steps, not parallelized")
            logFile.write(
                "\n\t\t\tNumber of processors being used: {}".format(1))
            results = [
                tmcmcFunctions.MCMC_MH(j1, Em, Nm_steps, Smcap[j1], Lmcap[j1],
                                       Postmcap[j1], beta, numAccepts, AllPars,
                                       log_likelihood, variables, workdirMain,
                                       default_rng(child_seeds[j1]),
                                       calibrationData, numExperiments,
                                       covarianceMatrixList, edpNamesList,
                                       edpLengthsList, scaleFactors,
                                       shiftFactors) for j1 in range(N)
            ]

        Sm1, Lm1, Postm1, numAcceptsS, all_proposals, all_PLP = zip(*results)
        Sm1 = np.asarray(Sm1)
        Lm1 = np.asarray(Lm1)
        Postm1 = np.asarray(Postm1)
        numAcceptsS = np.asarray(numAcceptsS)
        numAccepts = sum(numAcceptsS)
        all_proposals = np.asarray(all_proposals)
        all_PLP = np.asarray(all_PLP)

        logFile.write(
            "\n\n\t\tTotal number of model evaluations so far: {}".format(
                totalNumberOfModelEvaluations))

        # total observed acceptance rate
        R = numAccepts / numProposals
        if R < 1e-5:
            logFile.write("\n\n\t\tacceptance rate = %9.5e" % R)
        else:
            logFile.write("\n\n\t\tacceptance rate = %.6f" % R)

        # Calculate Nm_steps based on observed acceptance rate
        if Adap_calc_Nsteps == 'yes':
            # increase max Nmcmc with stage number
            Nm_steps_max = min(Nm_steps_max + 1, Nm_steps_maxmax)
            logFile.write("\n\t\tadapted max MCMC steps = %d" % Nm_steps_max)

            acc_rate = max(1. / numProposals, R)
            Nm_steps = min(Nm_steps_max,
                           1 + int(np.log(1 - 0.99) / np.log(1 - acc_rate)))
            logFile.write("\n\t\tnext MCMC Nsteps = %d" % Nm_steps)

        logFile.write('\n\t\t==========================')

        # scale factor based on observed acceptance ratio
        if Adap_scale_cov == 'yes':
            scalem = (1 / 9) + ((8 / 9) * R)

        # for next beta
        Sm, Postm, Lm = Sm1, Postm1, Lm1

    # save to trace
    mytrace.append([Sm, Lm, np.ones(len(Wm)), 'notValid', 1, 'notValid'])

    # Write last stage data to '.csv' file
    dataToWrite = mytrace[stageNum][0]
    logFile.write(
        "\n\n\t\tWriting samples from stage {} to csv file".format(stageNum))

    stringToAppend = 'resultsStage{}.csv'.format(stageNum)
    resultsFilePath = os.path.join(os.path.abspath(workdirMain),
                                   stringToAppend)

    with open(resultsFilePath, 'w', newline='') as csvfile:
        csvWriter = csv.writer(csvfile)
        csvWriter.writerows(dataToWrite)
    logFile.write("\n\t\t\tWrote to file {}".format(resultsFilePath))

    if parallelizeMCMC:
        if run_type == "runningLocal":
            pool.close()
            logFile.write(
                "\n\tClosed multiprocessing pool for runType: {}".format(
                    run_type))
        else:
            executor.shutdown()
            logFile.write(
                "\n\tShutdown mpi4py executor pool for runType: {}".format(
                    run_type))

    return mytrace