def geo_construct_gf_linear_patches(
        engine, datasets=None, targets=None, patches=None, nworkers=1):
    """
    Create geodetic Green's Function matrix for the given patches.

    Parameters
    ----------
    engine : :class:`pyrocko.gf.seismosizer.LocalEngine`
        main path to the directory containing the different
        Green's Function stores
    datasets : list of :class:`heart.GeodeticDataset`
        for which the GFs are calculated
    targets : list of :class:`heart.GeodeticDataset`
    patches : :class:`FaultGeometry`
        fault object that may comprise several sub-faults, thus forming a
        complex fault geometry
    nworkers : int
        number of CPUs to use for processing
    """
    _, los_vectors, odws, _ = heart.concatenate_datasets(datasets)

    nsamples = odws.size
    npatches = len(patches)
    logger.debug('Using %i workers ...' % nworkers)

    shared_gflibrary = RawArray('d', npatches * nsamples)

    work = [
        (engine, None, targets, patch, patchidx, los_vectors, odws)
        for patchidx, patch in enumerate(patches)]

    p = parallel.paripool(
        _process_patch_geodetic, work,
        initializer=_init_shared,
        initargs=(shared_gflibrary, None), nprocs=nworkers)

    # exhaust the pool iterator; results are written into shared memory
    for res in p:
        pass

    # collect and store away
    gfmatrix = num.frombuffer(
        shared_gflibrary).reshape((npatches, nsamples))
    return gfmatrix
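# Hedged usage sketch for geo_construct_gf_linear_patches (assumption: an
# initialized LocalEngine, loaded GeodeticDataset lists and a discretized
# fault already exist; the variable names and the 'uparr' slip component
# below are illustrative only, not part of this module):
#
#     patches = fault.get_all_patches('geodetic', component='uparr')
#     gfmatrix = geo_construct_gf_linear_patches(
#         engine, datasets=datasets, targets=targets,
#         patches=patches, nworkers=4)
#     # gfmatrix has shape (npatches, nsamples): unit slip on each patch
#     # mapped to the concatenated, LOS-projected surface displacements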
def iter_parallel_chains(
        draws, step, stage_path, progressbar, model, n_jobs,
        chains=None, initializer=None, initargs=(), chunksize=None):
    """
    Do Metropolis sampling over all the chains, with each chain being
    sampled 'draws' times. Chains are executed in parallel according to
    n_jobs. If jobs hang for any reason, they are killed after an
    estimated timeout; the chains in question are rerun and the estimated
    timeout is added again.

    Parameters
    ----------
    draws : int
        number of steps that are taken within each Markov Chain
    step : step object of the sampler class, e.g.:
        :class:`beat.sampler.Metropolis`, :class:`beat.sampler.SMC`
    stage_path : str
        with absolute path to the directory where to store the sampling
        results
    progressbar : boolean
        flag for displaying a progressbar
    model : :class:`pymc3.model.Model` instance
        holds definition of the forward problem
    n_jobs : int
        number of jobs to run in parallel, must not be higher than the
        number of CPUs
    chains : list
        of integer chain indexes; if None, all chains from the step object
        are sampled
    initializer : function
        to run before execution of each sampling process
    initargs : tuple
        of arguments for the initializer
    chunksize : int
        number of chains to sample within each process

    Returns
    -------
    MultiTrace object
    """
    timeout = 0

    if chains is None:
        chains = list(range(step.n_chains))

    n_chains = len(chains)

    if n_chains == 0:
        mtrace = backend.load_multitrace(dirname=stage_path, model=model)

    # while loop is necessary if any worker times out - rerun in that case
    while n_chains > 0:
        trace_list = []

        logger.info('Initialising %i chain traces ...' % n_chains)
        for chain in chains:
            trace_list.append(backend.TextChain(stage_path, model=model))

        max_int = np.iinfo(np.int32).max
        random_seeds = [randint(max_int) for _ in range(n_chains)]

        work = [
            (draws, step, step.population[step.resampling_indexes[chain]],
             trace, chain, None, progressbar, model, rseed)
            for chain, rseed, trace in zip(chains, random_seeds, trace_list)]

        tps = step.time_per_sample(np.minimum(n_jobs, 10))
        logger.info('Serial time per sample: %f' % tps)

        if chunksize is None:
            if draws < 10:
                chunksize = int(np.ceil(float(n_chains) / n_jobs))
            elif draws > 10 and tps < 1.:
                chunksize = int(np.ceil(float(n_chains) / n_jobs))
            else:
                chunksize = n_jobs

        timeout += int(np.ceil(tps * draws)) * n_jobs + 10

        if n_jobs > 1:
            shared_params = [
                sparam for sparam in step.logp_forw.get_shared()
                if sparam.name in parallel._tobememshared]

            logger.info('Data to be memory shared: %s' %
                        list2string(shared_params))

            if len(shared_params) > 0:
                if len(parallel._shared_memory.keys()) == 0:
                    logger.info('Putting data into shared memory ...')
                    parallel.memshare_sparams(shared_params)
                else:
                    logger.info('Data already in shared memory!')
            else:
                logger.info('No data to be memshared!')
        else:
            logger.info('Not using shared memory.')

        p = parallel.paripool(
            _sample, work, chunksize=chunksize, timeout=timeout,
            nprocs=n_jobs, initializer=initializer, initargs=initargs)

        logger.info('Sampling ...')

        # exhaust the pool iterator; chain results are written by the workers
        for res in p:
            pass

        # determine chain indexes that have been corrupted
        mtrace = backend.load_multitrace(dirname=stage_path, model=model)
        corrupted_chains = backend.check_multitrace(
            mtrace, draws=draws, n_chains=step.n_chains)

        n_chains = len(corrupted_chains)

        if n_chains > 0:
            logger.warning(
                '%i Chains not finished sampling, restarting ...' % n_chains)

        chains = corrupted_chains

    return mtrace
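# Hedged usage sketch for iter_parallel_chains (assumption: a configured
# Metropolis/SMC step object and a pymc3 model are already set up; the
# variable names and paths below are illustrative only):
#
#     mtrace = iter_parallel_chains(
#         draws=500, step=step, stage_path='/path/to/stage_0',
#         progressbar=False, model=problem.model, n_jobs=4)
#     # corrupted chains (e.g. after worker timeouts) are detected via
#     # backend.check_multitrace and automatically resampled before return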
def seis_construct_gf_linear(
        engine, fault, durations_prior, velocities_prior,
        nucleation_time_prior, varnames, wavemap, event, nworkers=1,
        starttime_sampling=1., duration_sampling=1., sample_rate=1.,
        outdirectory='./', force=False):
    """
    Create seismic Green's Function matrix for the defined source geometry
    by convolution of the GFs with the source time function (STF).

    Parameters
    ----------
    engine : :class:`pyrocko.gf.seismosizer.LocalEngine`
        main path to the directory containing the different
        Green's Function stores
    fault : :class:`FaultGeometry`
        fault object that may comprise several sub-faults, thus forming a
        complex fault geometry
    durations_prior : :class:`heart.Parameter`
        prior of durations of the STF for each patch to convolve
    velocities_prior : :class:`heart.Parameter`
        prior of rupture velocities of the earthquake
    nucleation_time_prior : :class:`heart.Parameter`
        prior of the nucleation time of the event
    varnames : list
        of str with variable names that are being optimized for
    wavemap : :class:`heart.WaveformMapping`
        configuration parameters for handling seismic data around a Phase,
        including the pyrocko targets for which the GFs are computed
    event : reference event
        used as reference in the GF Library configuration
    nworkers : int
        number of CPUs to use for processing
    starttime_sampling : float
        incremental step size for precalculation of start-time GFs
    duration_sampling : float
        incremental step size for precalculation of duration GFs
    sample_rate : float
        sample rate of synthetic traces to produce, related to the
        non-linear GF store
    outdirectory : str
        directory for storage
    force : boolean
        flag to overwrite existing linear GF Library
    """
    # get starttimes for hypocenter at corner of fault
    # TODO: make nsubfaults compatible
    npw, npl = fault.get_subfault_discretization(0)
    start_times = fault.get_subfault_starttimes(
        index=0,
        rupture_velocities=velocities_prior.lower.repeat(npw * npl),
        nuc_dip_idx=0, nuc_strike_idx=0)

    starttimeidxs = num.arange(
        int(num.floor(
            start_times.min() + nucleation_time_prior.lower.min()) /
            starttime_sampling),
        int(num.ceil(
            start_times.max() + nucleation_time_prior.upper.max()) /
            starttime_sampling) + 1)
    starttimes = starttimeidxs * starttime_sampling

    ndurations = error_not_whole((
        (durations_prior.upper.max() -
         durations_prior.lower.min()) / duration_sampling),
        errstr='ndurations') + 1

    durations = num.linspace(
        durations_prior.lower.min(),
        durations_prior.upper.max(),
        ndurations)

    logger.info(
        'Calculating GFs for starttimes: %s \n durations: %s' %
        (list2string(starttimes), list2string(durations)))
    logger.info('Using %i workers ...' % nworkers)

    nstarttimes = len(starttimes)
    npatches = fault.npatches
    ntargets = len(wavemap.targets)
    nsamples = wavemap.config.arrival_taper.nsamples(sample_rate)

    for var in varnames:
        logger.info('For slip component: %s' % var)

        gfl_config = SeismicGFLibraryConfig(
            component=var,
            datatype='seismic',
            event=event,
            reference_sources=fault.get_all_subfaults(
                datatype='seismic', component=var),
            duration_sampling=duration_sampling,
            starttime_sampling=starttime_sampling,
            wave_config=wavemap.config,
            dimensions=(
                ntargets, npatches, ndurations, nstarttimes, nsamples),
            starttime_min=float(starttimes.min()),
            duration_min=float(durations.min()))

        gfs = SeismicGFLibrary(config=gfl_config)

        outpath = os.path.join(outdirectory, gfs.filename + '.npz')
        if os.path.exists(outpath) and not force:
            logger.info(
                'Library exists: %s. '
                'Please use --force to override!' % outpath)
        else:
            if nworkers < 2:
                allocate = True
            else:
                allocate = False

            gfs.setup(
                ntargets, npatches, ndurations, nstarttimes,
                nsamples, allocate=allocate)

            logger.info(
                "Setting up Green's Function Library: %s \n ", gfs.__str__())

            parallel.check_available_memory(gfs.filesize)

            shared_gflibrary = RawArray('d', gfs.size)
            shared_times = RawArray('d', gfs.ntargets)

            work = [
                (engine, gfs, wavemap.targets, patch, patchidx,
                 durations, starttimes)
                for patchidx, patch in enumerate(
                    fault.get_all_patches('seismic', component=var))]

            p = parallel.paripool(
                _process_patch_seismic, work,
                initializer=_init_shared,
                initargs=(shared_gflibrary, shared_times), nprocs=nworkers)

            # exhaust the pool iterator; results are written into shared memory
            for res in p:
                pass

            # collect and store away
            gfs._gfmatrix = num.frombuffer(
                shared_gflibrary).reshape(gfs.dimensions)
            gfs._tmins = num.frombuffer(shared_times).reshape((gfs.ntargets))

            logger.info('Storing seismic linear GF Library ...')

            gfs.save(outdir=outdirectory)
            del gfs
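# Hedged usage sketch for seis_construct_gf_linear (assumption: priors,
# wavemap, event and fault come from an already configured problem; the
# variable names, prior keys and the 'uparr' component are illustrative):
#
#     seis_construct_gf_linear(
#         engine, fault,
#         durations_prior=priors['durations'],
#         velocities_prior=priors['velocities'],
#         nucleation_time_prior=priors['time'],
#         varnames=['uparr'], wavemap=wavemap, event=event, nworkers=4,
#         starttime_sampling=0.5, duration_sampling=0.5, sample_rate=2.,
#         outdirectory=gf_dir, force=False)
#     # one SeismicGFLibrary per slip component is written to outdirectory
#     # as <filename>.npz, unless it already exists and force is False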
def geo_construct_gf_linear(
        engine, outdirectory, crust_ind=0, datasets=None, targets=None,
        fault=None, varnames=[''], force=False, event=None, nworkers=1):
    """
    Create geodetic Green's Function matrix for the defined source geometry.

    Parameters
    ----------
    engine : :class:`pyrocko.gf.seismosizer.LocalEngine`
        main path to the directory containing the different
        Green's Function stores
    outdirectory : str
        absolute path to the directory and filename where to store the
        Green's Functions
    crust_ind : int
        index of the Green's Function store to use
    datasets : list of :class:`heart.GeodeticDataset`
        for which the GFs are calculated
    targets : list of :class:`heart.GeodeticDataset`
    fault : :class:`FaultGeometry`
        fault object that may comprise several sub-faults, thus forming a
        complex fault geometry
    varnames : list
        of str with variable names that are being optimized for
    force : bool
        force to overwrite existing files
    event : reference event
        used as reference in the GF Library configuration
    nworkers : int
        number of CPUs to use for processing
    """
    _, los_vectors, odws, _ = heart.concatenate_datasets(datasets)

    nsamples = odws.size
    npatches = fault.npatches
    logger.info('Using %i workers ...' % nworkers)

    for var in varnames:
        logger.info('For slip component: %s' % var)

        gfl_config = GeodeticGFLibraryConfig(
            component=var,
            dimensions=(npatches, nsamples),
            event=event,
            crust_ind=crust_ind,
            datatype='geodetic',
            reference_sources=fault.get_all_subfaults(
                datatype='geodetic', component=var))

        gfs = GeodeticGFLibrary(config=gfl_config)

        outpath = os.path.join(outdirectory, gfs.filename + '.npz')

        if os.path.exists(outpath) and not force:
            logger.info(
                'Library exists: %s. '
                'Please use --force to override!' % outpath)
        else:
            if nworkers < 2:
                allocate = True
            else:
                allocate = False

            gfs.setup(npatches, nsamples, allocate=allocate)

            logger.info(
                "Setting up Green's Function Library: %s \n ", gfs.__str__())

            parallel.check_available_memory(gfs.filesize)

            shared_gflibrary = RawArray('d', gfs.size)

            work = [
                (engine, gfs, targets, patch, patchidx, los_vectors, odws)
                for patchidx, patch in enumerate(
                    fault.get_all_patches('geodetic', component=var))]

            p = parallel.paripool(
                _process_patch_geodetic, work,
                initializer=_init_shared,
                initargs=(shared_gflibrary, None), nprocs=nworkers)

            # exhaust the pool iterator; results are written into shared memory
            for res in p:
                pass

            # collect and store away
            gfs._gfmatrix = num.frombuffer(
                shared_gflibrary).reshape(gfs.dimensions)

            logger.info('Storing geodetic linear GF Library ...')

            gfs.save(outdir=outdirectory)
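# Hedged usage sketch for geo_construct_gf_linear (assumption: engine,
# datasets/targets and a discretized fault already exist; the variable
# names and slip components below are illustrative only):
#
#     geo_construct_gf_linear(
#         engine, outdirectory=gf_dir, crust_ind=0,
#         datasets=datasets, targets=targets, fault=fault,
#         varnames=['uparr', 'uperp'], force=False, event=event, nworkers=4)
#     # writes one GeodeticGFLibrary per slip component to outdirectory,
#     # skipping components whose library file already exists unless force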