# Example 1
# 0
def geo_construct_gf_linear_patches(
        engine, datasets=None, targets=None, patches=None, nworkers=1):
    """
    Assemble the geodetic Greens Function matrix for a set of patches.

    One task per patch is submitted to a process pool. A shared buffer of
    doubles is handed to the workers through the pool initializer and is
    returned as a 2d array once the pool has been exhausted.

    Parameters
    ----------
    engine : :class:`pyrocko.gf.seismosizer.LocalEngine`
        main path to directory containing the different Greensfunction stores
    datasets : list
        of :class:`heart.GeodeticDataset` for which the GFs are calculated
    targets : list
        of :class:`heart.GeodeticDataset`
    patches : list
        of patches, each one yields one row of the matrix
        (presumably the patches of a :class:`FaultGeometry` - TODO confirm)
    nworkers : int
        number of CPUs to use for processing

    Returns
    -------
    :class:`numpy.ndarray`
        built from the shared buffer, shape (npatches, nsamples)
    """

    _, los_vectors, odws, _ = heart.concatenate_datasets(datasets)

    nsamples = odws.size
    npatches = len(patches)

    logger.debug('Using %i workers ...' % nworkers)

    # flat buffer of doubles, one slot per (patch, sample) pair
    shared_buffer = RawArray('d', npatches * nsamples)

    tasks = []
    for patchidx, patch in enumerate(patches):
        tasks.append(
            (engine, None, targets, patch, patchidx, los_vectors, odws))

    pool = parallel.paripool(
        _process_patch_geodetic, tasks,
        initializer=_init_shared,
        initargs=(shared_buffer, None), nprocs=nworkers)

    # the yielded values are not needed, iterating only drives the pool
    for _result in pool:
        pass

    # reinterpret the flat buffer as (npatches, nsamples) matrix
    flat = num.frombuffer(shared_buffer)
    return flat.reshape((npatches, nsamples))
# Example 2
# 0
def iter_parallel_chains(draws,
                         step,
                         stage_path,
                         progressbar,
                         model,
                         n_jobs,
                         chains=None,
                         initializer=None,
                         initargs=(),
                         chunksize=None):
    """
    Sample the requested chains in a process pool, 'draws' samples each.

    After each pass over the pool the traces under 'stage_path' are loaded
    and checked. Chains reported as corrupted are sampled again in another
    pass. The timeout handed to the pool is increased by a fresh estimate
    on every pass.

    Parameters
    ----------
    draws : int
        number of steps that are taken within each Markov Chain
    step : step object of the sampler class, e.g.:
        :class:`beat.sampler.Metropolis`, :class:`beat.sampler.SMC`
    stage_path : str
        with absolute path to the directory where to store the sampling results
    progressbar : boolean
        flag for displaying a progressbar
    model : :class:`pymc3.model.Model` instance
        holds definition of the forward problem
    n_jobs : int
        number of jobs to run in parallel, must not be higher than the
        number of CPUs
    chains : list
        of integers to the chain numbers, if None then all chains from the
        step object are sampled
    initializer : function
        to run before execution of each sampling process
    initargs : tuple
        of arguments for the initializer
    chunksize : int
        number of chains to sample within each process, if None it is
        derived once from 'draws', the time per sample and 'n_jobs'

    Returns
    -------
    MultiTrace object
        as loaded from 'stage_path' after the last pass
    """
    timeout = 0

    if chains is None:
        chains = list(range(step.n_chains))

    n_chains = len(chains)

    if n_chains == 0:
        # nothing to sample, only read what is in the stage directory
        mtrace = backend.load_multitrace(dirname=stage_path, model=model)

    # another pass is made as long as the check reports unfinished chains
    while n_chains > 0:
        logger.info('Initialising %i chain traces ...' % n_chains)
        traces = [
            backend.TextChain(stage_path, model=model) for _ in chains]

        seed_limit = np.iinfo(np.int32).max
        seeds = [randint(seed_limit) for _ in range(n_chains)]

        tasks = []
        for chain, rseed, trace in zip(chains, seeds, traces):
            population_entry = step.population[step.resampling_indexes[chain]]
            tasks.append(
                (draws, step, population_entry, trace, chain, None,
                 progressbar, model, rseed))

        tps = step.time_per_sample(np.minimum(n_jobs, 10))
        logger.info('Serial time per sample: %f' % tps)

        if chunksize is None:
            # set only once, later passes reuse the value chosen here
            if draws < 10 or (draws > 10 and tps < 1.):
                chunksize = int(np.ceil(float(n_chains) / n_jobs))
            else:
                chunksize = n_jobs

        # accumulates over the passes
        timeout += int(np.ceil(tps * draws)) * n_jobs + 10

        if not n_jobs > 1:
            logger.info('Not using shared memory.')
        else:
            shared_params = [
                sparam for sparam in step.logp_forw.get_shared()
                if sparam.name in parallel._tobememshared]

            logger.info('Data to be memory shared: %s' %
                        list2string(shared_params))

            if len(shared_params) == 0:
                logger.info('No data to be memshared!')
            elif len(parallel._shared_memory.keys()) == 0:
                logger.info('Putting data into shared memory ...')
                parallel.memshare_sparams(shared_params)
            else:
                logger.info('Data already in shared memory!')

        pool = parallel.paripool(_sample,
                                 tasks,
                                 chunksize=chunksize,
                                 timeout=timeout,
                                 nprocs=n_jobs,
                                 initializer=initializer,
                                 initargs=initargs)

        logger.info('Sampling ...')

        # the yielded values are not needed, iterating only drives the pool
        for _result in pool:
            pass

        # chains reported here are the ones for the next pass
        mtrace = backend.load_multitrace(dirname=stage_path, model=model)
        chains = backend.check_multitrace(mtrace,
                                          draws=draws,
                                          n_chains=step.n_chains)
        n_chains = len(chains)

        if n_chains > 0:
            logger.warning('%i Chains not finished sampling,'
                           ' restarting ...' % n_chains)

    return mtrace
# Example 3
# 0
# File: base.py Project: wangyf/beat
def seis_construct_gf_linear(engine,
                             fault,
                             durations_prior,
                             velocities_prior,
                             nucleation_time_prior,
                             varnames,
                             wavemap,
                             event,
                             nworkers=1,
                             starttime_sampling=1.,
                             duration_sampling=1.,
                             sample_rate=1.,
                             outdirectory='./',
                             force=False):
    """
    Create and store seismic linear Greens Function libraries, one for each
    slip component, on a grid of start-times and source durations.

    Parameters
    ----------
    engine : :class:`pyrocko.gf.seismosizer.LocalEngine`
        main path to directory containing the different Greensfunction stores
    fault : :class:`FaultGeometry`
        fault object that may comprise of several sub-faults. thus forming a
        complex fault-geometry
    durations_prior : :class:`heart.Parameter`
        prior of durations of the STF for each patch to convolve
    velocities_prior : :class:`heart.Parameter`
        rupture velocity of earthquake prior
    nucleation_time_prior : :class:`heart.Parameter`
        prior of nucleation time of the event
    varnames : list
        of str with the slip components, one library is built for each
    wavemap : :class:`heart.WaveformMapping`
        configuration parameters for handling seismic data around Phase,
        its targets are the ones the GFs are calculated for
    event : object
        handed over to the library configuration
    nworkers : int
        number of processes of the pool
    starttime_sampling : float
        incremental step size for precalculation of starttime GFs
    duration_sampling : float
        incremental step size for precalculation of duration GFs
    sample_rate : float
        sample rate of synthetic traces to produce,
        related to non-linear GF store
    outdirectory : str
        directory for storage
    force : boolean
        flag to overwrite existing linear GF Library
    """

    # get starttimes for hypocenter at corner of fault
    # TODO: make nsubfaults compatible

    npw, npl = fault.get_subfault_discretization(0)
    start_times = fault.get_subfault_starttimes(
        index=0,
        rupture_velocities=velocities_prior.lower.repeat(npw * npl),
        nuc_dip_idx=0,
        nuc_strike_idx=0)

    earliest = start_times.min() + nucleation_time_prior.lower.min()
    latest = start_times.max() + nucleation_time_prior.upper.max()

    # NOTE(review): rounding happens before the division by the sampling
    # step - for steps that do not divide 1 the index range may not span
    # the whole time window, confirm that this is intended
    first_idx = int(num.floor(earliest) / starttime_sampling)
    last_idx = int(num.ceil(latest) / starttime_sampling)

    starttimeidxs = num.arange(first_idx, last_idx + 1)
    starttimes = starttimeidxs * starttime_sampling

    shortest = durations_prior.lower.min()
    longest = durations_prior.upper.max()

    ndurations = error_not_whole(
        (longest - shortest) / duration_sampling,
        errstr='ndurations') + 1

    durations = num.linspace(shortest, longest, ndurations)

    logger.info('Calculating GFs for starttimes: %s \n durations: %s' %
                (list2string(starttimes), list2string(durations)))
    logger.info('Using %i workers ...' % nworkers)

    nstarttimes = len(starttimes)
    npatches = fault.npatches
    ntargets = len(wavemap.targets)
    nsamples = wavemap.config.arrival_taper.nsamples(sample_rate)

    for var in varnames:
        logger.info('For slip component: %s' % var)

        reference_sources = fault.get_all_subfaults(
            datatype='seismic', component=var)

        gfl_config = SeismicGFLibraryConfig(
            component=var,
            datatype='seismic',
            event=event,
            reference_sources=reference_sources,
            duration_sampling=duration_sampling,
            starttime_sampling=starttime_sampling,
            wave_config=wavemap.config,
            dimensions=(ntargets, npatches, ndurations, nstarttimes, nsamples),
            starttime_min=float(starttimes.min()),
            duration_min=float(durations.min()))

        gfs = SeismicGFLibrary(config=gfl_config)

        outpath = os.path.join(outdirectory, gfs.filename + '.npz')

        if os.path.exists(outpath) and not force:
            logger.info('Library exists: %s. '
                        'Please use --force to override!' % outpath)
            continue

        # the library allocates only if less than two workers are used
        allocate = bool(nworkers < 2)

        gfs.setup(ntargets,
                  npatches,
                  ndurations,
                  nstarttimes,
                  nsamples,
                  allocate=allocate)

        logger.info("Setting up Green's Function Library: %s \n ",
                    gfs.__str__())

        parallel.check_available_memory(gfs.filesize)

        # buffers of doubles handed to the workers via the pool initializer
        shared_gfs = RawArray('d', gfs.size)
        shared_tmins = RawArray('d', gfs.ntargets)

        tasks = []
        seismic_patches = fault.get_all_patches('seismic', component=var)
        for patchidx, patch in enumerate(seismic_patches):
            tasks.append(
                (engine, gfs, wavemap.targets, patch, patchidx, durations,
                 starttimes))

        pool = parallel.paripool(_process_patch_seismic,
                                 tasks,
                                 initializer=_init_shared,
                                 initargs=(shared_gfs, shared_tmins),
                                 nprocs=nworkers)

        # the yielded values are not needed, iterating only drives the pool
        for _result in pool:
            pass

        # collect and store away
        gfs._gfmatrix = num.frombuffer(shared_gfs).reshape(gfs.dimensions)
        gfs._tmins = num.frombuffer(shared_tmins).reshape(gfs.ntargets)

        logger.info('Storing seismic linear GF Library ...')

        gfs.save(outdir=outdirectory)
        del gfs
# Example 4
# 0
# File: base.py Project: wangyf/beat
def geo_construct_gf_linear(engine,
                            outdirectory,
                            crust_ind=0,
                            datasets=None,
                            targets=None,
                            fault=None,
                            varnames=None,
                            force=False,
                            event=None,
                            nworkers=1):
    """
    Create and store geodetic linear Greens Function libraries, one for
    each slip component, for the defined source geometry.

    Parameters
    ----------
    engine : :class:`pyrocko.gf.seismosizer.LocalEngine`
        main path to directory containing the different Greensfunction stores
    outdirectory : str
        path to the directory where the Green's Function libraries are
        stored, the file name is taken from the library itself
    crust_ind : int
        of index of Greens Function store to use
    datasets : list
        of :class:`heart.GeodeticDataset` for which the GFs are calculated
    targets : list
        of :class:`heart.GeodeticDataset`
    fault : :class:`FaultGeometry`
        fault object that may comprise of several sub-faults. thus forming a
        complex fault-geometry
    varnames : list
        of str with variable names that are being optimized for,
        if None a single unnamed component [''] is used
    force : bool
        Force to overwrite existing files.
    event : object
        handed over to the library configuration
    nworkers : int
        number of processes of the pool
    """

    if varnames is None:
        # resolved per call, avoids a list default shared between calls
        varnames = ['']

    _, los_vectors, odws, _ = heart.concatenate_datasets(datasets)

    nsamples = odws.size
    npatches = fault.npatches
    logger.info('Using %i workers ...', nworkers)

    for var in varnames:
        logger.info('For slip component: %s', var)

        gfl_config = GeodeticGFLibraryConfig(
            component=var,
            dimensions=(npatches, nsamples),
            event=event,
            crust_ind=crust_ind,
            datatype='geodetic',
            reference_sources=fault.get_all_subfaults(datatype='geodetic',
                                                      component=var))
        gfs = GeodeticGFLibrary(config=gfl_config)

        outpath = os.path.join(outdirectory, gfs.filename + '.npz')

        if os.path.exists(outpath) and not force:
            logger.info('Library exists: %s. '
                        'Please use --force to override!', outpath)
            continue

        # the library allocates only if less than two workers are used
        allocate = nworkers < 2

        gfs.setup(npatches, nsamples, allocate=allocate)

        logger.info("Setting up Green's Function Library: %s \n ",
                    gfs.__str__())

        parallel.check_available_memory(gfs.filesize)

        # buffer of doubles handed to the workers via the pool initializer
        shared_gflibrary = RawArray('d', gfs.size)

        work = [(engine, gfs, targets, patch, patchidx, los_vectors, odws)
                for patchidx, patch in enumerate(
                    fault.get_all_patches('geodetic', component=var))]

        p = parallel.paripool(_process_patch_geodetic,
                              work,
                              initializer=_init_shared,
                              initargs=(shared_gflibrary, None),
                              nprocs=nworkers)

        # the yielded values are not needed, iterating only drives the pool
        for _result in p:
            pass

        # collect and store away
        gfs._gfmatrix = num.frombuffer(shared_gflibrary).reshape(
            gfs.dimensions)

        logger.info('Storing geodetic linear GF Library ...')

        gfs.save(outdir=outdirectory)