def geo_construct_gf_linear_patches(
        engine, datasets=None, targets=None, patches=None, nworkers=1):
    """
    Create geodetic Green's Function matrix for the given patches.

    Parameters
    ----------
    engine : :class:`pyrocko.gf.seismosizer.LocalEngine`
        main path to the directory containing the different
        Green's Function stores
    datasets : list of :class:`heart.GeodeticDataset`
        for which the GFs are calculated
    targets : list of :class:`heart.GeodeticDataset`
    patches : :class:`FaultGeometry`
        fault object that may comprise several sub-faults, thus forming a
        complex fault geometry
    nworkers : int
        number of CPUs to use for processing
    """
    _, los_vectors, odws, _ = heart.concatenate_datasets(datasets)

    nsamples = odws.size
    npatches = len(patches)
    logger.debug('Using %i workers ...' % nworkers)

    shared_gflibrary = RawArray('d', npatches * nsamples)

    work = [
        (engine, None, targets, patch, patchidx, los_vectors, odws)
        for patchidx, patch in enumerate(patches)]

    p = parallel.paripool(
        _process_patch_geodetic, work,
        initializer=_init_shared,
        initargs=(shared_gflibrary, None), nprocs=nworkers)

    # exhaust the pool iterator; results are written into shared memory
    for res in p:
        pass

    # collect and store away
    gfmatrix = num.frombuffer(
        shared_gflibrary).reshape((npatches, nsamples))
    return gfmatrix
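# Hedged usage sketch for geo_construct_gf_linear_patches (assumption: an
# initialized LocalEngine, loaded GeodeticDataset lists and a discretized
# fault already exist; the variable names and the 'uparr' slip component
# below are illustrative only, not part of this module):
#
#     patches = fault.get_all_patches('geodetic', component='uparr')
#     gfmatrix = geo_construct_gf_linear_patches(
#         engine, datasets=datasets, targets=targets,
#         patches=patches, nworkers=4)
#     # gfmatrix has shape (npatches, nsamples): unit slip on each patch
#     # mapped to the concatenated, LOS-projected surface displacements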
def iter_parallel_chains(
        draws, step, stage_path, progressbar, model, n_jobs,
        chains=None, initializer=None, initargs=(), chunksize=None):
    """
    Do Metropolis sampling over all the chains, with each chain being
    sampled 'draws' times. Chains are executed in parallel according to
    n_jobs. If jobs hang for any reason, they are killed after an
    estimated timeout; the chains in question are rerun and the estimated
    timeout is added again.

    Parameters
    ----------
    draws : int
        number of steps that are taken within each Markov Chain
    step : step object of the sampler class, e.g.:
        :class:`beat.sampler.Metropolis`, :class:`beat.sampler.SMC`
    stage_path : str
        with absolute path to the directory where to store the sampling
        results
    progressbar : boolean
        flag for displaying a progressbar
    model : :class:`pymc3.model.Model` instance
        holds definition of the forward problem
    n_jobs : int
        number of jobs to run in parallel, must not be higher than the
        number of CPUs
    chains : list
        of integer chain indexes; if None, all chains from the step object
        are sampled
    initializer : function
        to run before execution of each sampling process
    initargs : tuple
        of arguments for the initializer
    chunksize : int
        number of chains to sample within each process

    Returns
    -------
    MultiTrace object
    """
    timeout = 0

    if chains is None:
        chains = list(range(step.n_chains))

    n_chains = len(chains)

    if n_chains == 0:
        mtrace = backend.load_multitrace(dirname=stage_path, model=model)

    # while loop is necessary if any worker times out - rerun in that case
    while n_chains > 0:
        trace_list = []

        logger.info('Initialising %i chain traces ...' % n_chains)
        for chain in chains:
            trace_list.append(backend.TextChain(stage_path, model=model))

        max_int = np.iinfo(np.int32).max
        random_seeds = [randint(max_int) for _ in range(n_chains)]

        work = [
            (draws, step, step.population[step.resampling_indexes[chain]],
             trace, chain, None, progressbar, model, rseed)
            for chain, rseed, trace in zip(chains, random_seeds, trace_list)]

        tps = step.time_per_sample(np.minimum(n_jobs, 10))
        logger.info('Serial time per sample: %f' % tps)

        if chunksize is None:
            if draws < 10:
                chunksize = int(np.ceil(float(n_chains) / n_jobs))
            elif draws > 10 and tps < 1.:
                chunksize = int(np.ceil(float(n_chains) / n_jobs))
            else:
                chunksize = n_jobs

        timeout += int(np.ceil(tps * draws)) * n_jobs + 10

        if n_jobs > 1:
            shared_params = [
                sparam for sparam in step.logp_forw.get_shared()
                if sparam.name in parallel._tobememshared]

            logger.info('Data to be memory shared: %s' %
                        list2string(shared_params))

            if len(shared_params) > 0:
                if len(parallel._shared_memory.keys()) == 0:
                    logger.info('Putting data into shared memory ...')
                    parallel.memshare_sparams(shared_params)
                else:
                    logger.info('Data already in shared memory!')
            else:
                logger.info('No data to be memshared!')
        else:
            logger.info('Not using shared memory.')

        p = parallel.paripool(
            _sample, work, chunksize=chunksize, timeout=timeout,
            nprocs=n_jobs, initializer=initializer, initargs=initargs)

        logger.info('Sampling ...')

        # exhaust the pool iterator; chain results are written by the workers
        for res in p:
            pass

        # determine chain indexes that have been corrupted
        mtrace = backend.load_multitrace(dirname=stage_path, model=model)
        corrupted_chains = backend.check_multitrace(
            mtrace, draws=draws, n_chains=step.n_chains)

        n_chains = len(corrupted_chains)

        if n_chains > 0:
            logger.warning(
                '%i Chains not finished sampling, restarting ...' % n_chains)

        chains = corrupted_chains

    return mtrace
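# Hedged usage sketch for iter_parallel_chains (assumption: a configured
# Metropolis/SMC step object and a pymc3 model are already set up; the
# variable names and paths below are illustrative only):
#
#     mtrace = iter_parallel_chains(
#         draws=500, step=step, stage_path='/path/to/stage_0',
#         progressbar=False, model=problem.model, n_jobs=4)
#     # corrupted chains (e.g. after worker timeouts) are detected via
#     # backend.check_multitrace and automatically resampled before return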
def seis_construct_gf_linear(
        engine, fault, durations_prior, velocities_prior,
        nucleation_time_prior, varnames, wavemap, event, nworkers=1,
        starttime_sampling=1., duration_sampling=1., sample_rate=1.,
        outdirectory='./', force=False):
    """
    Create seismic Green's Function matrix for the defined source geometry
    by convolution of the GFs with the source time function (STF).

    Parameters
    ----------
    engine : :class:`pyrocko.gf.seismosizer.LocalEngine`
        main path to the directory containing the different
        Green's Function stores
    fault : :class:`FaultGeometry`
        fault object that may comprise several sub-faults, thus forming a
        complex fault geometry
    durations_prior : :class:`heart.Parameter`
        prior of durations of the STF for each patch to convolve
    velocities_prior : :class:`heart.Parameter`
        prior of rupture velocities of the earthquake
    nucleation_time_prior : :class:`heart.Parameter`
        prior of the nucleation time of the event
    varnames : list
        of str with variable names that are being optimized for
    wavemap : :class:`heart.WaveformMapping`
        configuration parameters for handling seismic data around a Phase,
        including the pyrocko targets for which the GFs are computed
    event : reference event
        used as reference in the GF Library configuration
    nworkers : int
        number of CPUs to use for processing
    starttime_sampling : float
        incremental step size for precalculation of start-time GFs
    duration_sampling : float
        incremental step size for precalculation of duration GFs
    sample_rate : float
        sample rate of synthetic traces to produce, related to the
        non-linear GF store
    outdirectory : str
        directory for storage
    force : boolean
        flag to overwrite existing linear GF Library
    """
    # get starttimes for hypocenter at corner of fault
    # TODO: make nsubfaults compatible
    npw, npl = fault.get_subfault_discretization(0)
    start_times = fault.get_subfault_starttimes(
        index=0,
        rupture_velocities=velocities_prior.lower.repeat(npw * npl),
        nuc_dip_idx=0, nuc_strike_idx=0)

    starttimeidxs = num.arange(
        int(num.floor(
            start_times.min() + nucleation_time_prior.lower.min()) /
            starttime_sampling),
        int(num.ceil(
            start_times.max() + nucleation_time_prior.upper.max()) /
            starttime_sampling) + 1)
    starttimes = starttimeidxs * starttime_sampling

    ndurations = error_not_whole((
        (durations_prior.upper.max() -
         durations_prior.lower.min()) / duration_sampling),
        errstr='ndurations') + 1

    durations = num.linspace(
        durations_prior.lower.min(),
        durations_prior.upper.max(),
        ndurations)

    logger.info(
        'Calculating GFs for starttimes: %s \n durations: %s' %
        (list2string(starttimes), list2string(durations)))
    logger.info('Using %i workers ...' % nworkers)

    nstarttimes = len(starttimes)
    npatches = fault.npatches
    ntargets = len(wavemap.targets)
    nsamples = wavemap.config.arrival_taper.nsamples(sample_rate)

    for var in varnames:
        logger.info('For slip component: %s' % var)

        gfl_config = SeismicGFLibraryConfig(
            component=var,
            datatype='seismic',
            event=event,
            reference_sources=fault.get_all_subfaults(
                datatype='seismic', component=var),
            duration_sampling=duration_sampling,
            starttime_sampling=starttime_sampling,
            wave_config=wavemap.config,
            dimensions=(
                ntargets, npatches, ndurations, nstarttimes, nsamples),
            starttime_min=float(starttimes.min()),
            duration_min=float(durations.min()))

        gfs = SeismicGFLibrary(config=gfl_config)

        outpath = os.path.join(outdirectory, gfs.filename + '.npz')
        if os.path.exists(outpath) and not force:
            logger.info(
                'Library exists: %s. '
                'Please use --force to override!' % outpath)
        else:
            if nworkers < 2:
                allocate = True
            else:
                allocate = False

            gfs.setup(
                ntargets, npatches, ndurations, nstarttimes,
                nsamples, allocate=allocate)

            logger.info(
                "Setting up Green's Function Library: %s \n ", gfs.__str__())

            parallel.check_available_memory(gfs.filesize)

            shared_gflibrary = RawArray('d', gfs.size)
            shared_times = RawArray('d', gfs.ntargets)

            work = [
                (engine, gfs, wavemap.targets, patch, patchidx,
                 durations, starttimes)
                for patchidx, patch in enumerate(
                    fault.get_all_patches('seismic', component=var))]

            p = parallel.paripool(
                _process_patch_seismic, work,
                initializer=_init_shared,
                initargs=(shared_gflibrary, shared_times), nprocs=nworkers)

            # exhaust the pool iterator; results are written into shared memory
            for res in p:
                pass

            # collect and store away
            gfs._gfmatrix = num.frombuffer(
                shared_gflibrary).reshape(gfs.dimensions)
            gfs._tmins = num.frombuffer(shared_times).reshape((gfs.ntargets))

            logger.info('Storing seismic linear GF Library ...')

            gfs.save(outdir=outdirectory)
            del gfs
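# Hedged usage sketch for seis_construct_gf_linear (assumption: priors,
# wavemap, event and fault come from an already configured problem; the
# variable names, prior keys and the 'uparr' component are illustrative):
#
#     seis_construct_gf_linear(
#         engine, fault,
#         durations_prior=priors['durations'],
#         velocities_prior=priors['velocities'],
#         nucleation_time_prior=priors['time'],
#         varnames=['uparr'], wavemap=wavemap, event=event, nworkers=4,
#         starttime_sampling=0.5, duration_sampling=0.5, sample_rate=2.,
#         outdirectory=gf_dir, force=False)
#     # one SeismicGFLibrary per slip component is written to outdirectory
#     # as <filename>.npz, unless it already exists and force is False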
def geo_construct_gf_linear(
        engine, outdirectory, crust_ind=0, datasets=None, targets=None,
        fault=None, varnames=[''], force=False, event=None, nworkers=1):
    """
    Create geodetic Green's Function matrix for the defined source geometry.

    Parameters
    ----------
    engine : :class:`pyrocko.gf.seismosizer.LocalEngine`
        main path to the directory containing the different
        Green's Function stores
    outdirectory : str
        absolute path to the directory and filename where to store the
        Green's Functions
    crust_ind : int
        index of the Green's Function store to use
    datasets : list of :class:`heart.GeodeticDataset`
        for which the GFs are calculated
    targets : list of :class:`heart.GeodeticDataset`
    fault : :class:`FaultGeometry`
        fault object that may comprise several sub-faults, thus forming a
        complex fault geometry
    varnames : list
        of str with variable names that are being optimized for
    force : bool
        force to overwrite existing files
    event : reference event
        used as reference in the GF Library configuration
    nworkers : int
        number of CPUs to use for processing
    """
    _, los_vectors, odws, _ = heart.concatenate_datasets(datasets)

    nsamples = odws.size
    npatches = fault.npatches
    logger.info('Using %i workers ...' % nworkers)

    for var in varnames:
        logger.info('For slip component: %s' % var)

        gfl_config = GeodeticGFLibraryConfig(
            component=var,
            dimensions=(npatches, nsamples),
            event=event,
            crust_ind=crust_ind,
            datatype='geodetic',
            reference_sources=fault.get_all_subfaults(
                datatype='geodetic', component=var))

        gfs = GeodeticGFLibrary(config=gfl_config)

        outpath = os.path.join(outdirectory, gfs.filename + '.npz')

        if os.path.exists(outpath) and not force:
            logger.info(
                'Library exists: %s. '
                'Please use --force to override!' % outpath)
        else:
            if nworkers < 2:
                allocate = True
            else:
                allocate = False

            gfs.setup(npatches, nsamples, allocate=allocate)

            logger.info(
                "Setting up Green's Function Library: %s \n ", gfs.__str__())

            parallel.check_available_memory(gfs.filesize)

            shared_gflibrary = RawArray('d', gfs.size)

            work = [
                (engine, gfs, targets, patch, patchidx, los_vectors, odws)
                for patchidx, patch in enumerate(
                    fault.get_all_patches('geodetic', component=var))]

            p = parallel.paripool(
                _process_patch_geodetic, work,
                initializer=_init_shared,
                initargs=(shared_gflibrary, None), nprocs=nworkers)

            # exhaust the pool iterator; results are written into shared memory
            for res in p:
                pass

            # collect and store away
            gfs._gfmatrix = num.frombuffer(
                shared_gflibrary).reshape(gfs.dimensions)

            logger.info('Storing geodetic linear GF Library ...')

            gfs.save(outdir=outdirectory)
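# Hedged usage sketch for geo_construct_gf_linear (assumption: engine,
# datasets/targets and a discretized fault already exist; the variable
# names and slip components below are illustrative only):
#
#     geo_construct_gf_linear(
#         engine, outdirectory=gf_dir, crust_ind=0,
#         datasets=datasets, targets=targets, fault=fault,
#         varnames=['uparr', 'uperp'], force=False, event=event, nworkers=4)
#     # writes one GeodeticGFLibrary per slip component to outdirectory,
#     # skipping components whose library file already exists unless force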