def create_source_info(csm, source_data, h5):
    """
    Build the source_info table and create the `source_info` and
    `source_wkt` datasets.

    As a side effect, the attribute `.id` is set on every source and
    the attribute `.source_info` is set on `csm`.

    :param csm: object exposing `.src_groups` (iterables of sources with
        an `.atomic` flag) and `.full_lt.trti` (a mapping TRT -> index)
    :param source_data: not used by this function
    :param h5: datastore-like object supporting `create_dataset` and
        item assignment
    """
    rows = {}  # basename of the source -> row
    geometries = []  # one WKT string per source, '' when missing
    smr_counts = []  # len(src.trt_smrs) for each source
    for group in csm.src_groups:
        for src in group:
            key = basename(src)
            # TRTs missing from the logic tree get the index -1
            trti = csm.full_lt.trti.get(src.tectonic_region_type, -1)
            smr_counts.append(len(src.trt_smrs))
            new_row = [key, src.grp_id, src.code, 0, 0, 0, trti, 0]
            geometries.append(getattr(src, '_wkt', ''))
            rows[key] = new_row
            # NOTE(review): sources sharing a basename overwrite the same
            # row, so this is the current number of rows minus one, not
            # necessarily the position of the row for `key` - confirm
            src.id = len(rows) - 1

    n_groups = len(csm.src_groups)
    n_sources = sum(len(group) for group in csm.src_groups)
    logging.info('There are %d groups and %d sources with len(trt_smrs)=%.2f',
                 n_groups, n_sources, numpy.mean(smr_counts))

    csm.source_info = rows
    num_srcs = len(csm.source_info)
    # the dataset is created in advance to work around an hdf5 bug
    h5.create_dataset('source_info', (num_srcs,), source_info_dt)
    is_atomic = any(group.atomic for group in csm.src_groups)
    h5['source_info'].attrs['atomic'] = is_atomic
    h5['source_wkt'] = numpy.array(geometries, hdf5.vstr)
def zero_times(sources):
    """
    Accumulate per-source totals with the calculation time left at zero.

    :param sources: an iterable of sources exposing `num_ruptures`,
        `nsites` and `weight`
    :returns: an AccumDict basename -> F32 array with 4 slots
        (nrups, nsites, time, weight); slot 2 (time) is never touched here
    """
    # (slot index, source attribute) pairs; slot 2 is skipped on purpose
    slots = ((0, 'num_ruptures'), (1, 'nsites'), (3, 'weight'))
    totals = AccumDict(accum=numpy.zeros(4, F32))
    for source in sources:
        acc = totals[basename(source)]
        for idx, attr in slots:
            acc[idx] += getattr(source, attr)
    return totals
def sample_ruptures(sources, cmaker, sitecol=None, monitor=Monitor()):
    """
    Sample the ruptures of a group of sources, yielding the results
    in one or more chunks.

    :param sources:
        a sequence of sources of the same group
    :param cmaker:
        a ContextMaker instance with ses_per_logic_tree_path, ses_seed
    :param sitecol:
        SiteCollection instance used for filtering (None for no filtering)
    :param monitor:
        monitor instance (not used in the body of this function)
    :yields:
        dictionaries with keys rup_array, calc_times, eff_ruptures
    """
    srcfilter = SourceFilter(sitecol, cmaker.maximum_distance)
    num_ses = cmaker.ses_per_logic_tree_path
    grp_id = sources[0].grp_id
    if (getattr(sources, 'atomic', False) and
            getattr(sources, 'cluster', False)):
        # atomic cluster group: the sampling is delegated to sample_cluster
        # and a single result is produced
        eb_ruptures, calc_times = sample_cluster(
            sources, srcfilter, num_ses, vars(cmaker))
        er = sum(src.num_ruptures for src, _ in srcfilter.filter(sources))
        yield AccumDict(dict(rup_array=get_rup_array(eb_ruptures, srcfilter),
                             calc_times=calc_times,
                             eff_ruptures={grp_id: er}))
        return

    eb_ruptures = []
    eff_ruptures = 0
    # AccumDict of arrays with 3 elements num_ruptures, num_sites, calc_time
    calc_times = AccumDict(accum=numpy.zeros(3, numpy.float32))
    for src, _ in srcfilter.filter(sources):
        nr = src.num_ruptures
        eff_ruptures += nr
        dt = 0.
        if len(eb_ruptures) > MAX_RUPTURES:
            # yield partial result to avoid running out of memory
            t0 = time.time()
            partial = get_rup_array(eb_ruptures, srcfilter)
            dt += time.time() - t0
            # the generator is suspended at the yield for as long as the
            # consumer wants: that time must not be billed to the source,
            # so the clock is stopped before yielding and restarted after
            yield AccumDict(dict(rup_array=partial,
                                 calc_times={}, eff_ruptures={}))
            eb_ruptures.clear()
        t0 = time.time()
        samples = getattr(src, 'samples', 1)
        for rup, trt_smr, n_occ in src.sample_ruptures(
                samples * num_ses, cmaker.ses_seed):
            eb_ruptures.append(EBRupture(rup, src.source_id, trt_smr, n_occ))
        dt += time.time() - t0
        calc_times[basename(src)] += numpy.array([nr, src.nsites, dt])
    rup_array = get_rup_array(eb_ruptures, srcfilter)
    yield AccumDict(dict(rup_array=rup_array, calc_times=calc_times,
                         eff_ruptures={grp_id: eff_ruptures}))
def sample_cluster(sources, srcfilter, num_ses, param):
    """
    Sample the ruptures generated by a cluster of sources.

    :param sources:
        a sequence of sources of the same group, exposing the attributes
        `cluster` and `temporal_occurrence_model`
    :param srcfilter:
        object such that `srcfilter.filter(sources)` yields pairs whose
        first element is a source
    :param num_ses:
        number of stochastic event sets
    :param param:
        a dictionary of additional parameters; 'ses_seed' is always read,
        'src_interdep' only when the group is not a cluster
    :returns:
        a pair (eb_ruptures, calc_times)
    """
    seed = param['ses_seed']
    numpy.random.seed(sources[0].serial(seed))
    # all the sources in the group must share the same trt_smr,
    # otherwise the unpacking fails with a ValueError
    [trt_smr] = {src.trt_smr for src in sources}
    # AccumDict of arrays with 3 elements; at each sampling the triple
    # (distinct ruptures seen so far for the source, nsites, calc_time)
    # is added to the entry of the source
    calc_times = AccumDict(accum=numpy.zeros(3, numpy.float32))

    # Number of occurrences of the group as a whole. Using a single time
    # interval corresponding to the product of the investigation time and
    # the number of realisations is admitted only in the case of a
    # time-independent model
    n_samples = getattr(sources[0], 'samples', 1)
    tom = sources.temporal_occurrence_model
    expected_occ = tom.occurrence_rate * tom.time_span * n_samples * num_ses
    grp_num_occ = numpy.random.poisson(expected_occ)

    # Possible cases for the sources included in the group:
    # * The group is a cluster. In this case we choose one rupture per each
    #   source; uncertainty in the ruptures can be handled in this case
    #   using mutually exclusive ruptures (note that this is admitted
    #   only for non-parametric sources).
    # * The group contains mutually exclusive sources. In this case we
    #   should choose one source and then one rupture from this source
    #   (not implemented).
    sampled = {}  # source_id -> rupture idx -> [rupture, occurrences]
    for _occ in range(grp_num_occ):
        if sources.cluster:
            for src, _sites in srcfilter.filter(sources):
                # Track calculation time
                t0 = time.time()
                src_id = src.source_id
                rup = src.get_one_rupture(seed)
                # The number of occurrences of a given rupture is not
                # known a-priori, so it is counted while sampling; the
                # first rupture object seen for each idx is the one kept
                by_idx = sampled.setdefault(src_id, {})
                if rup.idx in by_idx:
                    by_idx[rup.idx][1] += 1
                else:
                    by_idx[rup.idx] = [rup, 1]
                elapsed = time.time() - t0
                calc_times[basename(src)] += numpy.array(
                    [len(by_idx), src.nsites, elapsed])
        elif param['src_interdep'] == 'mutex':
            raise NotImplementedError('src_interdep == mutex')

    # Create event based ruptures, one per distinct (source, rupture idx)
    eb_ruptures = []
    for src_id, by_idx in sampled.items():
        for rup, count in by_idx.values():
            eb_ruptures.append(EBRupture(rup, src_id, trt_smr, count))
    return eb_ruptures, calc_times