def save_events(self, rup_array):
    """
    Compute the event records for the given ruptures and store them
    in the datastore under the key ``events``.

    :param rup_array: an array of ruptures with fields grp_id
    """
    # building the events is cheap relative to saving the ruptures
    eids = rupture.get_eids(
        rup_array, self.samples_by_grp, self.num_rlzs_by_grp)
    self.check_overflow()  # sanity check on the number of events
    events = numpy.zeros(len(eids), rupture.events_dt)
    # every rupture must take part in the event computation, even the
    # distant ones that will be thrown away later on
    rgetters = self.gen_rupture_getters()
    # associate eid -> rlz in parallel
    smap = parallel.Starmap(RuptureGetter.get_eid_rlz,
                            ((rg,) for rg in rgetters),
                            self.monitor('get_eid_rlz'),
                            progress=logging.debug)
    count = 0
    for chunk in smap:  # ~30 million events associated per minute
        for rec in chunk:
            events[count] = rec
            count += 1
            # event ids must fit in 32 bits
            if count >= TWO32:
                raise ValueError('There are more than %d events!' % count)
    events.sort(order='id')  # also fast
    num_distinct = len(numpy.unique(events['id']))
    assert num_distinct == len(events), (num_distinct, len(events))
    self.datastore['events'] = events
def save_events(self, rup_array):
    """
    Build the `events` array from the given ruptures, store it in the
    datastore and attach per-realization slice indices as an attribute.

    :param rup_array: an array of ruptures with fields grp_id
    """
    # this is very fast compared to saving the ruptures
    eids = rupture.get_eids(
        rup_array, self.samples_by_grp, self.num_rlzs_by_grp)
    self.check_overflow()  # check the number of events
    events = numpy.zeros(len(eids), rupture.events_dt)
    # when computing the events all ruptures must be considered,
    # including the ones far away that will be discarded later on
    rgetters = self.gen_rupture_getters()
    # build the associations eid -> rlz in parallel
    smap = parallel.Starmap(RuptureGetter.get_eid_rlz,
                            ((rgetter,) for rgetter in rgetters),
                            self.monitor('get_eid_rlz'),
                            progress=logging.debug)
    i = 0
    for eid_rlz in smap:  # 30 million of events associated in 1 minute!
        for er in eid_rlz:
            events[i] = er
            i += 1
            # event ids are 32 bit: refuse to overflow silently
            # (same guard as the other save_events variants)
            if i >= TWO32:
                raise ValueError('There are more than %d events!' % i)
    # sort by (rlz, eid) so that the events of each realization are
    # contiguous and can be addressed by a (start, stop) pair
    events.sort(order=['rlz', 'eid'])  # fast too
    n_unique_events = len(numpy.unique(events['eid']))
    assert n_unique_events == len(events), (n_unique_events, len(events))
    self.datastore['events'] = events
    # one (start, stop) row per realization
    indices = numpy.zeros((self.R, 2), U32)
    for r, [startstop] in get_indices(events['rlz']).items():
        indices[r] = startstop
    self.datastore.set_attrs('events', indices=indices)
def save_events(self, rup_array):
    """
    Compute the event IDs for the given ruptures, associate each event
    to its realization and store the result under ``events``.

    :param rup_array: an array of ruptures with fields grp_id
    :returns: a list of RuptureGetters
    """
    # cheap compared to saving the ruptures
    eids = rupture.get_eids(rup_array, self.samples_by_grp,
                            self.num_rlzs_by_grp)
    self.E = len(eids)
    self.check_overflow()  # sanity check on the number of events
    events = numpy.zeros(len(eids), rupture.events_dt)
    events['eid'] = eids
    # map each event id to its position in the events array
    self.eid2idx = eid2idx = {e: pos for pos, e in
                              enumerate(events['eid'])}
    rgetters = self.get_rupture_getters()
    # associate eid -> rlz in parallel
    smap = parallel.Starmap(RuptureGetter.get_eid_rlz,
                            ((rg,) for rg in rgetters),
                            self.monitor('get_eid_rlz'),
                            progress=logging.debug)
    # fast: 30 million of events associated in 1 minute
    for batch in smap:
        for eid, rlz in batch:
            events[eid2idx[eid]]['rlz'] = rlz
    self.datastore['events'] = events  # fast too
    return rgetters
def save_events(self, rup_array):
    """
    :param rup_array: an array of ruptures with fields grp_id
    :returns: a list of RuptureGetters
    """
    # this is very fast compared to saving the ruptures
    eids = rupture.get_eids(rup_array, self.samples_by_grp,
                            self.num_rlzs_by_grp)
    self.check_overflow()  # check the number of events
    events = numpy.zeros(len(eids), rupture.events_dt)
    # when computing the events all ruptures must be considered,
    # including the ones far away that will be discarded later on
    rgetters = gen_rgetters(self.datastore)
    # build the associations eid -> rlz sequentially or in parallel
    # this is very fast: I saw 30 million events associated in 1 minute!
    logging.info('Building assocs event_id -> rlz_id for {:_d} events'
                 ' and {:_d} ruptures'.format(len(events), len(rup_array)))
    if len(events) < 1E5:
        # few events: the Starmap overhead is not worth paying
        it = map(RuptureGetter.get_eid_rlz, rgetters)
    else:
        # parallel composite array with the associations eid->rlz
        it = parallel.Starmap(RuptureGetter.get_eid_rlz,
                              ((rgetter, ) for rgetter in rgetters),
                              progress=logging.debug,
                              h5=self.datastore.hdf5)
    i = 0
    for eid_rlz in it:
        for er in eid_rlz:
            events[i] = er
            i += 1
            # event ids must fit in 32 bits
            if i >= TWO32:
                raise ValueError('There are more than %d events!' % i)
    events.sort(order='rup_id')  # fast too
    # sanity check: no duplicated (id, rup_id) pair
    n_unique_events = len(numpy.unique(events[['id', 'rup_id']]))
    assert n_unique_events == len(events), (n_unique_events, len(events))
    # renumber the events sequentially, in rup_id order
    events['id'] = numpy.arange(len(events))
    # set event year and event ses starting from 1
    itime = int(self.oqparam.investigation_time)
    nses = self.oqparam.ses_per_logic_tree_path
    extra = numpy.zeros(len(events), [('year', U16), ('ses_id', U16)])
    # seed for year and ses_id; seeding here makes the sampling below
    # reproducible for a given ses_seed
    numpy.random.seed(self.oqparam.ses_seed)
    extra['year'] = numpy.random.choice(itime, len(events)) + 1
    extra['ses_id'] = numpy.random.choice(nses, len(events)) + 1
    self.datastore['events'] = util.compose_arrays(events, extra)
    # store the first/last event index of each rupture; since the events
    # are sorted by rup_id, each rupture has a single (start, stop) pair
    eindices = get_indices(events['rup_id'])
    arr = numpy.array(list(eindices.values()))[:, 0, :]
    self.datastore['ruptures']['e0'] = arr[:, 0]
    self.datastore['ruptures']['e1'] = arr[:, 1]
def save_events(self, rup_array):
    """
    Associate each event to its realization and store the resulting
    ``events`` array and per-rupture event slices in the datastore.

    :param rup_array: an array of ruptures with fields grp_id
    """
    # cheap compared to saving the ruptures
    eids = rupture.get_eids(rup_array, self.samples_by_grp,
                            self.num_rlzs_by_grp)
    self.check_overflow()  # sanity check on the number of events
    events = numpy.zeros(len(eids), rupture.events_dt)
    # all ruptures must be considered here, including the distant ones
    # that will be discarded later on
    rgetters = self.gen_rupture_getters()
    # build the associations eid -> rlz sequentially or in parallel;
    # very fast: ~30 million events associated per minute
    logging.info(
        'Building associations event_id -> rlz_id for %d events'
        ' and %d ruptures', len(events), len(rup_array))
    if len(events) >= 1E5:
        source = parallel.Starmap(
            RuptureGetter.get_eid_rlz,
            ((rg, ) for rg in rgetters),
            progress=logging.debug, hdf5path=self.datastore.filename)
    else:
        # few events: skip the Starmap machinery
        source = map(RuptureGetter.get_eid_rlz, rgetters)
    pos = 0
    for batch in source:
        for row in batch:
            events[pos] = row
            pos += 1
            # event ids must fit in 32 bits
            if pos >= TWO32:
                raise ValueError('There are more than %d events!' % pos)
    events.sort(order='rup_id')  # fast too
    # sanity check: (id, rup_id) pairs must be distinct
    num_distinct = len(numpy.unique(events[['id', 'rup_id']]))
    assert num_distinct == len(events), (num_distinct, len(events))
    # renumber the events sequentially, in rup_id order
    events['id'] = numpy.arange(len(events))
    self.datastore['events'] = events
    # one (start, stop) pair per rupture, since events are rup_id-sorted
    eindices = get_indices(events['rup_id'])
    arr = numpy.array(list(eindices.values()))[:, 0, :]
    self.datastore['eslices'] = arr  # shape (U, 2)