def test_add_entry(self):
    """Test multiple add entry calls.

    Verifies that successive add_entry() calls keep the same field set
    while accumulating the count and the flattened value list.
    """
    inserter = BulkInserter(OqUser)

    inserter.add_entry(user_name='user1', full_name='An user')
    # Field order is implementation-defined, so compare sorted copies.
    # NOTE: assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual(sorted(['user_name', 'full_name']),
                     sorted(inserter.fields))
    self.assertEqual(inserter.count, 1)
    self.assertEqual(
        _map_values(inserter.fields,
                    [{'user_name': 'user1', 'full_name': 'An user'}]),
        inserter.values)

    inserter.add_entry(user_name='user2', full_name='Another user')
    inserter.add_entry(user_name='user3', full_name='A third user')
    self.assertEqual(sorted(['user_name', 'full_name']),
                     sorted(inserter.fields))
    self.assertEqual(inserter.count, 3)
    self.assertEqual(
        _map_values(inserter.fields,
                    [{'user_name': 'user1', 'full_name': 'An user'},
                     {'user_name': 'user2', 'full_name': 'Another user'},
                     {'user_name': 'user3', 'full_name': 'A third user'}]),
        inserter.values)
def test_add_entry_different_keys(self):
    """add_entry() must reject entries whose keys differ from the first.

    The inserter fixes its field set on the first entry; later entries
    with missing or extra fields raise AssertionError.
    """
    inserter = BulkInserter(OqUser)
    inserter.add_entry(user_name='user1', full_name='An user')
    # Missing a previously-seen field.
    self.assertRaises(AssertionError, inserter.add_entry,
                      user_name='user1')
    # Extra field not present in the first entry.
    self.assertRaises(AssertionError, inserter.add_entry,
                      user_name='user1', full_name='An user',
                      data_is_open=False)
def test_flush_geometry(self):
    """Geometry fields are wrapped in GeomFromText() in the flushed SQL.

    NOTE: assertEquals is a deprecated unittest alias; assertEqual is
    used instead.
    """
    inserter = BulkInserter(GmfData)
    connection = writer.connections['reslt_writer']

    inserter.add_entry(location='POINT(1 1)', output_id=1)
    fields = inserter.fields
    inserter.flush()

    # Field order is not guaranteed, so pick the placeholder string
    # that matches whichever ordering the inserter produced.
    if fields[0] == 'output_id':
        values = '%s, GeomFromText(%s, 4326)'
    else:
        values = 'GeomFromText(%s, 4326), %s'

    self.assertEqual(
        'INSERT INTO "hzrdr"."gmf_data" (%s) VALUES (%s)' %
        (", ".join(fields), values),
        connection.sql)
def __init__(self, job, imt):
    """
    :param job: the current job (stored, not validated here)
    :param imt: the intensity measure type for this aggregate result
    """
    self._job = job
    self._imt = imt
    # Bulk inserter bound to the concrete subclass' model
    # (self.__class__.model must be defined by the subclass).
    self._inserter = BulkInserter(self.__class__.model)
    # Both are populated later (by create_aggregate_result and
    # __enter__ respectively, judging by the sibling class in this file
    # — TODO confirm for this copy).
    self._aggregate_result = None
    self._transaction_handler = None
class AggregateResultWriter(object):
    """
    Manager to serialize to db Aggregate results (Mean curves,
    Quantile Curves, Maps, etc.).

    It implements the context manager pattern to take care of the
    transaction management

    :attribute _job
      The current job

    :attribute _imt
      The intensity measure type for this aggregate result
    """

    def __init__(self, job, imt):
        self._job = job
        self._imt = imt
        # Bulk inserter bound to the concrete subclass' model.
        self._inserter = BulkInserter(self.__class__.model)
        self._aggregate_result = None
        self._transaction_handler = None

    def _create_output(self):
        """
        Create an Output object related to the aggregate result
        """
        output = models.Output.objects.create_output(
            job=self._job,
            output_type=self.__class__.output_type,
            display_name=self.display_name())
        return output

    def display_name(self):
        """
        The display name of the output being created (used for the
        Output object). Abstract method
        """
        raise NotImplementedError

    def create_aggregate_result(self):
        """
        Create an Aggregate result (both the Output object and the
        corresponding curve/map/etc. object)

        :returns: a tuple (aggregate result item, output)
        """
        output = self._create_output()
        self._aggregate_result = self._create_aggregate_result_item(output)
        return self._aggregate_result, output

    def _create_aggregate_result_item(self, output):
        """
        Create an aggregate result item (only the HazardCurve /
        HazardMap). Abstract method
        """
        raise NotImplementedError

    def add_data(self, location, _):
        """
        Add a aggregate result data (to be serialized when flush is
        called). Abstract method
        """
        raise NotImplementedError

    def __enter__(self):
        self._transaction_handler = transaction.commit_on_success(
            using='reslt_writer')
        self._transaction_handler.__enter__()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # BUG FIX: the original re-raised exc_val *before* delegating to
        # the transaction handler, so on error the transaction context
        # was never exited (no rollback) and the original traceback was
        # lost. Always delegate to the handler (which rolls back on
        # error, commits on success) and return a falsy value so any
        # exception propagates unchanged.
        if exc_type is None:
            self._flush_data()
        self._transaction_handler.__exit__(exc_type, exc_val, exc_tb)

    def _flush_data(self):
        """
        Flush the data to the db
        """
        self._inserter.flush()
def do_aggregate_post_proc(self):
    """
    Grab hazard data for all realizations and sites from the database
    and compute mean and/or quantile aggregates (depending on which
    options are enabled in the calculation).

    Post-processing results will be stored directly into the database.
    """
    num_rlzs = models.LtRealization.objects.filter(
        hazard_calculation=self.hc).count()

    # BUG FIX: use explicit floor division. With `/`, the result is a
    # float under Python 3 (or `from __future__ import division`), which
    # would defeat the `== 0` check below and later break slicing
    # arithmetic; `//` keeps the Python 2 semantics explicitly.
    num_site_blocks_per_incr = int(_CURVE_CACHE_SIZE) // int(num_rlzs)
    if num_site_blocks_per_incr == 0:
        # This means we have `num_rlzs` >= `_CURVE_CACHE_SIZE`.
        # The minimum number of sites should be 1.
        num_site_blocks_per_incr = 1
    slice_incr = num_site_blocks_per_incr * num_rlzs  # unit: num records

    for imt, imls in self.hc.intensity_measure_types_and_levels.items():
        im_type, sa_period, sa_damping = models.parse_imt(imt)

        # prepare `output` and `hazard_curve` containers in the DB:
        container_ids = dict()
        if self.hc.mean_hazard_curves:
            mean_output = models.Output.objects.create_output(
                job=self.job,
                display_name='mean-curves-%s' % imt,
                output_type='hazard_curve'
            )
            mean_hc = models.HazardCurve.objects.create(
                output=mean_output,
                investigation_time=self.hc.investigation_time,
                imt=im_type,
                imls=imls,
                sa_period=sa_period,
                sa_damping=sa_damping,
                statistics='mean'
            )
            container_ids['mean'] = mean_hc.id

        if self.hc.quantile_hazard_curves:
            for quantile in self.hc.quantile_hazard_curves:
                q_output = models.Output.objects.create_output(
                    job=self.job,
                    display_name=(
                        'quantile(%s)-curves-%s' % (quantile, imt)
                    ),
                    output_type='hazard_curve'
                )
                q_hc = models.HazardCurve.objects.create(
                    output=q_output,
                    investigation_time=self.hc.investigation_time,
                    imt=im_type,
                    imls=imls,
                    sa_period=sa_period,
                    sa_damping=sa_damping,
                    statistics='quantile',
                    quantile=quantile
                )
                container_ids['q%s' % quantile] = q_hc.id

        all_curves_for_imt = models.HazardCurveData.objects\
            .all_curves_for_imt(
                self.job.id, im_type, sa_period, sa_damping)\
            .order_by('location')

        with transaction.commit_on_success(using='reslt_writer'):
            inserter = BulkInserter(models.HazardCurveData,
                                    max_cache_size=_CURVE_CACHE_SIZE)

            for chunk in models.queryset_iter(all_curves_for_imt,
                                              slice_incr):
                # slice each chunk by `num_rlzs` into `site_chunk`
                # and compute the aggregate
                for site_chunk in block_splitter(chunk, num_rlzs):
                    site = site_chunk[0].location
                    curves_poes = [x.poes for x in site_chunk]
                    curves_weights = [x.weight for x in site_chunk]

                    # do means and quantiles
                    # quantiles first:
                    if self.hc.quantile_hazard_curves:
                        for quantile in self.hc.quantile_hazard_curves:
                            if self.hc.number_of_logic_tree_samples == 0:
                                # explicitly weighted quantiles
                                q_curve = weighted_quantile_curve(
                                    curves_poes, curves_weights, quantile
                                )
                            else:
                                # implicitly weighted quantiles
                                q_curve = quantile_curve(
                                    curves_poes, quantile
                                )
                            inserter.add_entry(
                                hazard_curve_id=(
                                    container_ids['q%s' % quantile]
                                ),
                                poes=q_curve.tolist(),
                                location=site.wkt
                            )

                    # then means
                    if self.hc.mean_hazard_curves:
                        m_curve = mean_curve(
                            curves_poes, weights=curves_weights
                        )
                        inserter.add_entry(
                            hazard_curve_id=container_ids['mean'],
                            poes=m_curve.tolist(),
                            location=site.wkt
                        )

            inserter.flush()
def test_flush(self):
    """flush() builds a multi-row INSERT with one %s pair per entry.

    NOTE: assertEquals is a deprecated unittest alias; assertEqual is
    used instead.
    """
    inserter = BulkInserter(OqUser)
    connection = writer.connections['admin']

    # Single entry -> a single placeholder tuple.
    inserter.add_entry(user_name='user1', full_name='An user')
    fields = inserter.fields
    inserter.flush()
    self.assertEqual(
        'INSERT INTO "admin"."oq_user" (%s) VALUES'
        ' (%%s, %%s)' % (", ".join(fields)),
        connection.sql)

    # Two entries -> two placeholder tuples in the same statement.
    inserter.add_entry(user_name='user1', full_name='An user')
    inserter.add_entry(user_name='user2', full_name='Another user')
    fields = inserter.fields
    inserter.flush()
    self.assertEqual(
        'INSERT INTO "admin"."oq_user" (%s) VALUES'
        ' (%%s, %%s), (%%s, %%s)' % (", ".join(fields)),
        connection.sql)