def log_step(self, progress):
    """Feed every progress step to the ETA estimator and emit throttled updates.

    Pushes a rate-limited progress update to the export task, then (when
    verbose) writes a mapproxy-style progress line including the ETA string.
    """
    from eventkit_cloud.tasks.export_tasks import update_progress

    self.eta.update(progress.progress)
    # This may also get called by update_progress, but because update_progress
    # is rate-limited we also do it here to get more data points for making
    # better eta estimates.
    if self.task_uid:
        if self.log_step_counter == 0:
            update_progress(self.task_uid, progress=progress.progress * 100, eta=self.eta)
            self.log_step_counter = self.log_step_step
        self.log_step_counter -= 1

    # Old version of super.log_step that includes the ETA string:
    # https://github.com/mapproxy/mapproxy/commit/93bc53a01318cd63facdb4ee13968caa847a5c17
    if not self.verbose:
        return
    if (self._laststep + .5) < time.time():
        # log progress at most every 500ms
        status_line = '[%s] %6.2f%%\t%-20s ETA: %s\r' % (
            timestamp(), progress.progress * 100, progress.progress_str, self.eta)
        self.out.write(status_line)
        self.out.flush()
        self._laststep = time.time()
def log_step(self, progress):
    """Push a rate-limited progress update, then defer to the parent logger.

    Only every ``log_step_step``-th call actually hits ``update_progress``,
    to avoid hammering the ExportTask model.
    """
    from eventkit_cloud.tasks.export_tasks import update_progress

    if self.task_uid:
        update_due = self.log_step_counter == 0
        if update_due:
            update_progress(self.task_uid, progress=progress.progress * 100)
            # Re-arm the countdown until the next update.
            self.log_step_counter = self.log_step_step
        self.log_step_counter -= 1
    super(CustomLogger, self).log_step(progress)
def test_update_progress(self, export_task, mock_close, mock_set_cache_value):
    """update_progress should close stale DB connections and cache both the
    progress value and the estimated finish time for the task."""
    provider_task = DataProviderTaskRecord.objects.create(
        run=self.run,
        name='test_provider_task',
    )
    task_uid = ExportTaskRecord.objects.create(
        export_provider_task=provider_task,
        status=TaskStates.PENDING.value,
        name="test_task",
    ).uid
    finish_estimate = timezone.now()

    update_progress(task_uid, progress=50, estimated_finish=finish_estimate)

    mock_close.assert_called_once()
    expected_calls = [
        call(uid=task_uid, attribute='progress',
             model_name='ExportTaskRecord', value=50),
        call(uid=task_uid, attribute='estimated_finish',
             model_name='ExportTaskRecord', value=finish_estimate),
    ]
    mock_set_cache_value.assert_has_calls(expected_calls)
def test_update_progress(self, export_task, mock_close, mock_set_cache_value):
    """Verify that update_progress caches progress and estimated_finish and
    closes the lingering DB connection exactly once."""
    data_provider_task = DataProviderTaskRecord.objects.create(
        run=self.run, name='test_provider_task')
    record = ExportTaskRecord.objects.create(
        export_provider_task=data_provider_task,
        status=TaskStates.PENDING.value,
        name="test_task")
    when = timezone.now()

    update_progress(record.uid, progress=50, estimated_finish=when)

    mock_close.assert_called_once()
    mock_set_cache_value.assert_has_calls([
        call(uid=record.uid, attribute='progress',
             model_name='ExportTaskRecord', value=50),
        call(uid=record.uid, attribute='estimated_finish',
             model_name='ExportTaskRecord', value=when),
    ])
def log_step(self, progress):
    """Record a progress step: update the ETA estimator, push throttled
    progress to the export task, and optionally print a console status line."""
    from eventkit_cloud.tasks.export_tasks import update_progress

    self.eta.update(progress.progress)
    # update_progress may also feed the ETA, but it is rate-limited, so doing
    # it here as well gives the estimator more data points and better estimates.
    if self.task_uid:
        if not self.log_step_counter:
            update_progress(self.task_uid, progress=progress.progress * 100, eta=self.eta)
            self.log_step_counter = self.log_step_step
        self.log_step_counter -= 1

    # Re-implementation of the pre-93bc53a mapproxy log_step, which still
    # included the ETA string:
    # https://github.com/mapproxy/mapproxy/commit/93bc53a01318cd63facdb4ee13968caa847a5c17
    if not self.verbose:
        return
    throttle_expired = (self._laststep + .5) < time.time()
    if throttle_expired:  # log progress at most every 500ms
        self.out.write('[%s] %6.2f%%\t%-20s ETA: %s\r' % (timestamp(),
                                                          progress.progress * 100,
                                                          progress.progress_str,
                                                          self.eta))
        self.out.flush()
        self._laststep = time.time()
def run_query(self, user_details=None, subtask_percentage=100, subtask_start=0, eta=None):
    """
    Run the overpass query.

    Args:
        user_details: dict with a 'username' key, used for audit logging; a
            placeholder is substituted when not supplied.
        subtask_percentage: the percentage of the task referenced by
            self.task_uid this method takes up.  Used to update progress.
        subtask_start: progress offset of this subtask within the parent task.
        eta: ETA estimator object forwarded to update_progress.

    Returns:
        the path to the overpass extract

    Raises:
        exceptions.RequestException: if the overpass request fails.
        Exception: if the query returns no data at all.
    """
    from eventkit_cloud.tasks.export_tasks import update_progress
    from audit_logging.file_logging import logging_open

    # This is just to make it easier to trace when user_details haven't been sent
    if user_details is None:
        user_details = {'username': '******'}

    req = None
    q = self.get_query()
    logger.debug(q)
    # BUG FIX: was "'... %s'.format(...)", which logged the literal '%s';
    # use logging's lazy %-style arguments instead.
    logger.debug('Query started at: %s', datetime.now())
    try:
        update_progress(self.task_uid, progress=0, subtask_percentage=subtask_percentage,
                        subtask_start=subtask_start, eta=eta, msg='Querying provider data')
        req = auth_requests.post(self.url, slug=self.slug, data=q, stream=True,
                                 verify=self.verify_ssl)
        try:
            total_size = int(req.headers.get('content-length'))
        except (ValueError, TypeError):
            if req.content:
                total_size = len(req.content)
            else:
                raise Exception("Overpass Query failed to return any data")

        # Since the request takes a while, jump progress to a very high percent...
        query_percent = 85.0
        download_percent = 100.0 - query_percent
        update_progress(
            self.task_uid, progress=query_percent, subtask_percentage=subtask_percentage,
            subtask_start=subtask_start, eta=eta,
            msg='Downloading data from provider: 0 of {:.2f} MB(s)'.format(
                total_size / float(1e6)))

        CHUNK = 1024 * 1024 * 2  # 2MB chunks
        update_interval = 1024 * 1024 * 250  # Every 250 MB
        written_size = 0
        last_update = 0
        with logging_open(self.raw_osm, 'wb', user_details=user_details) as fd:
            for chunk in req.iter_content(CHUNK):
                fd.write(chunk)
                # BUG FIX: count the bytes actually received — the final chunk
                # is usually smaller than CHUNK, so adding CHUNK overstated
                # written_size and could push progress past 100%.
                written_size += len(chunk)

                # Limit the number of calls to update_progress because every time
                # update_progress is called, the ExportTask model is updated, causing
                # django_audit_logging to update the audit way too much (via the
                # post_save hook).  In the future, we might try still using update
                # progress just as much but update the model less to make the audit
                # log less spammed, or making audit_logging only log certain model
                # changes rather than logging absolutely everything.
                last_update += len(chunk)
                if last_update > update_interval:
                    last_update = 0
                    progress = query_percent + (
                        float(written_size) / float(total_size) * download_percent)
                    update_progress(
                        self.task_uid, progress=progress,
                        subtask_percentage=subtask_percentage,
                        subtask_start=subtask_start, eta=eta,
                        msg='Downloading data from provider: {:.2f} of {:.2f} MB(s)'.format(
                            written_size / float(1e6), total_size / float(1e6)))

        # Done w/ this subtask
        update_progress(self.task_uid, progress=100, subtask_percentage=subtask_percentage,
                        subtask_start=subtask_start, eta=eta,
                        msg='Completed downloading data from provider')
    except exceptions.RequestException as e:
        logger.error('Overpass query threw: {0}'.format(e))
        raise exceptions.RequestException(e)
    finally:
        # Always release the streamed HTTP response, even on failure.
        if req:
            req.close()

    logger.debug('Query finished at %s', datetime.now())
    logger.debug('Wrote overpass query results to: %s', self.raw_osm)
    return self.raw_osm
def run_query(self, user_details=None, subtask_percentage=100):
    """
    Run the overpass query.

    Args:
        user_details: dict with a 'username' key, used for audit logging; a
            placeholder is substituted when not supplied.
        subtask_percentage: the percentage of the task referenced by
            self.task_uid this method takes up.  Used to update progress.

    Returns:
        the path to the overpass extract

    Raises:
        exceptions.RequestException: if the overpass request fails.
        Exception: if the query returns no data at all.
    """
    from eventkit_cloud.tasks.export_tasks import update_progress
    from audit_logging.file_logging import logging_open

    # This is just to make it easier to trace when user_details haven't been sent
    if user_details is None:
        user_details = {'username': '******'}

    req = None
    q = self.get_query()
    logger.debug(q)
    # BUG FIX: was "'... %s'.format(...)", which logged the literal '%s';
    # use logging's lazy %-style arguments instead.
    logger.debug('Query started at: %s', datetime.now())
    try:
        req = auth_requests.post(self.url, slug=self.slug, data=q, stream=True,
                                 verify=self.verify_ssl)
        # Since the request takes a while, jump progress to an arbitrary 50 percent...
        update_progress(self.task_uid, progress=50, subtask_percentage=subtask_percentage)
        try:
            size = int(req.headers.get('content-length'))
        except (ValueError, TypeError):
            if req.content:
                size = len(req.content)
            else:
                raise Exception("Overpass Query failed to return any data")

        CHUNK = 1024 * 1024 * 2  # 2MB chunks
        with logging_open(self.raw_osm, 'wb', user_details=user_details) as fd:
            for chunk in req.iter_content(CHUNK):
                fd.write(chunk)
                # NOTE: deliberately no per-chunk update_progress here: every
                # call updates the ExportTask model, causing django_audit_logging
                # to spam the audit log (via the post_save hook).  In the future,
                # we might update progress just as often but write the model less,
                # or make audit_logging only log certain model changes.
    except exceptions.RequestException as e:
        logger.error('Overpass query threw: {0}'.format(e))
        raise exceptions.RequestException(e)
    finally:
        # BUG FIX: release the streamed HTTP response even on failure, matching
        # the newer variant of this method; previously it leaked the connection.
        if req:
            req.close()

    logger.debug('Query finished at %s', datetime.now())
    logger.debug('Wrote overpass query results to: %s', self.raw_osm)
    return self.raw_osm
def run(self, subtask_percentage=100, subtask_start=0, eta=None):
    """
    Create the GeoPackage from the osm data.

    Args:
        subtask_percentage: percentage of the parent task this step represents.
        subtask_start: progress offset of this subtask within the parent task.
        eta: ETA estimator object forwarded to update_progress.
    """
    # avoiding a circular import
    from eventkit_cloud.tasks.export_tasks import update_progress

    if self.is_complete:
        LOG.debug("Skipping Geopackage, file exists")
        return

    # Build an OSM config listing every key selected for each geometry type,
    # then run ogr2ogr to convert the PBF into a GeoPackage.
    keys_points = self.feature_selection.key_union('points')
    keys_lines = self.feature_selection.key_union('lines')
    keys_polygons = self.feature_selection.key_union('polygons')
    osmconf = OSMConfig(self.stage_dir, points=keys_points, lines=keys_lines,
                        polygons=keys_polygons)
    conf = osmconf.create_osm_conf()
    ogr_cmd = self.ogr_cmd.safe_substitute({'gpkg': self.output_gpkg,
                                            'osm': self.input_pbf,
                                            'osmconf': conf})
    LOG.debug('Running: %s' % ogr_cmd)
    subprocess.check_call(ogr_cmd, shell=True, executable='/bin/bash')

    # Create the default osm gpkg schema.
    conn = sqlite3.connect(self.output_gpkg)
    try:
        conn.enable_load_extension(True)
        cur = conn.cursor()
        cur.execute("select load_extension('mod_spatialite')")
        cur.execute("CREATE TABLE boundary (id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, geom GEOMETRY)")
        cur.execute("INSERT INTO boundary (geom) VALUES (GeomFromWKB(?,4326));", (self.aoi_geom.wkb,))
        update_progress(self.export_task_record_uid, 30, subtask_percentage, subtask_start, eta=eta)

        cur.executescript(SPATIAL_SQL)
        self.update_zindexes(cur, self.feature_selection)
        update_progress(self.export_task_record_uid, 42, subtask_percentage, subtask_start, eta=eta)

        # add themes
        create_sqls, index_sqls = self.feature_selection.sqls
        for query in create_sqls:
            LOG.debug(query)
            cur.executescript(query)
        update_progress(self.export_task_record_uid, 50, subtask_percentage, subtask_start, eta=eta)
        for query in index_sqls:
            LOG.debug(query)
            cur.executescript(query)

        # Remove the intermediate points/lines/multipolygons tables.
        cur.execute("DROP TABLE points")
        cur.execute("DROP TABLE lines")
        cur.execute("DROP TABLE multipolygons")
        conn.commit()
    finally:
        # BUG FIX: close the connection even when an SQL statement raises;
        # previously a failure leaked the sqlite handle.
        conn.close()

    if self.per_theme:
        # this creates per-theme GPKGs
        for theme in self.feature_selection.themes:
            conn = sqlite3.connect(self.stage_dir + slugify(theme) + ".gpkg")
            try:
                conn.enable_load_extension(True)
                cur = conn.cursor()
                cur.execute("attach database ? as 'geopackage'", (self.output_gpkg,))
                cur.execute("create table gpkg_spatial_ref_sys as select * from geopackage.gpkg_spatial_ref_sys")
                cur.execute("create table gpkg_contents as select * from geopackage.gpkg_contents where 0")
                cur.execute(
                    "create table gpkg_geometry_columns as select * from geopackage.gpkg_geometry_columns where 0")
                for geom_type in self.feature_selection.geom_types(theme):
                    for stmt in self.feature_selection.create_sql(theme, geom_type):
                        cur.executescript(stmt)
                conn.commit()
            finally:
                conn.close()

    update_progress(self.export_task_record_uid, 100, subtask_percentage, subtask_start, eta=eta)