def get_job_results(self):
    """Fetch the OGC API Processes job results and return the download URL.

    Polls the job (via ``wait_for_ogc_process_job``) until it reaches a
    finished state, then requests the job's results document and extracts
    the archive download URL.

    Returns:
        str: the results' download URL (``archive_format`` -> ``href``).

    Raises:
        Exception: if the job did not finish successfully, the results
            request failed, or the response contains no download URL.
    """
    # Poll job status until it reaches a terminal state.
    ogc_job = self.wait_for_ogc_process_job(self.job_url, self.task_id)
    if ogc_job.get("status") != OGC_Status.SUCCESSFUL.value:
        logger.error(ogc_job)
        raise Exception(
            f"Unsuccessful OGC export. {ogc_job.get('status')}: {ogc_job.get('message')}"
        )

    update_progress(self.task_id, progress=50, subtask_percentage=50)

    # Fetch the job results document.
    try:
        # NOTE(review): urljoin drops the last path segment unless
        # self.job_url ends with "/" — assumed true for job URLs; confirm.
        response = self.session.get(urljoin(self.job_url, "results/"))
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Chain the original error so the root cause is preserved in tracebacks.
        raise Exception(f"Unsuccessful request:{e}") from e

    response_content = response.json()
    download_url = response_content.get("archive_format", dict()).get("href")
    if not download_url:
        logger.error(response_content)
        raise Exception("The OGC Process server did not produce a download.")
    return download_url
def wait_for_ogc_process_job(self, job_url, task_id=None, interval=5):
    """Poll an OGC API Processes job until it is done processing.

    Args:
        job_url: URL of the job status endpoint.
        task_id: optional EventKit task uid; when given, progress updates
            are forwarded to the task record.
        interval: seconds to sleep between polls.

    Returns:
        dict: the final status response content (includes the job's status).

    Raises:
        Exception: on a failed status request or a response without a status.
    """
    job_status = None
    counter = 0  # total seconds waited so far, used only for logging
    # NOTE(review): there is no upper bound on polling — a job that never
    # reaches a finished status loops forever; consider adding a timeout.
    while job_status not in OGC_Status.get_finished_status():
        counter += interval
        time.sleep(interval)
        try:
            response = self.session.get(job_url)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            # Chain the original error so the root cause is preserved.
            raise Exception(f"Unsuccessful request:{e}") from e
        response_content = response.json()
        job_status = response_content.get("status")
        if not job_status:
            logger.error(response_content)
            raise Exception("OGC API Process service did not provide a valid status.")
        if task_id:
            update_progress(
                task_id,
                progress=25,
                subtask_percentage=response_content.get("progress", 50),
            )
        # Consistent lazy %-formatting: the original mixed an f-string with
        # a trailing %s argument (the rendered message is unchanged).
        logger.info(
            "Waiting for %s to finish from %s (total time: %s).",
            task_id,
            job_url,
            counter,
        )
        if job_status in OGC_Status.get_finished_status():
            return response_content
def log_step(self, progress):
    """Record a progress step: refresh the ETA estimate, push progress to the
    ExportTaskRecord (honoring cancellation), and optionally log an ETA line.

    Args:
        progress: a mapproxy progress object exposing ``progress`` (0.0-1.0
            fraction) and ``progress_str`` — assumed; confirm against caller.

    Raises:
        Exception: if the task's cached status says it was canceled.
    """
    from eventkit_cloud.tasks.helpers import update_progress

    self.eta.update(progress.progress)
    # This may also get called by update_progress but because update_progress
    # is rate-limited; we also do it here to get more data points for making
    # better eta estimates
    if self.task_uid:
        # Only check/update every log_step_step calls: the cache lookup and
        # progress write are too expensive to do on every step.
        if self.log_step_counter == 0:
            # Abort promptly if another worker/user canceled this task.
            if (
                get_cache_value(uid=self.task_uid, attribute="status", model_name="ExportTaskRecord")
                == TaskState.CANCELED.value
            ):
                logger.error(f"The task uid: {self.task_uid} was canceled. Exiting...")
                raise Exception("The task was canceled.")
            # progress.progress is a 0-1 fraction; the task record expects 0-100.
            update_progress(self.task_uid, progress=progress.progress * 100, eta=self.eta)
            self.log_step_counter = self.log_step_step  # reset the throttle window
        self.log_step_counter -= 1
    # Old version of super.log_step that includes ETA string
    # https://github.com/mapproxy/mapproxy/commit/93bc53a01318cd63facdb4ee13968caa847a5c17
    if not self.verbose:
        return
    # Rate-limit console output to at most once per self.interval seconds.
    if (self._laststep + self.interval) < time.time():
        logger.info(
            f"[{timestamp()}] {progress.progress * 100:6.2f}%\t{progress.progress_str.ljust(20)} ETA: {self.eta}\r"
        )
        # [12:24:08] 100.00% 000000 ETA: 2020-08-06-12:22:30-UTC
        self._laststep = time.time()
def progress_callback(pct, msg, user_data):
    """Forward a progress fraction (0.0-1.0) to EventKit's task progress tracker.

    Args:
        pct: fractional completion, scaled to a 0-100 percentage before reporting.
        msg: human-readable status message passed through unchanged.
        user_data: dict carrying ``task_uid`` and optionally ``subtask_percentage``.
    """
    from eventkit_cloud.tasks.helpers import update_progress

    task_uid = user_data.get("task_uid")
    subtask_percentage = user_data.get("subtask_percentage", 100.0)
    update_progress(
        task_uid,
        progress=round(pct * 100),
        subtask_percentage=subtask_percentage,
        msg=msg,
    )
def test_update_progress(self, mock_close, mock_set_cache_value):
    """update_progress should close stale DB connections and cache both the
    progress value and the estimated finish time for the ExportTaskRecord."""
    task_uid = "1234"
    finish_estimate = timezone.now()

    update_progress(task_uid, progress=50, estimated_finish=finish_estimate)

    mock_close.assert_called_once()
    expected_calls = [
        call(uid=task_uid, attribute="progress", model_name="ExportTaskRecord", value=50),
        call(uid=task_uid, attribute="estimated_finish", model_name="ExportTaskRecord", value=finish_estimate),
    ]
    mock_set_cache_value.assert_has_calls(expected_calls)
def run(self, subtask_percentage=100, subtask_start=0, eta=None):
    """Create the GeoPackage from the osm data.

    Converts the input PBF to a GeoPackage via GDAL, then builds the default
    schema (boundary table, spatial SQL, themed tables) and optionally splits
    the result into one GeoPackage per theme.

    Args:
        subtask_percentage: share of the parent task this step represents.
        subtask_start: progress offset of this step within the parent task.
        eta: optional ETA object forwarded to progress updates.

    Returns:
        str | None: path to the output GeoPackage, or None when the
        conversion produced no content (caller cancels the provider task).
    """
    if self.is_complete:
        logger.debug("Skipping Geopackage, file exists")
        return
    keys_points = self.feature_selection.key_union("points")
    keys_lines = self.feature_selection.key_union("lines")
    keys_polygons = self.feature_selection.key_union("polygons")
    osmconf = OSMConfig(self.stage_dir, points=keys_points, lines=keys_lines, polygons=keys_polygons)
    conf = osmconf.create_osm_conf()
    # Fixed missing separator between the two f-string fragments
    # (previously rendered as "...TMPFILE_SIZE <n>from <path>...").
    logger.debug(
        f"Creating OSM gpkg using OSM_MAX_TMPFILE_SIZE {settings.OSM_MAX_TMPFILE_SIZE} "
        f"from {self.input_pbf} to {self.output_gpkg}"
    )
    gdalutils.convert(
        input_file=self.input_pbf,
        output_file=self.output_gpkg,
        driver="GPKG",
        boundary=json.loads(self.aoi_geom.geojson),
        config_options=[
            ("OSM_CONFIG_FILE", conf),
            ("OGR_INTERLEAVED_READING", "YES"),
            ("OSM_MAX_TMPFILE_SIZE", settings.OSM_MAX_TMPFILE_SIZE),
        ],
        task_uid=self.export_task_record_uid,
    )

    # Cancel the provider task if the geopackage has no data.
    if not check_content_exists(self.output_gpkg):
        return None

    # Create the default osm gpkg schema. try/finally guarantees the
    # connection is released even if any SQL step raises (the original
    # leaked the connection on error).
    conn = sqlite3.connect(self.output_gpkg)
    try:
        conn.enable_load_extension(True)
        cur = conn.cursor()
        cur.execute("select load_extension('mod_spatialite')")
        cur.execute(
            "CREATE TABLE boundary (id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, geom GEOMETRY)"
        )
        cur.execute(
            "INSERT INTO boundary (geom) VALUES (GeomFromWKB(?,4326));",
            (self.aoi_geom.wkb,),
        )
        update_progress(self.export_task_record_uid, 30, subtask_percentage, subtask_start, eta=eta)

        cur.executescript(SPATIAL_SQL)
        self.update_zindexes(cur, self.feature_selection)
        update_progress(self.export_task_record_uid, 42, subtask_percentage, subtask_start, eta=eta)

        # Add themed tables, then their indexes.
        create_sqls, index_sqls = self.feature_selection.sqls
        for query in create_sqls:
            logger.debug(query)
            cur.executescript(query)
        update_progress(self.export_task_record_uid, 50, subtask_percentage, subtask_start, eta=eta)
        for query in index_sqls:
            logger.debug(query)
            cur.executescript(query)

        # Remove the raw points/lines/multipolygons tables now that the
        # themed tables have been populated, and reclaim the space.
        cur.execute("DROP TABLE points")
        cur.execute("DROP TABLE lines")
        cur.execute("DROP TABLE multipolygons")
        cur.execute("VACUUM;")
        conn.commit()
    finally:
        conn.close()

    if self.per_theme:
        # This creates per-theme GPKGs by copying schema/data out of the
        # combined geopackage.
        for theme in self.feature_selection.themes:
            conn = sqlite3.connect(self.stage_dir + slugify(theme) + ".gpkg")
            try:
                conn.enable_load_extension(True)
                cur = conn.cursor()
                cur.execute("attach database ? as 'geopackage'", (self.output_gpkg,))
                cur.execute(
                    "create table gpkg_spatial_ref_sys as select * from geopackage.gpkg_spatial_ref_sys"
                )
                # "where 0" copies only the table structure, no rows.
                cur.execute(
                    "create table gpkg_contents as select * from geopackage.gpkg_contents where 0"
                )
                cur.execute(
                    "create table gpkg_geometry_columns as select * from geopackage.gpkg_geometry_columns where 0"
                )
                for geom_type in self.feature_selection.geom_types(theme):
                    for stmt in self.feature_selection.create_sql(theme, geom_type):
                        cur.executescript(stmt)
                cur.execute("VACUUM;")
                conn.commit()
            finally:
                conn.close()

    update_progress(self.export_task_record_uid, 100, subtask_percentage, subtask_start, eta=eta)
    return self.output_gpkg
def run_query(self, user_details=None, subtask_percentage=100, subtask_start=0, eta=None):
    """Run the overpass query.

    subtask_percentage is the percentage of the task referenced by self.task_uid
    this method takes up. Used to update progress.

    Args:
        user_details: audit-logging identity; defaults to a masked username.
        subtask_percentage: share of the parent task this step represents.
        subtask_start: progress offset of this step within the parent task.
        eta: optional ETA object forwarded to progress updates.

    Return: the path to the overpass extract

    Raises:
        exceptions.RequestException: if the overpass request fails.
        Exception: if the response contains no data at all.
    """
    from eventkit_cloud.tasks.helpers import update_progress
    from audit_logging.file_logging import logging_open

    # This is just to make it easier to trace when user_details haven't been sent
    if user_details is None:
        user_details = {"username": "******"}

    req = None
    query = self.get_query()
    logger.debug(query)
    logger.debug(f"Query started at: {datetime.now()}")
    try:
        update_progress(
            self.task_uid,
            progress=0,
            subtask_percentage=subtask_percentage,
            subtask_start=subtask_start,
            eta=eta,
            msg="Querying provider data",
        )
        conf: dict = yaml.safe_load(self.config) or dict()
        session = get_or_update_session(slug=self.slug, **conf)
        req = session.post(self.url, data=query, stream=True)
        if not req.ok:
            # Workaround for https://bugs.python.org/issue27777
            query = {"data": query}
            req = session.post(self.url, data=query, stream=True)
        req.raise_for_status()

        # Determine the expected payload size; fall back to reading the whole
        # body when the server sends no usable content-length.
        try:
            total_size = int(req.headers.get("content-length"))
        except (ValueError, TypeError):
            if req.content:
                total_size = len(req.content)
            else:
                raise Exception("Overpass Query failed to return any data")

        # Since the request takes a while, jump progress to a very high percent...
        query_percent = 85.0
        download_percent = 100.0 - query_percent
        update_progress(
            self.task_uid,
            progress=query_percent,
            subtask_percentage=subtask_percentage,
            subtask_start=subtask_start,
            eta=eta,
            msg="Downloading data from provider: 0 of {:.2f} MB(s)".format(total_size / float(1e6)),
        )

        CHUNK = 1024 * 1024 * 2  # 2MB chunks
        update_interval = 1024 * 1024 * 250  # Every 250 MB
        written_size = 0
        last_update = 0
        with logging_open(self.raw_osm, "wb", user_details=user_details) as fd:
            for chunk in req.iter_content(CHUNK):
                fd.write(chunk)
                # BUG FIX: count bytes actually received (len(chunk)), not the
                # nominal CHUNK size — the final chunk is usually smaller, so
                # adding CHUNK overstated progress and could exceed 100%.
                written_size += len(chunk)
                # Limit the number of calls to update_progress because every time update_progress is called,
                # the ExportTask model is updated, causing django_audit_logging to update the audit way to much
                # (via the post_save hook). In the future, we might try still using update progress just as much
                # but update the model less to make the audit log less spammed, or making audit_logging only log
                # certain model changes rather than logging absolutely everything.
                last_update += len(chunk)
                if last_update > update_interval:
                    last_update = 0
                    progress = query_percent + (float(written_size) / float(total_size) * download_percent)
                    update_progress(
                        self.task_uid,
                        progress=progress,
                        subtask_percentage=subtask_percentage,
                        subtask_start=subtask_start,
                        eta=eta,
                        msg="Downloading data from provider: {:.2f} of {:.2f} MB(s)".format(
                            written_size / float(1e6), total_size / float(1e6)
                        ),
                    )

        # Done w/ this subtask
        update_progress(
            self.task_uid,
            progress=100,
            subtask_percentage=subtask_percentage,
            subtask_start=subtask_start,
            eta=eta,
            msg="Completed downloading data from provider",
        )
    except exceptions.RequestException as e:
        logger.error("Overpass query threw: {0}".format(e))
        # Chain the original error so the root cause is preserved.
        raise exceptions.RequestException(e) from e
    finally:
        if req:
            req.close()
    logger.debug(f"Query finished at {datetime.now()}")
    logger.debug(f"Wrote overpass query results to: {self.raw_osm}")
    return self.raw_osm