Example #1
    def get_job_results(self):
        """
        Fetches the job results
        Returns the results' download URL.
        """

        # poll job status
        ogc_job = self.wait_for_ogc_process_job(self.job_url, self.task_id)
        if ogc_job.get("status") != OGC_Status.SUCCESSFUL.value:
            logger.error(ogc_job)
            raise Exception(
                f"Unsuccessful OGC export. {ogc_job.get('status')}: {ogc_job.get('message')}"
            )

        update_progress(self.task_id, progress=50, subtask_percentage=50)

        # fetch job results
        try:
            # Note: urljoin only appends "results/" when job_url ends with a
            # trailing slash; otherwise it replaces the last path segment.
            response = self.session.get(urljoin(self.job_url, "results/"))
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            raise Exception(f"Unsuccessful request: {e}")

        response_content = response.json()
        download_url = response_content.get("archive_format",
                                            dict()).get("href")

        if not download_url:
            logger.error(response_content)
            raise Exception(
                "The OGC Process server did not produce a download.")

        return download_url
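
A short sketch of what a caller might do with the returned URL (hypothetical; "exporter" stands in for an instance of the class above, and the output path is a placeholder):

import requests

download_url = exporter.get_job_results()  # 'exporter' is an assumed instance
# Stream the archive produced by the OGC process to disk.
with requests.get(download_url, stream=True, timeout=60) as response:
    response.raise_for_status()
    with open("export.zip", "wb") as out:
        for chunk in response.iter_content(chunk_size=1024 * 1024):
            out.write(chunk)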
Example #2
    def wait_for_ogc_process_job(self, job_url, task_id=None, interval=5):
        """
        Function polls an OGC process' job until it is done processing.
        Returns the final response's content, which includes the job' status.
        """

        job_status = None
        counter = 0
        while job_status not in OGC_Status.get_finished_status():
            counter += interval
            time.sleep(interval)
            try:
                response = self.session.get(job_url)
                response.raise_for_status()
            except requests.exceptions.RequestException as e:
                raise Exception(f"Unsuccessful request:{e}")
            response_content = response.json()
            job_status = response_content.get("status")
            if not job_status:
                logger.error(response_content)
                raise Exception(
                    "OGC API Process service did not provide a valid status.")
            if task_id:
                update_progress(task_id,
                                progress=25,
                                subtask_percentage=response_content.get(
                                    "progress", 50))
            logger.info(
                f"Waiting for {task_id} to finish from {job_url} "
                f"(total time: {counter}s)."
            )

        # The while loop can only exit once job_status is a finished status.
        return response_content
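
The OGC_Status helper referenced in Examples 1 and 2 isn't shown here. A minimal sketch of what it might look like, inferred from how it's used; the member names follow the OGC API - Processes status codes, but the actual eventkit_cloud definition may differ:

from enum import Enum


class OGC_Status(Enum):
    # Status codes from the OGC API - Processes spec; the real
    # eventkit_cloud enum may differ (this is an assumption).
    ACCEPTED = "accepted"
    RUNNING = "running"
    SUCCESSFUL = "successful"
    FAILED = "failed"
    DISMISSED = "dismissed"

    @staticmethod
    def get_finished_status():
        # Terminal states: polling stops when one of these is reached.
        return [
            OGC_Status.SUCCESSFUL.value,
            OGC_Status.FAILED.value,
            OGC_Status.DISMISSED.value,
        ]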
Example #3
    def log_step(self, progress):
        from eventkit_cloud.tasks.helpers import update_progress

        # update_progress may also call eta.update(), but update_progress is
        # rate-limited, so we call it here as well to collect more data points
        # for better ETA estimates.
        self.eta.update(progress.progress)

        if self.task_uid:
            if self.log_step_counter == 0:
                if (
                    get_cache_value(uid=self.task_uid, attribute="status", model_name="ExportTaskRecord")
                    == TaskState.CANCELED.value
                ):
                    logger.error(f"The task uid: {self.task_uid} was canceled. Exiting...")
                    raise Exception("The task was canceled.")

                update_progress(self.task_uid, progress=progress.progress * 100, eta=self.eta)
                self.log_step_counter = self.log_step_step
            self.log_step_counter -= 1

        # Old version of super.log_step that includes ETA string
        # https://github.com/mapproxy/mapproxy/commit/93bc53a01318cd63facdb4ee13968caa847a5c17
        if not self.verbose:
            return
        if (self._laststep + self.interval) < time.time():
            logger.info(
                f"[{timestamp()}] {progress.progress * 100:6.2f}%\t{progress.progress_str.ljust(20)} ETA: {self.eta}\r"
            )
            # [12:24:08] 100.00%     000000               ETA: 2020-08-06-12:22:30-UTC
            self._laststep = time.time()
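
The eta object isn't defined in this example. A minimal sketch with a compatible interface, assuming update() takes progress as a 0..1 fraction and str() renders a projected finish time like the sample output above (hypothetical; the real estimator lives elsewhere in EventKit):

import time
from datetime import datetime, timezone


class SimpleETA:
    """Hypothetical stand-in for the eta object used above."""

    def __init__(self):
        self.start = time.time()
        self.finish = None

    def update(self, progress):
        # progress is a 0..1 fraction; extrapolate total duration from the
        # elapsed time and record the projected finish timestamp.
        if progress > 0:
            elapsed = time.time() - self.start
            self.finish = self.start + elapsed / progress

    def __str__(self):
        if self.finish is None:
            return "unknown"
        finish = datetime.fromtimestamp(self.finish, tz=timezone.utc)
        return finish.strftime("%Y-%m-%d-%H:%M:%S-UTC")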
Example #4
def progress_callback(pct, msg, user_data):
    from eventkit_cloud.tasks.helpers import update_progress

    update_progress(
        user_data.get("task_uid"),
        progress=round(pct * 100),
        subtask_percentage=user_data.get("subtask_percentage", 100.0),
        msg=msg,
    )
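
The (pct, msg, user_data) signature matches GDAL's progress-callback convention, so a function like this can be handed directly to GDAL's Python utilities. A usage sketch, with placeholder file names and task data (assumptions, not from the source):

from osgeo import gdal

gdal.UseExceptions()

# GDAL reports pct as a 0..1 fraction and passes callback_data as user_data.
gdal.Translate(
    "output.tif",   # placeholder output path
    "input.tif",    # placeholder input path
    callback=progress_callback,
    callback_data={"task_uid": "1234", "subtask_percentage": 50.0},
)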
Example #5
    def test_update_progress(self, mock_close, mock_set_cache_value):
        uid = "1234"
        estimated = timezone.now()
        update_progress(uid, progress=50, estimated_finish=estimated)
        mock_close.assert_called_once()
        mock_set_cache_value.assert_has_calls([
            call(uid=uid,
                 attribute="progress",
                 model_name="ExportTaskRecord",
                 value=50),
            call(uid=uid,
                 attribute="estimated_finish",
                 model_name="ExportTaskRecord",
                 value=estimated),
        ])
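
The mock_close and mock_set_cache_value arguments imply the test is wrapped in patch decorators, roughly as below. The patch targets are assumptions; note that decorators apply bottom-up, so the bottom patch becomes the first mock argument:

from unittest.mock import call, patch

@patch("eventkit_cloud.tasks.helpers.set_cache_value")  # assumed target
@patch("eventkit_cloud.tasks.helpers.connection.close")  # assumed target
def test_update_progress(self, mock_close, mock_set_cache_value):
    ...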
Example #6
    def run(self, subtask_percentage=100, subtask_start=0, eta=None):
        """
        Create the GeoPackage from the osm data.
        """
        if self.is_complete:
            logger.debug("Skipping Geopackage, file exists")
            return
        keys_points = self.feature_selection.key_union("points")
        keys_lines = self.feature_selection.key_union("lines")
        keys_polygons = self.feature_selection.key_union("polygons")
        osmconf = OSMConfig(self.stage_dir,
                            points=keys_points,
                            lines=keys_lines,
                            polygons=keys_polygons)
        conf = osmconf.create_osm_conf()
        logger.debug(
            f"Creating OSM gpkg using OSM_MAX_TMPFILE_SIZE {settings.OSM_MAX_TMPFILE_SIZE} "
            f"from {self.input_pbf} to {self.output_gpkg}")
        gdalutils.convert(
            input_file=self.input_pbf,
            output_file=self.output_gpkg,
            driver="GPKG",
            boundary=json.loads(self.aoi_geom.geojson),
            config_options=[
                ("OSM_CONFIG_FILE", conf),
                ("OGR_INTERLEAVED_READING", "YES"),
                ("OSM_MAX_TMPFILE_SIZE", settings.OSM_MAX_TMPFILE_SIZE),
            ],
            task_uid=self.export_task_record_uid,
        )
        # If the geopackage has no data, return None so the caller can cancel
        # the provider task.
        if not check_content_exists(self.output_gpkg):
            return None
        """
        Create the default osm gpkg schema
        """
        conn = sqlite3.connect(self.output_gpkg)
        conn.enable_load_extension(True)
        cur = conn.cursor()
        cur.execute("select load_extension('mod_spatialite')")
        cur.execute(
            "CREATE TABLE boundary (id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, geom GEOMETRY)"
        )
        cur.execute(
            "INSERT INTO boundary (geom) VALUES (GeomFromWKB(?,4326));",
            (self.aoi_geom.wkb, ))

        update_progress(self.export_task_record_uid,
                        30,
                        subtask_percentage,
                        subtask_start,
                        eta=eta)

        cur.executescript(SPATIAL_SQL)
        self.update_zindexes(cur, self.feature_selection)
        update_progress(self.export_task_record_uid,
                        42,
                        subtask_percentage,
                        subtask_start,
                        eta=eta)

        # add themes
        create_sqls, index_sqls = self.feature_selection.sqls
        for query in create_sqls:
            logger.debug(query)
            cur.executescript(query)
        update_progress(self.export_task_record_uid,
                        50,
                        subtask_percentage,
                        subtask_start,
                        eta=eta)

        for query in index_sqls:
            logger.debug(query)
            cur.executescript(query)
        """
        Remove points/lines/multipolygons tables
        """
        cur.execute("DROP TABLE points")
        cur.execute("DROP TABLE lines")
        cur.execute("DROP TABLE multipolygons")

        cur.execute("VACUUM;")

        conn.commit()
        conn.close()

        if self.per_theme:
            # this creates per-theme GPKGs
            for theme in self.feature_selection.themes:
                conn = sqlite3.connect(self.stage_dir + slugify(theme) +
                                       ".gpkg")
                conn.enable_load_extension(True)
                cur = conn.cursor()
                cur.execute("attach database ? as 'geopackage'",
                            (self.output_gpkg, ))
                cur.execute(
                    "create table gpkg_spatial_ref_sys as select * from geopackage.gpkg_spatial_ref_sys"
                )
                cur.execute(
                    "create table gpkg_contents as select * from geopackage.gpkg_contents where 0"
                )
                cur.execute(
                    "create table gpkg_geometry_columns as select * from geopackage.gpkg_geometry_columns where 0"
                )
                for geom_type in self.feature_selection.geom_types(theme):
                    for stmt in self.feature_selection.create_sql(
                            theme, geom_type):
                        cur.executescript(stmt)
                cur.execute("VACUUM;")
                conn.commit()
                conn.close()

        update_progress(self.export_task_record_uid,
                        100,
                        subtask_percentage,
                        subtask_start,
                        eta=eta)
        return self.output_gpkg
Example #7
    def run_query(self, user_details=None, subtask_percentage=100, subtask_start=0, eta=None):
        """
        Run the overpass query.
        subtask_percentage is the percentage of the task referenced by self.task_uid this method takes up.
            Used to update progress.

        Return:
            the path to the overpass extract
        """
        from eventkit_cloud.tasks.helpers import update_progress
        from audit_logging.file_logging import logging_open

        # This is just to make it easier to trace when user_details haven't been sent
        if user_details is None:
            user_details = {"username": "******"}

        req = None
        query = self.get_query()
        logger.debug(query)
        logger.debug(f"Query started at: {datetime.now()}")
        try:
            update_progress(
                self.task_uid,
                progress=0,
                subtask_percentage=subtask_percentage,
                subtask_start=subtask_start,
                eta=eta,
                msg="Querying provider data",
            )
            conf: dict = yaml.safe_load(self.config) or dict()
            session = get_or_update_session(slug=self.slug, **conf)
            req = session.post(self.url, data=query, stream=True)
            if not req.ok:
                # Workaround for https://bugs.python.org/issue27777
                query = {"data": query}
                req = session.post(self.url, data=query, stream=True)
            req.raise_for_status()
            try:
                total_size = int(req.headers.get("content-length"))
            except (ValueError, TypeError):
                if req.content:
                    total_size = len(req.content)
                else:
                    raise Exception("Overpass Query failed to return any data")

            # Since the request takes a while, jump progress to a very high percent...
            query_percent = 85.0
            download_percent = 100.0 - query_percent
            update_progress(
                self.task_uid,
                progress=query_percent,
                subtask_percentage=subtask_percentage,
                subtask_start=subtask_start,
                eta=eta,
                msg="Downloading data from provider: 0 of {:.2f} MB(s)".format(total_size / float(1e6)),
            )

            CHUNK = 1024 * 1024 * 2  # 2MB chunks
            update_interval = 1024 * 1024 * 250  # Every 250 MB

            written_size = 0
            last_update = 0
            with logging_open(self.raw_osm, "wb", user_details=user_details) as fd:
                for chunk in req.iter_content(CHUNK):
                    fd.write(chunk)
                    written_size += len(chunk)  # the final chunk may be smaller than CHUNK

                    # Limit the number of calls to update_progress, because every call
                    # updates the ExportTask model, which makes django_audit_logging (via
                    # its post_save hook) update the audit log far too often. In the future
                    # we might keep calling update_progress this often but update the model
                    # less frequently, or have audit_logging log only certain model changes
                    # rather than absolutely everything.
                    last_update += CHUNK
                    if last_update > update_interval:
                        last_update = 0
                        progress = query_percent + (float(written_size) / float(total_size) * download_percent)
                        update_progress(
                            self.task_uid,
                            progress=progress,
                            subtask_percentage=subtask_percentage,
                            subtask_start=subtask_start,
                            eta=eta,
                            msg="Downloading data from provider: {:.2f} of {:.2f} MB(s)".format(
                                written_size / float(1e6), total_size / float(1e6)
                            ),
                        )

            # Done w/ this subtask
            update_progress(
                self.task_uid,
                progress=100,
                subtask_percentage=subtask_percentage,
                subtask_start=subtask_start,
                eta=eta,
                msg="Completed downloading data from provider",
            )
        except exceptions.RequestException as e:
            logger.error("Overpass query threw: {0}".format(e))
            raise exceptions.RequestException(e)
        finally:
            if req:
                req.close()

        logger.debug(f"Query finished at {datetime.now()}")
        logger.debug(f"Wrote overpass query results to: {self.raw_osm}")
        return self.raw_osm
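
Across these examples, subtask_percentage and subtask_start describe where a subtask's local 0-100 progress lands within the parent task's overall range. A minimal sketch of that arithmetic (an assumption about the semantics; the real logic lives in eventkit_cloud.tasks.helpers.update_progress):

def scale_progress(progress, subtask_percentage=100.0, subtask_start=0):
    # Map a subtask's local 0-100 progress onto the parent task's 0-100 range.
    return subtask_start + (progress / 100.0) * subtask_percentage

# e.g. a subtask that covers 50% of the job and starts at the halfway mark:
# scale_progress(50, subtask_percentage=50, subtask_start=50) -> 75.0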