Ejemplo n.º 1
0
    def log_step(self, progress):
        """Record a mapproxy seed progress step.

        Feeds the ETA estimator, periodically pushes progress to the export
        task record, and (when verbose) writes a console progress line.
        """
        from eventkit_cloud.tasks.export_tasks import update_progress

        # update_progress would also feed the estimator, but it is
        # rate-limited; updating here as well gives more data points for
        # better eta estimates.
        self.eta.update(progress.progress)

        if self.task_uid:
            if not self.log_step_counter:
                update_progress(self.task_uid,
                                progress=progress.progress * 100,
                                eta=self.eta)
                self.log_step_counter = self.log_step_step
            self.log_step_counter -= 1

        # Old version of super.log_step that includes ETA string
        # https://github.com/mapproxy/mapproxy/commit/93bc53a01318cd63facdb4ee13968caa847a5c17
        if not self.verbose:
            return
        # log progress at most every 500ms
        if (self._laststep + .5) < time.time():
            message = '[%s] %6.2f%%\t%-20s ETA: %s\r' % (timestamp(),
                                                         progress.progress * 100,
                                                         progress.progress_str,
                                                         self.eta)
            self.out.write(message)
            self.out.flush()
            self._laststep = time.time()
Ejemplo n.º 2
0
 def log_step(self, progress):
     """Push rate-limited progress to the export task, then defer to mapproxy."""
     from eventkit_cloud.tasks.export_tasks import update_progress

     if self.task_uid:
         # Only write to the task model once every `log_step_step` calls;
         # every update_progress call touches the database.
         if not self.log_step_counter:
             update_progress(self.task_uid,
                             progress=progress.progress * 100)
             self.log_step_counter = self.log_step_step
         self.log_step_counter -= 1
     super(CustomLogger, self).log_step(progress)
Ejemplo n.º 3
0
 def test_update_progress(self, export_task, mock_close, mock_set_cache_value):
     """update_progress should close stale connections and cache both the
     progress value and the estimated finish time for the task record."""
     provider_task = DataProviderTaskRecord.objects.create(
         run=self.run,
         name='test_provider_task',
     )
     task_uid = ExportTaskRecord.objects.create(
         export_provider_task=provider_task,
         status=TaskStates.PENDING.value,
         name="test_task",
     ).uid
     estimated = timezone.now()
     update_progress(task_uid, progress=50, estimated_finish=estimated)
     mock_close.assert_called_once()
     expected_calls = [
         call(uid=task_uid, attribute='progress',
              model_name='ExportTaskRecord', value=50),
         call(uid=task_uid, attribute='estimated_finish',
              model_name='ExportTaskRecord', value=estimated),
     ]
     mock_set_cache_value.assert_has_calls(expected_calls)
Ejemplo n.º 4
0
 def test_update_progress(self, export_task, mock_close,
                          mock_set_cache_value):
     """Both progress and estimated finish must be written to the cache."""
     provider_record = DataProviderTaskRecord.objects.create(
         run=self.run, name='test_provider_task')
     task_record = ExportTaskRecord.objects.create(
         export_provider_task=provider_record,
         status=TaskStates.PENDING.value,
         name="test_task")
     estimated = timezone.now()
     update_progress(task_record.uid,
                     progress=50,
                     estimated_finish=estimated)
     mock_close.assert_called_once()
     mock_set_cache_value.assert_has_calls([
         call(uid=task_record.uid,
              attribute=attribute,
              model_name='ExportTaskRecord',
              value=value)
         for attribute, value in (('progress', 50),
                                  ('estimated_finish', estimated))
     ])
Ejemplo n.º 5
0
    def log_step(self, progress):
        """Track ETA and push periodic progress updates for this task."""
        from eventkit_cloud.tasks.export_tasks import update_progress

        # update_progress also feeds the estimator, but it is rate-limited;
        # updating here as well gives the ETA more data points to work with.
        self.eta.update(progress.progress)

        if self.task_uid:
            counter = self.log_step_counter
            if counter == 0:
                update_progress(self.task_uid,
                                progress=progress.progress * 100,
                                eta=self.eta)
                counter = self.log_step_step
            self.log_step_counter = counter - 1

        # Old version of super.log_step that includes ETA string
        # https://github.com/mapproxy/mapproxy/commit/93bc53a01318cd63facdb4ee13968caa847a5c17
        if not self.verbose:
            return
        if time.time() > self._laststep + .5:
            # log progress at most every 500ms
            line = '[%s] %6.2f%%\t%-20s ETA: %s\r' % (
                timestamp(), progress.progress * 100,
                progress.progress_str, self.eta)
            self.out.write(line)
            self.out.flush()
            self._laststep = time.time()
Ejemplo n.º 6
0
    def run_query(self,
                  user_details=None,
                  subtask_percentage=100,
                  subtask_start=0,
                  eta=None):
        """
        Run the overpass query.

        subtask_percentage is the percentage of the task referenced by self.task_uid this method takes up.
            Used to update progress.

        Args:
            user_details: dict carrying a 'username' for audit logging of the
                output file write; a placeholder is used when not supplied.
            subtask_percentage: share of the parent task this query represents.
            subtask_start: progress offset of this subtask within the task.
            eta: optional ETA estimator forwarded to update_progress.

        Return:
            the path to the overpass extract

        Raises:
            exceptions.RequestException: when the HTTP request to the
                provider fails.
            Exception: when the provider returns no data at all.
        """
        from eventkit_cloud.tasks.export_tasks import update_progress
        from audit_logging.file_logging import logging_open

        # This is just to make it easier to trace when user_details haven't been sent
        if user_details is None:
            user_details = {'username': '******'}

        req = None
        q = self.get_query()
        logger.debug(q)
        # Lazy %-style logging args: the previous '...%s'.format(...) call
        # never substituted the timestamp (str.format ignores '%s').
        logger.debug('Query started at: %s', datetime.now())
        try:
            update_progress(self.task_uid,
                            progress=0,
                            subtask_percentage=subtask_percentage,
                            subtask_start=subtask_start,
                            eta=eta,
                            msg='Querying provider data')

            req = auth_requests.post(self.url,
                                     slug=self.slug,
                                     data=q,
                                     stream=True,
                                     verify=self.verify_ssl)

            try:
                total_size = int(req.headers.get('content-length'))
            except (ValueError, TypeError):
                if req.content:
                    total_size = len(req.content)
                else:
                    raise Exception("Overpass Query failed to return any data")

            # Since the request takes a while, jump progress to a very high percent...
            query_percent = 85.0
            download_percent = 100.0 - query_percent
            update_progress(
                self.task_uid,
                progress=query_percent,
                subtask_percentage=subtask_percentage,
                subtask_start=subtask_start,
                eta=eta,
                msg='Downloading data from provider: 0 of {:.2f} MB(s)'.format(
                    total_size / float(1e6)))

            CHUNK = 1024 * 1024 * 2  # 2MB chunks
            update_interval = 1024 * 1024 * 250  # Every 250 MB

            written_size = 0
            last_update = 0
            with logging_open(self.raw_osm, 'wb',
                              user_details=user_details) as fd:
                for chunk in req.iter_content(CHUNK):
                    fd.write(chunk)
                    # Count the bytes actually received; the final chunk is
                    # normally shorter than CHUNK, so adding CHUNK blindly
                    # would overstate progress.
                    written_size += len(chunk)

                    # Limit the number of calls to update_progress because every time update_progress is called,
                    # the ExportTask model is updated, causing django_audit_logging to update the audit way to much
                    # (via the post_save hook). In the future, we might try still using update progress just as much
                    # but update the model less to make the audit log less spammed, or making audit_logging only log
                    # certain model changes rather than logging absolutely everything.
                    last_update += len(chunk)
                    if last_update > update_interval:
                        last_update = 0
                        progress = query_percent + (float(written_size) /
                                                    float(total_size) *
                                                    download_percent)
                        update_progress(
                            self.task_uid,
                            progress=progress,
                            subtask_percentage=subtask_percentage,
                            subtask_start=subtask_start,
                            eta=eta,
                            msg='Downloading data from provider: {:.2f} of {:.2f} MB(s)'.format(
                                written_size / float(1e6),
                                total_size / float(1e6)))

            # Done w/ this subtask
            update_progress(self.task_uid,
                            progress=100,
                            subtask_percentage=subtask_percentage,
                            subtask_start=subtask_start,
                            eta=eta,
                            msg='Completed downloading data from provider')
        except exceptions.RequestException as e:
            logger.error('Overpass query threw: {0}'.format(e))
            raise exceptions.RequestException(e)
        finally:
            # Always release the streamed connection.
            if req:
                req.close()

        logger.debug('Query finished at %s', datetime.now())
        logger.debug('Wrote overpass query results to: %s', self.raw_osm)
        return self.raw_osm
Ejemplo n.º 7
0
    def run_query(self, user_details=None, subtask_percentage=100):
        """
        Run the overpass query.

        subtask_percentage is the percentage of the task referenced by self.task_uid this method takes up.
            Used to update progress.

        Args:
            user_details: dict carrying a 'username' for audit logging of the
                output file write; a placeholder is used when not supplied.
            subtask_percentage: share of the parent task this query represents.

        Return:
            the path to the overpass extract

        Raises:
            exceptions.RequestException: when the HTTP request to the
                provider fails.
            Exception: when the provider returns no data at all.
        """
        from eventkit_cloud.tasks.export_tasks import update_progress
        from audit_logging.file_logging import logging_open

        # This is just to make it easier to trace when user_details haven't been sent
        if user_details is None:
            user_details = {'username': '******'}

        req = None
        q = self.get_query()
        logger.debug(q)
        # Lazy %-style logging args: the previous '...%s'.format(...) call
        # never substituted the timestamp (str.format ignores '%s').
        logger.debug('Query started at: %s', datetime.now())
        try:
            req = auth_requests.post(self.url,
                                     slug=self.slug,
                                     data=q,
                                     stream=True,
                                     verify=self.verify_ssl)

            # Since the request takes a while, jump progress to an arbitrary 50 percent...
            update_progress(self.task_uid,
                            progress=50,
                            subtask_percentage=subtask_percentage)
            try:
                size = int(req.headers.get('content-length'))
            except (ValueError, TypeError):
                if req.content:
                    size = len(req.content)
                else:
                    raise Exception("Overpass Query failed to return any data")
            CHUNK = 1024 * 1024 * 2  # 2MB chunks
            # No per-chunk update_progress call here: every call updates the
            # ExportTask model, and the django_audit_logging post_save hook
            # then spams the audit log with a record per update. If that is
            # fixed, per-chunk progress (starting from 50%) can be restored.
            with logging_open(self.raw_osm, 'wb',
                              user_details=user_details) as fd:
                for chunk in req.iter_content(CHUNK):
                    fd.write(chunk)
        except exceptions.RequestException as e:
            logger.error('Overpass query threw: {0}'.format(e))
            raise exceptions.RequestException(e)
        finally:
            # Always release the streamed connection (matches the other
            # overpass runner, which closes req in a finally block).
            if req:
                req.close()

        logger.debug('Query finished at %s', datetime.now())
        logger.debug('Wrote overpass query results to: %s', self.raw_osm)
        return self.raw_osm
Ejemplo n.º 8
0
    def run(self, subtask_percentage=100, subtask_start=0, eta=None):
        """
        Create the GeoPackage from the osm data.

        Converts the input PBF via ogr2ogr, builds the default osm gpkg
        schema (boundary table, spatial SQL, z-indexes, themed tables),
        optionally splits out one GeoPackage per theme, and reports progress
        along the way.

        Args:
            subtask_percentage: share of the parent task this step represents.
            subtask_start: progress offset of this subtask within the task.
            eta: optional ETA estimator forwarded to update_progress.
        """

        # avoiding a circular import
        from eventkit_cloud.tasks.export_tasks import update_progress

        if self.is_complete:
            LOG.debug("Skipping Geopackage, file exists")
            return
        keys_points = self.feature_selection.key_union('points')
        keys_lines = self.feature_selection.key_union('lines')
        keys_polygons = self.feature_selection.key_union('polygons')
        osmconf = OSMConfig(self.stage_dir, points=keys_points, lines=keys_lines, polygons=keys_polygons)
        conf = osmconf.create_osm_conf()
        ogr_cmd = self.ogr_cmd.safe_substitute({'gpkg': self.output_gpkg,
                                                'osm': self.input_pbf, 'osmconf': conf})
        LOG.debug('Running: %s' % ogr_cmd)
        # NOTE(review): shell=True with a templated command string -- the
        # substituted values are internal paths here, but confirm none of
        # them can carry untrusted input before reusing this pattern.
        subprocess.check_call(ogr_cmd, shell=True, executable='/bin/bash')

        # Create the default osm gpkg schema.
        conn = sqlite3.connect(self.output_gpkg)
        conn.enable_load_extension(True)
        cur = conn.cursor()
        cur.execute("select load_extension('mod_spatialite')")
        cur.execute("CREATE TABLE boundary (id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, geom GEOMETRY)")
        cur.execute("INSERT INTO boundary (geom) VALUES (GeomFromWKB(?,4326));", (self.aoi_geom.wkb,))
        update_progress(self.export_task_record_uid, 30, subtask_percentage, subtask_start, eta=eta)

        cur.executescript(SPATIAL_SQL)
        self.update_zindexes(cur, self.feature_selection)
        update_progress(self.export_task_record_uid, 42, subtask_percentage, subtask_start, eta=eta)

        # add themes
        create_sqls, index_sqls = self.feature_selection.sqls
        for query in create_sqls:
            LOG.debug(query)
            cur.executescript(query)
        update_progress(self.export_task_record_uid, 50, subtask_percentage, subtask_start, eta=eta)

        for query in index_sqls:
            LOG.debug(query)
            cur.executescript(query)

        # Remove points/lines/multipolygons tables now that the themed
        # tables have been populated from them.
        cur.execute("DROP TABLE points")
        cur.execute("DROP TABLE lines")
        cur.execute("DROP TABLE multipolygons")

        conn.commit()
        conn.close()

        if self.per_theme:
            # this creates per-theme GPKGs
            for theme in self.feature_selection.themes:
                conn = sqlite3.connect(self.stage_dir + slugify(theme) + ".gpkg")
                conn.enable_load_extension(True)
                cur = conn.cursor()
                cur.execute("attach database ? as 'geopackage'", (self.output_gpkg,))
                cur.execute("create table gpkg_spatial_ref_sys as select * from geopackage.gpkg_spatial_ref_sys")
                cur.execute("create table gpkg_contents as select * from geopackage.gpkg_contents where 0")
                cur.execute(
                    "create table gpkg_geometry_columns as select * from geopackage.gpkg_geometry_columns where 0")
                for geom_type in self.feature_selection.geom_types(theme):
                    for stmt in self.feature_selection.create_sql(theme, geom_type):
                        cur.executescript(stmt)
                conn.commit()
                conn.close()

        update_progress(self.export_task_record_uid, 100, subtask_percentage, subtask_start, eta=eta)