def process_pkg(db_cursor, input_pkg):
            """Remove the legacy spatial ingestion of one package, if it has one.

            A package counts as legacy-ingested when "_table_metadata" lists a
            non-alias table named after the package id (dashes replaced by
            underscores). For such a package this:

            1. deletes its resources whose URL points at the data.gov.au
               Geoserver (skipped when the package state is 'deleted'),
            2. recursively deletes the Geoserver workspace named after the
               package, and
            3. drops the SQL table.

            :param db_cursor: object exposing ``execute(sql[, params])``; the
                SELECT uses the ``%s`` placeholder style (assumes a
                psycopg2-style driver -- TODO confirm).
            :param input_pkg: mapping with at least the keys ``'id'`` and
                ``'name'``.
            :returns: None

            ``log``, ``model``, ``toolkit``, ``requests``, ``geoserver_wsurl``
            and ``geoserver_credentials`` are resolved from the enclosing scope.
            """
            table_name = input_pkg['id'].replace("-", "_")

            # Bind the table name as a parameter: interpolating it into the
            # statement left it unquoted (invalid SQL) and open to injection.
            results = db_cursor.execute(
                u'''SELECT 1 FROM "_table_metadata" where name = %s and alias_of is null''',
                (table_name,))

            # DB-API cursors return None from execute() and expose rowcount on
            # the cursor itself; wrappers that return a result object keep
            # working through the first branch.
            if results is not None:
                res_exists = results.rowcount > 0
            else:
                res_exists = db_cursor.rowcount > 0

            if not res_exists:
                return

            log.debug("{0} appears to contain a legacy spatial ingestion.".format(input_pkg['name']))
            # We have a table that exists in the PostGIS DB
            pkg_raw = model.Package.get(input_pkg['id'])

            if pkg_raw.state != 'deleted':
                # Iterate over a snapshot: resource_delete may alter the
                # package's resource collection while we walk it.
                for res_raw in list(pkg_raw.resources):
                    res_dict = res_raw.as_dict()
                    if "http://data.gov.au/geoserver/" in res_dict.get('url', ''):
                        toolkit.get_action('resource_delete')({'ignore_auth': True}, res_dict)

            res = requests.delete(geoserver_wsurl + '/' + input_pkg['name'] + '?recurse=true&quietOnNotFound',
                                  auth=geoserver_credentials)

            log.info("Geoserver recursive workspace deletion returned {0}".format(res))

            # Identifiers cannot be bound as parameters, so quote the name
            # instead (doubling any embedded quote). Unquoted, a UUID-derived
            # name that starts with a digit is a syntax error.
            quoted_table = '"{0}"'.format(table_name.replace('"', '""'))
            db_cursor.execute("DROP TABLE IF EXISTS {tab_name}".format(tab_name=quoted_table))

            log.info("Dropped SQL table {0}".format(table_name))
    def _purge(self, pkg_id):
        """Purge the spatially ingested resources of a single package.

        Looks the package up, resolves the configured ingestor user
        (``ckan.spatialingestor.ckan_user``, falling back to ``'default'``)
        into the action context, and calls ``purge_resource`` once per
        resource of the package.

        :param pkg_id: reference passed to ``model.Package.get``.
        :returns: None
        """
        package = model.Package.get(pkg_id)
        if package is None:
            # Package.get yields None when nothing matches; report it instead
            # of failing with an AttributeError on ``None.as_dict()``.
            log.error("Package {0} could not be found, nothing to purge".format(pkg_id))
            return

        pkg_dict = package.as_dict()

        log.info("Purging spatially ingested resources from package {0}...".format(pkg_dict['name']))

        context = {'user': toolkit.get_action('user_show')({'ignore_auth': True}, {
            'id': config.get('ckan.spatialingestor.ckan_user', 'default')
        })}

        for res in pkg_dict['resources']:
            purge_resource(context, res['id'])
    def _reingest_all(self):
        """Re-ingest spatial resources for all packages, then migrate legacy ingestions.

        Phase 1 calls ``ingest_resource(context, resource_id, False)`` for
        every resource of every package whose state is not 'deleted'. A
        failure in one package is logged and the loop moves on.

        Phase 2 opens a DB cursor via ``get_db_cursor()`` and runs
        ``process_pkg`` over every package (deleted ones included), again
        logging and skipping per-package failures.

        :returns: None (also when the DB connection cannot be opened, in
            which case phase 2 is skipped after logging an error).
        """
        context = {'user': toolkit.get_action('user_show')({'ignore_auth': True}, {
            'id': config.get('ckan.spatialingestor.ckan_user', 'default')
        })}

        pkg_ids = [r[0] for r in model.Session.query(model.Package.id).filter(model.Package.state != 'deleted').all()]

        log.info("Re-ingesting spatial resources for all packages...")

        total_packages = len(pkg_ids)
        for counter, pkg_id in enumerate(pkg_ids):
            sys.stdout.write("\rRe-ingesting spatial resources for dataset {0}/{1}".format(counter + 1, total_packages))
            sys.stdout.flush()
            pkg_dict = model.Package.get(pkg_id).as_dict()
            try:
                for res in pkg_dict['resources']:
                    ingest_resource(context, res['id'], False)
            except Exception as e:
                # Only log here. The handler used to go on to touch
                # ``db_cursor`` / ``table_name`` (undefined in this scope) and
                # ``log.ingo``, so every failure raised again instead of
                # continuing with the next package.
                log.error("Processing {0} failed with error {1}, continuing...".format(pkg_dict['name'], str(e)))

        # NOTE(review): everything below reads like the body of a separate
        # "migrate all" command whose ``def`` line is missing from this file.
        # It is kept in place to preserve behaviour -- confirm and split out.
        db_res = get_db_cursor()

        if db_res is None:
            log.error("Failed to open SQL connection to PostGIS DB")
            return None

        cursor, connection = db_res

        try:
            pkg_ids = [r[0] for r in model.Session.query(model.Package.id).all()]
            log.info("Migrating legacy spatial ingestion on all packages...")

            total_packages = len(pkg_ids)
            for counter, pkg_id in enumerate(pkg_ids):
                sys.stdout.write("\rProcessing dataset {0}/{1}".format(counter + 1, total_packages))
                sys.stdout.flush()
                pkg_dict = model.Package.get(pkg_id).as_dict()
                try:
                    process_pkg(cursor, pkg_dict)
                except Exception as e:
                    log.error("Processing {0} failed with error {1}, continuing...".format(pkg_dict['name'], str(e)))
        finally:
            # Release the DB handles even if something outside the
            # per-package try block raises.
            cursor.close()
            connection.close()

        sys.stdout.write("\n>>> Process complete\n")
Example #5
0
                })
        except toolkit.ValidationError, e:
            log.error(e)
    elif is_spatially_ingestible_resource(resource_dict):
        try:
            dataset = toolkit.get_action('package_show')(
                context, {
                    'id': resource_dict['package_id'],
                })
        except Exception, e:
            log.error(
                "Failed to retrieve package ID: {0} with error {1}".format(
                    resource_dict['package_id'], str(e)))
            return

        log.info("Loaded dataset {0}.".format(dataset['name']))

        # We auto_process spatial file by updating the resource, which will re-trigger this method
        resource_dict['spatial_parent'] = 'True'
        try:
            toolkit.get_action('resource_update')(context, resource_dict)
        except toolkit.ValidationError, e:
            log.error(e)


def purge_resource_datastores(context, resource_dict):
    # Have to be careful about how to delete child resources here in a sense that
    # the Spatialingestor microservice will not be able to query CKAN to find
    # child spatial resources. So, this must be done in this thread with the IDs
    # passed back to the micro-service