Example #1
# Module-level imports assumed by this snippet:
#   import multiprocessing as mp
#   import sqlalchemy as sa
#   from ckan.common import config
#   from ckan.cli import error_shout
def rebuild_fast():
    from ckan.lib.search import commit

    db_url = config['sqlalchemy.url']
    engine = sa.create_engine(db_url)
    package_ids = []
    result = engine.execute(u"select id from package where state = 'active';")
    for row in result:
        package_ids.append(row[0])

    def start(ids: list[str]):
        from ckan.lib.search import rebuild
        rebuild(package_ids=ids)

    def chunks(list_: list[str], n: int):
        u""" Yield n successive chunks from list_"""
        newn = int(len(list_) / n)
        for i in range(0, n - 1):
            yield list_[i * newn:i * newn + newn]
        yield list_[n * newn - newn:]

    processes = []

    try:
        for chunk in chunks(package_ids, mp.cpu_count()):
            process = mp.Process(target=start, args=(chunk, ))
            processes.append(process)
            process.daemon = True
            process.start()

        for process in processes:
            process.join()
        commit()
    except Exception as e:
        error_shout(e)
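The chunks() helper above splits the list of package ids into one slice per CPU so that each worker process indexes its own share. A minimal standalone sketch of that splitting behaviour (the sample ids and worker count are made up for illustration):

def chunks(list_, n):
    """Yield n successive chunks from list_ (same logic as above)."""
    newn = int(len(list_) / n)
    for i in range(0, n - 1):
        yield list_[i * newn:i * newn + newn]
    yield list_[n * newn - newn:]

# Hypothetical example: 10 package ids split across 3 worker processes.
ids = ['pkg-%d' % i for i in range(10)]
print(list(chunks(ids, 3)))
# -> [['pkg-0', 'pkg-1', 'pkg-2'], ['pkg-3', 'pkg-4', 'pkg-5'], ['pkg-6', 'pkg-7', 'pkg-8', 'pkg-9']]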
Example #2
def rebuild(verbose, force, refresh, only_missing, quiet, commit_each):
    u''' Rebuild search index '''
    from ckan.lib.search import rebuild, commit
    try:
        rebuild(only_missing=only_missing,
                force=force,
                refresh=refresh,
                defer_commit=(not commit_each),
                quiet=quiet)
    except Exception as e:
        tk.error_shout(e)
    if not commit_each:
        commit()
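The defer_commit=(not commit_each) argument above is what lets the command batch all Solr updates into a single commit at the end. The same pattern applies when reindexing individual datasets from extension code; a minimal sketch (the dataset ids are hypothetical placeholders):

from ckan.lib.search import rebuild, commit

# Reindex a few known datasets, deferring the Solr commit until the end
# so the commit cost is only paid once (ids below are placeholders).
for dataset_id in ('example-dataset-1', 'example-dataset-2'):
    rebuild(dataset_id, defer_commit=True)
commit()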
Example #3
    def publish_ogc_worker(self):
        '''
        Publish dataset WMS/WFS layers to GeoServer by popping
        an element (dataset id) from the publish_ogc_queue (Redis).
        '''

        print str(datetime.datetime.now()
                  ) + ' PUBLISH_OGC_WORKER: Started the worker process'
        # flush stdout see https://github.com/Supervisor/supervisor/issues/13
        sys.stdout.flush()
        try:
            r = self._redis_connection()
        except:
            print str(datetime.datetime.now(
            )) + ' PUBLISH_OGC_WORKER: ERROR, could not connect to Redis '
            sys.stdout.flush()

        # Lovely infinite loop ;P, we do need them from time to time
        while True:
            # POP an element (package_id) from publish_ogc_queue and publish it to OGC
            try:
                # we need to slow down this loop by setting the blpop timeout to 5 seconds
                # when publish_ogc_queue is empty

                queue_task = r.blpop('publish_ogc_queue', 5)

                if queue_task is not None:
                    package_id = queue_task[1]
                    print str(
                        datetime.datetime.now()
                    ) + ' PUBLISH_OGC_WORKER: Start publishing dataset: ' + package_id
                    sys.stdout.flush()

                    self.publish_ogc(package_id)
                    print str(
                        datetime.datetime.now()
                    ) + ' PUBLISH_OGC_WORKER: finished publishing now index: ' + package_id
                    sys.stdout.flush()
                    # rebuild solr index for this dataset to avoid duplicate datasets in search results
                    rebuild(package_id)
                    commit()

            except:
                print str(
                    datetime.datetime.now()
                ) + ' PUBLISH_OGC_WORKER: An error has occurred while publishing dataset: ' + package_id + ' to GeoServer'
                sys.stdout.flush()
                # retry in 30 seconds if something went south
                time.sleep(30)
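For context, something has to put dataset ids onto that queue in the first place. Since blpop() pops from the head of the list, the producer side would typically append with rpush(); a minimal sketch assuming a local Redis and a placeholder dataset id:

import redis

# Hypothetical producer: enqueue a dataset id so the worker's
# blpop('publish_ogc_queue', 5) loop above picks it up (rpush + blpop = FIFO).
r = redis.Redis(host='localhost', port=6379, db=0)
r.rpush('publish_ogc_queue', 'example-dataset-id')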
Example #4
def rebuild(ctx, verbose, force, refresh, only_missing, quiet, commit_each):
    u''' Rebuild search index '''
    flask_app = ctx.obj.app.apps['flask_app']._wsgi_app
    from ckan.lib.search import rebuild, commit
    try:
        with flask_app.test_request_context():
            rebuild(only_missing=only_missing,
                    force=force,
                    refresh=refresh,
                    defer_commit=(not commit_each),
                    quiet=quiet)
    except Exception as e:
        error_shout(e)
    if not commit_each:
        commit()
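Example #4 wraps the rebuild call in flask_app.test_request_context(), which pushes a Flask request context so that any code expecting one (for example helpers that touch the request) keeps working when invoked from the CLI. A minimal sketch of the same idea with a bare Flask app (the app and the path are made up):

from flask import Flask, request

app = Flask(__name__)

# Outside a real HTTP request, request-bound code normally fails; a test
# request context makes it work, which is what Example #4 relies on.
with app.test_request_context('/'):
    print(request.path)  # prints '/'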
Example #6
def rebuild(verbose: bool, force: bool, only_missing: bool, quiet: bool,
            commit_each: bool, package_id: str, clear: bool):
    u''' Rebuild search index '''
    from ckan.lib.search import rebuild, commit
    try:
        rebuild(package_id,
                only_missing=only_missing,
                force=force,
                defer_commit=(not commit_each),
                quiet=quiet,
                clear=clear)
    except Exception as e:
        error_shout(e)
    if not commit_each:
        commit()
Example #7
    def rebuild(self):
        from ckan.lib.search import rebuild, commit

        # By default we don't commit after each request to Solr, as it is
        # a really heavy operation and slows things down a lot

        if len(self.args) > 1:
            rebuild(self.args[1])
        else:
            rebuild(only_missing=self.options.only_missing,
                    force=self.options.force,
                    refresh=self.options.refresh,
                    defer_commit=(not self.options.commit_each))

        if not self.options.commit_each:
            commit()
Example #8
File: cli.py Project: abulte/ckan
    def rebuild(self):
        from ckan.lib.search import rebuild, commit

        # By default we don't commit after each request to Solr, as it is
        # a really heavy operation and slows things down a lot

        if len(self.args) > 1:
            rebuild(self.args[1])
        else:
            rebuild(only_missing=self.options.only_missing,
                    force=self.options.force,
                    refresh=self.options.refresh,
                    defer_commit=(not self.options.commit_each))

        if not self.options.commit_each:
            commit()
Example #9
    def publish_ogc_worker(self):
        '''
        Publish dataset WMS/WFS layers to GeoServer by popping
        an element (dataset id) from the publish_ogc_queue (Redis).
        '''

        print str(datetime.datetime.now()) + ' PUBLISH_OGC_WORKER: Started the worker process'
        # flush stdout see https://github.com/Supervisor/supervisor/issues/13
        sys.stdout.flush()
        try:
            r = self._redis_connection()
        except:
            print str(datetime.datetime.now()) + ' PUBLISH_OGC_WORKER: ERROR, could not connect to Redis '
            sys.stdout.flush()
            

        # Lovely infinite loop ;P, we do need them from time to time
        while True:
            # POP an element (package_id) from publish_ogc_queue and publish it to OGC
            try:
                # we need to slow down this loop by setting the blpop timeout to 5 seconds
                # when publish_ogc_queue is empty
                
                queue_task = r.blpop('publish_ogc_queue', 5) 

                if queue_task is not None:
                    package_id = queue_task[1] 
                    print str(datetime.datetime.now()) + ' PUBLISH_OGC_WORKER: Start publishing dataset: ' + package_id
                    sys.stdout.flush()
                    self.publish_ogc(package_id)

                    # rebuild solr index for this dataset to avoid duplicate datasets in search results
                    rebuild(package_id)
                    commit()


            except:
                print str(datetime.datetime.now()) + ' PUBLISH_OGC_WORKER: An error has occurred while publishing dataset: ' + package_id + ' to GeoServer'
                sys.stdout.flush()
                # retry in 30 seconds if something went south
                time.sleep(30)
Example #10
def start(ids):
    from ckan.lib.search import rebuild, commit
    rebuild(package_ids=ids)
    commit()
Example #12
    def format_mapping(self):
        try:
            tk.check_access('sysadmin', {'user': g.user, 'model': model})
        except tk.NotAuthorized:
            return tk.abort(403)
        if request.method == 'POST':
            old = request.POST.get('from')
            new = request.POST.get('to')
            if old and new:
                ids = set()
                res_query = model.Session.query(model.Resource).filter_by(
                    format=old, state='active'
                )
                for res in res_query:
                    ids.add(res.package_id)

                res_query.update({'format': new})
                model.Session.commit()
                for id in ids:
                    clear(id)
                    rebuild(id, defer_commit=True)
                commit()
                tk.h.flash_success(
                    'Updated. Records changed: {}'.format(len(ids))
                )
            return tk.redirect_to('format_mapping')

        defined = set(
            map(lambda (_1, fmt, _3): fmt,
                h.resource_formats().values())
        )
        db_formats = model.Session.query(
            model.Resource.format, func.count(model.Resource.id),
            func.count(model.PackageExtra.value)
        ).outerjoin(
            model.PackageExtra,
            (model.Resource.package_id == model.PackageExtra.package_id)
            & ((model.PackageExtra.key == 'harvest_portal')
               | (model.PackageExtra.key.is_(None)))
        ).group_by(model.Resource.format).filter(
            model.Resource.format != '', model.Resource.state == 'active'
        )
        db_formats = db_formats.all()

        format_types = {
            f: {
                True: 'Partially external',
                e == 0: 'Local',
                t - e == 0: 'External'
            }[True]
            for (f, t, e) in db_formats
        }
        used = set(format_types)
        undefined = used - defined

        extra_vars = {
            'undefined': undefined,
            'defined': defined,
            'format_types': format_types
        }
        return tk.render('admin/format_mapping.html', extra_vars)
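The dict-lookup expression that builds format_types above is fairly opaque: for each format it compares the total number of active resources (t) with how many of them are counted via the harvest_portal package extra (e). An equivalent, more explicit sketch of that classification (the function name is made up):

def classify_format(total, harvested):
    # total     - active resources using this format
    # harvested - how many of them are counted via the 'harvest_portal' extra
    if total - harvested == 0:
        return 'External'            # every resource comes from a harvested package
    if harvested == 0:
        return 'Local'               # none of the resources are harvested
    return 'Partially external'      # a mix of local and harvested resources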