from rq_scheduler import Scheduler

# Module paths below are assumed; only sitesearch.connections is
# confirmed elsewhere in this codebase.
from sitesearch import tasks
from sitesearch.config import AppConfiguration
from sitesearch.connections import get_rq_redis_client

config = AppConfiguration()


def scheduler():
    """Run rq-scheduler"""
    redis_client = get_rq_redis_client()
    scheduler = Scheduler(connection=redis_client)

    # Create the RediSearch index and begin indexing immediately.
    # If a previous index exists, delete it.
    tasks.index(config.sites, rebuild_index=True)

    # Schedule an indexing job to run every 30 minutes.
    #
    # This performs an update-in-place using the existing RediSearch index.
    #
    # TODO: We currently don't try to detect if we have outdated content in
    # the index -- i.e. when we reindexed a site, a URL was left over in the
    # index that we didn't find on this round of indexing.
    #
    # NOTE: We need to define this here, at the time we run this command,
    # because there is no deduplication in the cron() method, and this app has
    # no "exactly once" startup/initialization step that we could use to call
    # code only once.
    scheduler.cron(
        "*/30 * * * *",
        func=tasks.index,
        args=[config.sites],
        kwargs={"rebuild_index": False},
        use_local_timezone=True,
        timeout=tasks.INDEXING_TIMEOUT
    )

    scheduler.run()
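# The NOTE above calls out that cron() does not deduplicate scheduled
# jobs. A minimal, untested sketch of one workaround -- not code from
# this app -- is to cancel previously scheduled copies of the indexing
# job before registering a new one, using rq-scheduler's get_jobs()
# and cancel(). The dotted function path is an assumption.
def clear_scheduled_indexing_jobs(scheduler):
    """Cancel any scheduled job that points at tasks.index (illustrative)."""
    for job in scheduler.get_jobs():
        if job.func_name == "sitesearch.tasks.index":  # assumed path
            scheduler.cancel(job)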
import logging
from typing import Optional

from rq import get_current_job

# Module paths for these app-local imports are assumed.
from sitesearch.config import AppConfiguration
from sitesearch.connections import get_rq_redis_client
from sitesearch.indexer import Indexer
from sitesearch.keys import Keys
from sitesearch.models import SiteConfiguration

log = logging.getLogger(__name__)


def index(site: SiteConfiguration,
          config: Optional[AppConfiguration] = None,
          force=False):
    """Index a site with RediSearch, then clear this job's ID from the
    set of startup indexing jobs."""
    redis_client = get_rq_redis_client()

    if config is None:
        config = AppConfiguration()

    indexer = Indexer(site, config)
    indexer.index(force)

    # When this function runs as an RQ job, remove its job ID from the
    # set that tracks in-flight startup indexing jobs.
    job = get_current_job()
    if job:
        keys = Keys(prefix=config.key_prefix)
        log.info("Removing indexing job ID: %s", job.id)
        redis_client.srem(keys.startup_indexing_job_ids(), job.id)

    return True
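# For context, a hypothetical sketch of the enqueue side that index()
# pairs with: each startup indexing job's ID is recorded with SADD in
# the same set that index() later SREMs from. The helper name, queue
# name, and the INDEXING_TIMEOUT constant (referenced as
# tasks.INDEXING_TIMEOUT elsewhere) are assumptions here.
from rq import Queue

def enqueue_startup_indexing(sites, config):
    redis_client = get_rq_redis_client()
    queue = Queue("default", connection=redis_client)
    keys = Keys(prefix=config.key_prefix)
    for site in sites:
        job = queue.enqueue(index, site, job_timeout=INDEXING_TIMEOUT)
        redis_client.sadd(keys.startup_indexing_job_ids(), job.id)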
import logging

from fastapi import APIRouter, Security, status
from fastapi.exceptions import HTTPException
from rq.exceptions import NoSuchJobError
from rq.registry import StartedJobRegistry

from sitesearch.api.authentication import get_api_key
from sitesearch.cluster_aware_rq import ClusterAwareJob
from sitesearch.connections import get_rq_redis_client

redis_client = get_rq_redis_client()
log = logging.getLogger(__name__)
registry = StartedJobRegistry('default', connection=redis_client)

router = APIRouter()

JOB_QUEUED = 'queued'


@router.get("/jobs/{job_id}", dependencies=[Security(get_api_key)])
async def job(job_id: str):
    """Get the status of a job by its ID."""
    try:
        job = ClusterAwareJob.fetch(job_id, connection=redis_client)
    except NoSuchJobError as e:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND,
                            detail="Job not found") from e
    return {
        "id": job_id,
        "url": job.args[0].url,
        "status": job.get_status(),
    }
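# A hypothetical client call against this endpoint. The "X-API-Key"
# header name, host, port, and job ID are all assumptions; the real
# header is whatever get_api_key expects.
import requests

resp = requests.get(
    "http://localhost:8000/jobs/0b9f3c2e",   # hypothetical job ID
    headers={"X-API-Key": "secret"},         # assumed header name
)
print(resp.json())  # e.g. {"id": "...", "url": "...", "status": "started"}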
def scheduler():
    """Run rq-scheduler"""
    redis_client = get_rq_redis_client()
    scheduler = Scheduler(connection=redis_client)
    # Index creation and cron registration now live in a shared
    # schedule() helper rather than inline in this function.
    schedule(scheduler, redis_client, config)
    scheduler.run()
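# schedule() isn't shown in this snippet. A plausible shape,
# reconstructed from the earlier inline version of scheduler(), would
# be the following; redis_client is accepted only to match the call
# signature above and goes unused in this sketch.
def schedule(scheduler, redis_client, config):
    """Create the RediSearch index, then register the recurring reindex job."""
    tasks.index(config.sites, rebuild_index=True)
    scheduler.cron(
        "*/30 * * * *",
        func=tasks.index,
        args=[config.sites],
        kwargs={"rebuild_index": False},
        use_local_timezone=True,
        timeout=tasks.INDEXING_TIMEOUT,
    )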