Example #1
# Project-internal import paths below are assumptions; Scheduler is rq-scheduler's real API.
from rq_scheduler import Scheduler

from sitesearch import config, tasks
from sitesearch.connections import get_rq_redis_client


def scheduler():
    """Run rq-scheduler"""
    redis_client = get_rq_redis_client()
    scheduler = Scheduler(connection=redis_client)

    # Create the RediSearch index and begin indexing immediately.
    # If a previous index exists, delete it.
    tasks.index(config.sites, rebuild_index=True)

    # Schedule an indexing job to run every 30 minutes.
    #
    # This performs an update-in-place using the existing RediSearch index.
    #
    # TODO: We currently don't try to detect if we have outdated content in
    # the index -- i.e. when we reindexed a site, a URL was leftover in the
    # index that we didn't find on this round of indexing.
    #
    # NOTE: We need to define this here, at the time we run this command,
    # because there is no deduplication in the cron() method, and this app has
    # no "exactly once" startup/initialization step that we could use to call
    # code only once.
    scheduler.cron(
        "*/60 * * * *",
        func=tasks.index,
        args=[config.sites, False],  # positional False = rebuild_index=False (update in place)
        use_local_timezone=True,
        timeout=tasks.INDEXING_TIMEOUT
    )

    scheduler.run()
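
As the NOTE above says, cron() has no deduplication: every run of this command registers another copy of the schedule. A minimal guard using rq-scheduler's get_jobs() and cancel() could run before the cron() call; the clear_scheduled_jobs name is ours, not the project's:

def clear_scheduled_jobs(scheduler):
    """Cancel scheduled jobs left over from a previous run of this command."""
    for job in scheduler.get_jobs():
        scheduler.cancel(job)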
Example #2
# Project-internal import paths below are assumptions; rq's get_current_job is real.
import logging
from typing import Optional

from rq import get_current_job
from sitesearch.config import AppConfiguration
from sitesearch.connections import get_rq_redis_client
from sitesearch.indexer import Indexer
from sitesearch.keys import Keys
from sitesearch.models import SiteConfiguration

log = logging.getLogger(__name__)


def index(site: SiteConfiguration, config: Optional[AppConfiguration] = None, force=False):
    """Index a site, then drop this job's ID from the startup-tracking set."""
    redis_client = get_rq_redis_client()
    if config is None:
        config = AppConfiguration()
    indexer = Indexer(site, config)
    indexer.index(force)

    job = get_current_job()
    if job:
        keys = Keys(prefix=config.key_prefix)
        log.info("Removing indexing job ID: %s", job.id)
        redis_client.srem(keys.startup_indexing_job_ids(), job.id)

    return True
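
The srem() above is the cleanup half of a bookkeeping pattern: whoever enqueues the startup indexing jobs records their IDs in the same set, so the app can tell when initial indexing has finished. A sketch of the enqueue side under that assumption (enqueue_startup_indexing and the INDEXING_TIMEOUT value are illustrative; Queue, enqueue, and sadd are real rq/redis APIs):

from rq import Queue

INDEXING_TIMEOUT = 60 * 60  # assumed cap, mirroring tasks.INDEXING_TIMEOUT in Example #1

def enqueue_startup_indexing(sites, app_config):
    # Illustrative counterpart to the srem() call in index() above.
    redis_client = get_rq_redis_client()
    queue = Queue(connection=redis_client)
    keys = Keys(prefix=app_config.key_prefix)
    for site in sites:
        job = queue.enqueue(index, site, job_timeout=INDEXING_TIMEOUT)
        redis_client.sadd(keys.startup_indexing_job_ids(), job.id)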
Example #3
import logging

from fastapi import APIRouter, Security, status
from fastapi.exceptions import HTTPException
from rq.exceptions import NoSuchJobError
from rq.registry import StartedJobRegistry
from sitesearch.api.authentication import get_api_key
from sitesearch.cluster_aware_rq import ClusterAwareJob
from sitesearch.connections import get_rq_redis_client

redis_client = get_rq_redis_client()
log = logging.getLogger(__name__)
registry = StartedJobRegistry('default', connection=redis_client)
router = APIRouter()

JOB_QUEUED = 'queued'


@router.get("/jobs/{job_id}", dependencies=[Security(get_api_key)])
async def job(job_id: str):
    """Get the status of a job by its ID."""
    try:
        job = ClusterAwareJob.fetch(job_id, connection=redis_client)
    except NoSuchJobError as e:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND,
                            detail="Job not found") from e

    return {
        "id": job_id,
        "url": job.args[0].url,
        "status": job.get_status(),
    }
Example #4
def scheduler():
    """Run rq-scheduler"""
    redis_client = get_rq_redis_client()
    scheduler = Scheduler(connection=redis_client)
    schedule(scheduler, redis_client, config)
    scheduler.run()
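
The schedule() helper isn't shown in this example. Based on Example #1, a plausible sketch of what it factors out; the body below is reconstructed, not the project's actual code, and the redis_client parameter is unused here but kept to match the call signature:

def schedule(scheduler, redis_client, config):
    # Reconstructed from Example #1; the real helper may differ.
    tasks.index(config.sites, rebuild_index=True)
    scheduler.cron(
        "*/30 * * * *",
        func=tasks.index,
        args=[config.sites, False],
        use_local_timezone=True,
        timeout=tasks.INDEXING_TIMEOUT,
    )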