Example #1
async def _queue_pattern_ltd_product_ingest(
    *,
    session: ClientSession,
    logger: BoundLoggerLazyProxy,
    config: Configuration,
    producer: AIOKafkaProducer,
    schema_manager: RecordNameSchemaManager,
    product_pattern: str,
    edition_slug: str,
) -> None:
    """Queue a LTD-based documents with product slugs matching a regular
    expression pattern for ingest in the ook.ingest Kafka topic.
    """
    product_data = await _get_json(session=session,
                                   url="https://keeper.lsst.codes/products/")
    url_prefix = "https://keeper.lsst.codes/products/"
    all_products = [p[len(url_prefix):] for p in product_data["products"]]
    pattern = re.compile(product_pattern)
    matching_products = [
        p for p in all_products if pattern.match(p) is not None
    ]
    logger.info("Matched products", product_slugs=matching_products)
    await _queue_list_ltd_product_ingest(
        session=session,
        logger=logger,
        config=config,
        producer=producer,
        schema_manager=schema_manager,
        product_slugs=matching_products,
        edition_slug=edition_slug,
    )
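Example #1 delegates to _queue_list_ltd_product_ingest, which this excerpt does not include. A minimal sketch, assuming it simply fans the matched slugs out to the per-product helper shown in Example #3 (the other type annotations reuse names from the excerpts):

from typing import List


async def _queue_list_ltd_product_ingest(
    *,
    session: ClientSession,
    logger: BoundLoggerLazyProxy,
    config: Configuration,
    producer: AIOKafkaProducer,
    schema_manager: RecordNameSchemaManager,
    product_slugs: List[str],
    edition_slug: str,
) -> None:
    """Sketch: queue each matched product for ingest individually (assumed
    behavior).
    """
    for product_slug in product_slugs:
        await _queue_single_ltd_product_ingest(
            session=session,
            logger=logger,
            config=config,
            producer=producer,
            schema_manager=schema_manager,
            product_slug=product_slug,
            edition_slug=edition_slug,
        )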
Example #2
async def send_tasks_to_worker(log: BoundLoggerLazyProxy, redis_app: Redis,
                               arg: Optional[str], now: datetime,
                               timestamp: Optional[datetime], timeout: int,
                               handler_type: str, lang: str) -> None:
    # An update is due if none has been sent yet or the last one is older
    # than the timeout (in seconds).
    condition = timestamp is None or (now -
                                      timestamp).total_seconds() > timeout

    # Dispatch the task to the Redis stream that matches the handler type.
    if handler_type == 'static_category' and condition:
        log.info('Send task to worker',
                 stream=REDIS_STREAM_CHALLENGES,
                 lang=lang)
        await redis_app.xadd(REDIS_STREAM_CHALLENGES, {
            b'lang': lang.encode(),
            b'update': b"ok"
        })
    elif handler_type == 'dynamic_user' and arg is not None and condition:
        await redis_app.xadd(REDIS_STREAM_USERS, {
            b'lang': lang.encode(),
            b'username': arg.encode()
        })
        log.info('Send task to worker',
                 stream=REDIS_STREAM_USERS,
                 username=arg,
                 lang=lang)
    elif handler_type == 'dynamic_categories' and arg is not None and condition:
        await redis_app.xadd(REDIS_STREAM_CHALLENGES, {
            b'lang': lang.encode(),
            b'update': b"ok"
        })
        log.info('Send task to worker',
                 stream=REDIS_STREAM_CHALLENGES,
                 lang=lang)
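A hedged usage sketch for this handler. The stream constants, connection details, and argument values below are illustrative assumptions rather than values from the original module, and a reachable Redis server is assumed:

import asyncio
from datetime import datetime

import structlog
from redis.asyncio import Redis

# Illustrative stand-ins; the real constants are defined elsewhere.
REDIS_STREAM_CHALLENGES = "stream:challenges"
REDIS_STREAM_USERS = "stream:users"


async def main() -> None:
    redis_app = Redis(host="localhost", port=6379)
    log = structlog.get_logger()
    # No previous timestamp, so the condition holds and a task is queued
    # on the challenges stream.
    await send_tasks_to_worker(
        log,
        redis_app,
        arg=None,
        now=datetime.utcnow(),
        timestamp=None,
        timeout=3600,
        handler_type="static_category",
        lang="en",
    )
    await redis_app.close()


asyncio.run(main())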
Example #3
async def _queue_single_ltd_product_ingest(
    *,
    session: ClientSession,
    logger: BoundLoggerLazyProxy,
    config: Configuration,
    producer: AIOKafkaProducer,
    schema_manager: RecordNameSchemaManager,
    product_slug: str,
    edition_slug: str,
) -> None:
    """Queue an LTD-based document for ingest in the ook.ingest Kafka topic."""
    product_data = await _get_json(
        session=session,
        url=f"https://keeper.lsst.codes/products/{product_slug}",
    )
    edition_urls = await _get_json(
        session=session,
        url=f"https://keeper.lsst.codes/products/{product_slug}/editions/",
    )
    for edition_url in edition_urls["editions"]:
        edition_data = await _get_json(session=session, url=edition_url)
        if edition_data["slug"] == edition_slug:
            break
    if edition_data["slug"] != edition_slug:
        raise RuntimeError(
            "Could not find slug {edition_slug} for product {product_slug}")

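    # Classify the published site; only document-type LTD products can be
    # ingested this way.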
    content_type = await classify_ltd_site(
        http_session=session,
        product_slug=product_slug,
        published_url=edition_data["published_url"],
    )
    ltd_document_types = {
        ContentType.LTD_LANDER_JSONLD,
        ContentType.LTD_SPHINX_TECHNOTE,
    }
    if content_type not in ltd_document_types:
        logger.warning(
            "Cannot do triggered ingest of a non-document "
            "LTD product.",
            content_type=content_type.name,
        )
        return

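    # Assemble the Kafka message key and value for the ingest request.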
    key = {"url": edition_data["published_url"]}
    value = {
        "content_type": content_type.name,
        "request_timestamp": datetime.datetime.utcnow(),
        "update_timestamp": datetime.datetime.utcnow(),
        "url": edition_data["published_url"],
        "edition": {
            "url": edition_data["self_url"],
            "published_url": edition_data["published_url"],
            "slug": edition_slug,
            "build_url": edition_data["build_url"],
        },
        "product": {
            "url": product_data["self_url"],
            "published_url": edition_data["published_url"],
            "slug": product_slug,
        },
    }
    key_data = await schema_manager.serialize(data=key, name="ook.url_key_v1")
    value_data = await schema_manager.serialize(data=value,
                                                name="ook.ltd_url_ingest_v1")
    # Produce message
    topic_name = config.ingest_kafka_topic
    await producer.send_and_wait(topic_name, key=key_data, value=value_data)
    logger.info(
        "Produced an LTD document URL ingest request",
        topic=topic_name,
        url=value["url"],
    )
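All of the LTD examples above call a _get_json helper that the excerpts do not include. A minimal sketch, assuming it simply issues a GET with the shared aiohttp session and returns the decoded JSON body:

from typing import Any

from aiohttp import ClientSession


async def _get_json(*, session: ClientSession, url: str) -> Any:
    """Sketch: fetch ``url`` and return its decoded JSON body (assumed)."""
    async with session.get(url) as response:
        response.raise_for_status()
        return await response.json()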