async def _queue_pattern_ltd_product_ingest(
    *,
    session: ClientSession,
    logger: BoundLoggerLazyProxy,
    config: Configuration,
    producer: AIOKafkaProducer,
    schema_manager: RecordNameSchemaManager,
    product_pattern: str,
    edition_slug: str,
) -> None:
    """Queue LTD-based documents whose product slugs match a regular
    expression pattern for ingest in the ook.ingest Kafka topic.

    Parameters
    ----------
    session
        Open HTTP client session used for the LTD Keeper API calls.
    logger
        Structured logger.
    config
        Application configuration (provides the Kafka topic, among others).
    producer
        Kafka producer used downstream to emit ingest requests.
    schema_manager
        Avro schema manager used downstream to serialize messages.
    product_pattern
        Regular expression matched against each product slug (anchored at
        the start, via ``re.match``).
    edition_slug
        Edition of each matching product to ingest.
    """
    # Single source of truth for the Keeper products endpoint: the same URL
    # is used to list products and to strip the prefix off each product URL,
    # so the two can never drift apart.
    url_prefix = "https://keeper.lsst.codes/products/"
    product_data = await _get_json(session=session, url=url_prefix)
    all_products = [p[len(url_prefix):] for p in product_data["products"]]

    pattern = re.compile(product_pattern)
    matching_products = [
        p for p in all_products if pattern.match(p) is not None
    ]
    logger.info("Matched products", product_slugs=matching_products)

    await _queue_list_ltd_product_ingest(
        session=session,
        logger=logger,
        config=config,
        producer=producer,
        schema_manager=schema_manager,
        product_slugs=matching_products,
        edition_slug=edition_slug,
    )
async def send_tasks_to_worker(
    log: BoundLoggerLazyProxy,
    redis_app: Redis,
    arg: Optional[str],
    now: datetime,
    timestamp: Optional[datetime],
    timeout: int,
    handler_type: str,
    lang: str,
) -> None:
    """Push an update task onto the appropriate worker Redis stream.

    A task is dispatched only when the previous update is stale: either no
    ``timestamp`` was recorded, or more than ``timeout`` seconds have elapsed
    between ``timestamp`` and ``now``. The target stream and payload depend
    on ``handler_type``; the ``dynamic_*`` handlers additionally require a
    non-``None`` ``arg`` (e.g. a username).
    """
    # Staleness guard: nothing to do while the last update is still fresh.
    is_due = timestamp is None or (now - timestamp).total_seconds() > timeout
    if not is_due:
        return

    if handler_type == 'static_category':
        log.info('Send task to worker', stream=REDIS_STREAM_CHALLENGES, lang=lang)
        await redis_app.xadd(
            REDIS_STREAM_CHALLENGES,
            {b'lang': lang.encode(), b'update': b"ok"},
        )
    elif handler_type == 'dynamic_user' and arg is not None:
        await redis_app.xadd(
            REDIS_STREAM_USERS,
            {b'lang': lang.encode(), b'username': arg.encode()},
        )
        log.info('Send task to worker', stream=REDIS_STREAM_USERS, username=arg, lang=lang)
    elif handler_type == 'dynamic_categories' and arg is not None:
        await redis_app.xadd(
            REDIS_STREAM_CHALLENGES,
            {b'lang': lang.encode(), b'update': b"ok"},
        )
        log.info('Send task to worker', stream=REDIS_STREAM_CHALLENGES, lang=lang)
async def _queue_single_ltd_product_ingest(
    *,
    session: ClientSession,
    logger: BoundLoggerLazyProxy,
    config: Configuration,
    producer: AIOKafkaProducer,
    schema_manager: RecordNameSchemaManager,
    product_slug: str,
    edition_slug: str,
) -> None:
    """Queue an LTD-based document for ingest in the ook.ingest Kafka topic.

    Parameters
    ----------
    session
        Open HTTP client session used for the LTD Keeper API calls.
    logger
        Structured logger.
    config
        Application configuration; ``config.ingest_kafka_topic`` names the
        destination topic.
    producer
        Kafka producer used to emit the ingest request.
    schema_manager
        Avro schema manager used to serialize the message key and value.
    product_slug
        LTD product to ingest.
    edition_slug
        Edition of the product to ingest.

    Raises
    ------
    RuntimeError
        If the product has no edition with ``edition_slug`` (including the
        case where the product has no editions at all).
    """
    product_data = await _get_json(
        session=session,
        url=f"https://keeper.lsst.codes/products/{product_slug}",
    )
    edition_urls = await _get_json(
        session=session,
        url=f"https://keeper.lsst.codes/products/{product_slug}/editions/",
    )

    # Find the requested edition. Using an explicit sentinel avoids two bugs
    # in the naive loop-and-check approach: an unbound local when the product
    # has zero editions, and relying on the loop variable after the loop.
    edition_data = None
    for edition_url in edition_urls["editions"]:
        candidate = await _get_json(session=session, url=edition_url)
        if candidate["slug"] == edition_slug:
            edition_data = candidate
            break
    if edition_data is None:
        # f-prefix was previously missing, so the message showed literal
        # "{edition_slug}" placeholders instead of the values.
        raise RuntimeError(
            f"Could not find slug {edition_slug} for product {product_slug}"
        )

    content_type = await classify_ltd_site(
        http_session=session,
        product_slug=product_slug,
        published_url=edition_data["published_url"],
    )
    ltd_document_types = {
        ContentType.LTD_LANDER_JSONLD,
        ContentType.LTD_SPHINX_TECHNOTE,
    }
    if content_type not in ltd_document_types:
        logger.warning(
            "Cannot do triggered ingest of a non-document "
            "LTD product.",
            content_type=content_type.name,
        )
        return

    key = {"url": edition_data["published_url"]}
    value = {
        "content_type": content_type.name,
        # NOTE(review): naive UTC timestamps (utcnow) are kept to match the
        # existing serialized schema; switching to timezone-aware datetimes
        # would need a coordinated schema/consumer check.
        "request_timestamp": datetime.datetime.utcnow(),
        "update_timestamp": datetime.datetime.utcnow(),
        "url": edition_data["published_url"],
        "edition": {
            "url": edition_data["self_url"],
            "published_url": edition_data["published_url"],
            "slug": edition_slug,
            "build_url": edition_data["build_url"],
        },
        "product": {
            "url": product_data["self_url"],
            "published_url": edition_data["published_url"],
            "slug": product_slug,
        },
    }
    key_data = await schema_manager.serialize(data=key, name="ook.url_key_v1")
    value_data = await schema_manager.serialize(
        data=value, name="ook.ltd_url_ingest_v1"
    )

    # Produce message
    topic_name = config.ingest_kafka_topic
    await producer.send_and_wait(topic_name, key=key_data, value=value_data)
    logger.info(
        "Produced an LTD document URL ingest request",
        topic=topic_name,
        url=value["url"],
    )