from collections import deque
from time import sleep
from typing import Any, Deque  # noqa

from cranial.common import logger
from cranial.datastore.dbapi import Param, render_params
from cranial.listeners import base

log = logger.create('db_listener',
                    logger.fallback('LISTENERS_LOGLEVEL', 'WARNING'))


class Listener(base.Listener):
    def __init__(self, cursor, table: str, id: str = 'id',
                 last_id: Any = None, limit=1000, sleep=10,
                 select='*', where=None, **kwargs) -> None:
        """
        Parameters
        ----------
        cursor
            A DBAPI2 cursor.
        table
            A table to poll for new rows.
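# --- Hypothetical usage sketch, not part of the original module. ---
# Shows how the Listener above might be constructed with a DBAPI2 cursor.
# The sqlite3 connection, the 'events' table, and the recv() loop are
# assumptions based on the base.Listener interface, not a confirmed API.
import sqlite3

conn = sqlite3.connect('events.db')
listener = Listener(conn.cursor(), table='events', id='id', limit=500)
while True:
    row = listener.recv()  # assumed to poll the table until a new row appears
    print(row)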
from cranial.common import logger
from cranial.listeners import base
from cranial.messaging.adapters.kafka import get_consumer

log = logger.create('kafka_client',
                    logger.fallback('LISTENERS_LOGLEVEL', 'WARNING'))


class Listener(base.Listener):
    """ Listens for messages on a single Kafka topic."""

    def __init__(self, topic, consumer=None, **kwargs):
        self.topic = topic
        if not consumer:
            self.consumer = get_consumer(topic=topic, **kwargs)
        else:
            consumer.subscribe([topic])
            self.consumer = consumer
        self.empty_messages = 0

    def recv(self, timeout=-1, do_raise=False):
        msg = self.consumer.poll(timeout)
        err = hasattr(msg, 'error') and msg.error()
        if err:
            log.warning(err)
            if do_raise:
                raise Exception(
                    'Error while polling for message: {}'.format(err))
        # An application might expect streaming bytes. In the future, we
        # might revise this to raise an exception which the application
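# --- Hypothetical usage sketch, not part of the original module. ---
# Constructs the Kafka Listener above for one topic and polls it in a loop.
# The topic name is an assumption; extra kwargs would be forwarded to
# get_consumer() as shown in __init__, and recv()'s return value is assumed
# to be the polled message (the method body above is truncated here).
listener = Listener('events')
while True:
    msg = listener.recv(timeout=1.0)
    if msg:
        print(msg)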
import datetime
import os
import socket

import confluent_kafka as kafka

from cranial.common import logger

log = logger.create('cranial', os.environ.get('CRANIAL_LOGLEVEL', 'WARN'))


class LoggingProducer(kafka.Producer):
    def logError(self, err, msg):
        if err:
            log.error(msg)

    def produce(self, *args, **kwargs):
        kwargs['on_delivery'] = self.logError
        super().produce(*args, **kwargs)


class MustCommitConsumer(kafka.Consumer):
    def __init__(self, config):
        config['enable.auto.commit'] = False
        super().__init__(config)


class CarefulConsumer(MustCommitConsumer):
    """A Kafka consumer that refrains from committing its previous message
    until it is asking for a new one.

    Threads should not share a CarefulConsumer!
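# --- Hypothetical usage sketch, not part of the original module. ---
# Shows how the adapters above might be instantiated. The broker address,
# topic, and group id are assumptions; the config keys are standard
# confluent_kafka settings.
producer = LoggingProducer({'bootstrap.servers': 'localhost:9092'})
producer.produce('example-topic', b'hello')  # delivery errors go to log.error
producer.flush()

consumer = CarefulConsumer({'bootstrap.servers': 'localhost:9092',
                            'group.id': 'example-group'})
consumer.subscribe(['example-topic'])  # auto-commit is disabled by the base class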
""" tokenizers that do not use spaCy """ import subprocess import os import collections import logging from cranial.common import logger from cranial.model_base import ModelBase log = logger.create('tokenizers', os.environ.get('MODELS_LOGLEVEL', logging.WARNING)) # streaming log class MosesTokenizer(ModelBase): name = 'moses_tokenizer' def __init__(self, moses_repo_path, language='en', threads=None, **kwargs): """ This wraps around a moses tokenizer - https://github.com/moses-smt/mosesdecoder Note that it is much faster to transform few large chunks of text instead of many small ones. So before passing strings into this tokenizer it might be good to batch short texts into a large one with some known separator between individual texts, and then after split apart again Parameters ---------- moses_repo_path path to the cloned repo