Example #1
def main(profile: str):
    """
    Celery worker main entry point

    Args:
        profile: profile used to run the app

    """
    load_config(profile, CONFIGS_PATH, config, 'NLP_SERVICE')
    initialize_summary_service()
    load()
    publisher = container.get('exchange_publisher')
    if not publisher.test_connection():
        LOGGER.error('Error connecting to the queue provider. Exiting...')
        sys.exit(1)

    add_logstash_handler(LOG_CONFIG, config.logstash.host, config.logstash.port)
    CELERY_APP.configure(task_queue_name='nlp-worker',
                         broker_config=config.rabbit,
                         worker_concurrency=config.celery.concurrency,
                         result_backend_url=build_redis_url(**config.redis))

    apm_client = Client(config={
        'SERVICE_NAME': config.elastic_apm.service_name,
        'SECRET_TOKEN': config.elastic_apm.secret_token,
        'SERVER_URL': config.elastic_apm.url
    })
    register_instrumentation(apm_client)
    register_exception_tracking(apm_client)

    CELERY_APP.run()
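Example #1 configures a Redis result backend through a build_redis_url helper that is not shown here. A minimal sketch of what such a helper might look like, assuming config.redis carries host, port and db keys (the keyword names and defaults are assumptions):

def build_redis_url(host: str = 'localhost', port: int = 6379, db: int = 0, password: str = '', **_) -> str:
    """Build a redis:// URL for the Celery result backend (hypothetical helper)."""
    auth = f':{password}@' if password else ''
    return f'redis://{auth}{host}:{port}/{db}'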
Example #2
def shutdown_worker(*_, **__):
    """
    Shut down the Celery worker by shutting down the exchange publisher
    """
    LOGGER.info('Shutting down worker')
    exchange_publisher: ExchangePublisher = container.get('exchange_publisher')
    exchange_publisher.shutdown()
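The *_, **__ signature lets this handler swallow whatever arguments a Celery signal passes it. A minimal sketch of the hookup, assuming the publisher should be torn down once per worker process:

from celery.signals import worker_process_shutdown

# Run shutdown_worker in every worker process as it exits
worker_process_shutdown.connect(shutdown_worker)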
Example #3
def main(profile: str):
    """
    Celery app main entry point

    Args:
        profile: profile used to run the app

    """
    load_config(profile, CONFIGS_PATH, config, 'NEWS_DISCOVERY')
    load()
    publisher = container.get('exchange_publisher')
    if not publisher.test_connection():
        LOGGER.error('Error connecting to the queue provider. Exiting...')
        sys.exit(1)

    add_logstash_handler(LOG_CONFIG, config.logstash.host,
                         config.logstash.port)
    CELERY_APP.configure(task_queue_name='news-discovery',
                         broker_config=config.rabbit,
                         worker_concurrency=config.celery.concurrency)

    apm_client = Client(
        config={
            'SERVICE_NAME': 'news-discovery-app',
            'SECRET_TOKEN': config.elastic_apm.secret_token,
            'SERVER_URL': config.elastic_apm.url
        })
    register_instrumentation(apm_client)
    register_exception_tracking(apm_client)

    CELERY_APP.run()
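Both entry points receive the profile from their caller. A minimal sketch of a command-line launcher for them, assuming an argparse wrapper (the flag name and default are assumptions):

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='News discovery Celery app')
    parser.add_argument('--profile', default='LOCAL', help='configuration profile to load')
    args = parser.parse_args()
    main(args.profile)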
Example #4
def hydrate_new(new: dict = None, nlp_doc: dict = None, summary: str = None, sentiment: float = None, **_):
    """
    Hydrate the input new with the named entities and noun chunks from the input NLP document, plus the
    given summary and sentiment

    Args:
        new: new to hydrate
        nlp_doc: NLP document of the new
        summary: summary of the new
        sentiment: sentiment score of the new

    Returns: hydrated new

    """
    LOGGER.info('Hydrating new %s', new['title'])
    new = from_dict(New, new)

    if summary is not None:
        new.summary = summary

    if sentiment is not None:
        new.sentiment = sentiment

    nlp_service = container.get('nlp_service')
    if nlp_doc is not None:
        doc = nlp_service.doc_from_json_dict(nlp_doc)
        new.entities = list({NamedEntity(text=str(entity), type=entity.label_)
                             for entity in doc.ents})
        new.noun_chunks = [str(chunk) for chunk in doc.noun_chunks]

    new.hydrated = True

    return asdict(new)
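hydrate_new converts the incoming dict into a New instance with from_dict and back with asdict, and NamedEntity must be hashable for the set-based deduplication to work. A minimal sketch of compatible dataclasses, assuming a dacite-style from_dict; the exact field set of New is an assumption:

from dataclasses import asdict, dataclass, field
from typing import List, Optional

from dacite import from_dict  # assumed provider of from_dict


@dataclass(frozen=True)
class NamedEntity:
    text: str
    type: str


@dataclass
class New:
    title: str
    content: str
    summary: Optional[str] = None
    sentiment: Optional[float] = None
    entities: List[NamedEntity] = field(default_factory=list)
    noun_chunks: List[str] = field(default_factory=list)
    hydrated: bool = False


new = from_dict(New, {'title': 'Some title', 'content': 'Some content'})
print(asdict(new)['hydrated'])  # False until hydrate_new runs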
Example #5
def initialize_worker(*_, **__):
    """
    Initialize the Celery worker process environment
    """
    LOGGER.info('Initializing worker')
    exchange_publisher: ExchangePublisher = container.get('exchange_publisher')
    exchange_publisher.connect()
    exchange_publisher.initialize()
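As with shutdown_worker in Example #2, this initializer is typically bound to a Celery worker signal so that each forked worker process opens its own connection. A minimal sketch:

from celery.signals import worker_process_init

# Open one publisher connection per worker process at startup
worker_process_init.connect(initialize_worker)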
Example #6
def sentiment_analysis(nlp_doc: dict = None, **_):
    """
    Get the sentiment score of the input doc sentences

    Args:
        nlp_doc: doc whose sentiment to analyze

    Returns: sentiment score of the input doc sentences

    """
    LOGGER.info('Generating sentiment score')

    nlp_service = container.get('nlp_service')
    sentiment_analyzer = container.get('sentiment_analysis_service')
    if sentiment_analyzer is not None:
        if nlp_doc is not None:
            doc = nlp_service.doc_from_json_dict(nlp_doc)
            return sentiment_analyzer(list(doc.sents))
        else:
            LOGGER.warning('NLP document is missing. Skipping sentiment calculation...')
            return None
    else:
        LOGGER.warning('Sentiment analyzer not initialized. Skipping sentiment calculation...')
        return None
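The sentiment_analysis_service pulled from the container is used as a callable that takes a list of sentences and returns a single score. A toy sketch of that contract (the class and its lexicon are illustrative assumptions, not the project's real analyzer):

class SentimentAnalysisService:
    """Toy analyzer: average per-sentence polarity from a tiny lexicon."""

    LEXICON = {'good': 1.0, 'great': 1.0, 'bad': -1.0, 'terrible': -1.0}

    def __call__(self, sentences) -> float:
        scores = []
        for sentence in sentences:
            words = str(sentence).lower().split()
            hits = [self.LEXICON[word] for word in words if word in self.LEXICON]
            scores.append(sum(hits) / len(hits) if hits else 0.0)
        return sum(scores) / len(scores) if scores else 0.0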
Example #7
def summarize(nlp_doc: dict = None, **_):
    """
    Generate the summary for the input NLP doc

    Args:
        nlp_doc: document to summarize

    Returns: summary of the doc sentences

    """
    LOGGER.info('Generating summary')

    nlp_service = container.get('nlp_service')
    summarizer = container.get('summary_service')
    if summarizer is not None:
        if nlp_doc is not None:
            doc = nlp_service.doc_from_json_dict(nlp_doc)
            return summarizer(list(doc.sents))
        else:
            LOGGER.warning('NLP document is missing. Skipping summary generation...')
            return None
    else:
        LOGGER.warning('Summarizer not initialized. Skipping summary generation...')
        return None
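summary_service follows the same callable contract, consuming the doc's sentences. A toy extractive sketch that keeps the highest word-frequency sentences (purely illustrative; the real service is set up elsewhere via initialize_summary_service):

from collections import Counter


class SummaryService:
    """Toy extractive summarizer: keep the top-scoring sentences in order."""

    def __init__(self, max_sentences: int = 3):
        self.max_sentences = max_sentences

    def __call__(self, sentences) -> str:
        texts = [str(sentence) for sentence in sentences]
        freqs = Counter(word for text in texts for word in text.lower().split())
        ranked = sorted(texts, key=lambda text: sum(freqs[word] for word in text.lower().split()),
                        reverse=True)
        kept = set(ranked[:self.max_sentences])
        return ' '.join(text for text in texts if text in kept)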
Example #8
def discover_news(definition_name: str):
    """
    Discover news task

    Args:
        definition_name: name of the news discovery definition

    """
    if 'rabbit' in config:
        LOGGER.info('Executing discovery %s', definition_name)
        definition = DEFINITIONS[definition_name]
        definition_instance = definition['class'](definition)

        exchange_publisher: ExchangePublisher = container.get(
            'exchange_publisher')

        for discovered_new in definition_instance():
            exchange_publisher(asdict(discovered_new))

    else:
        LOGGER.error('Worker configuration not initialized')
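discover_news expects DEFINITIONS to map a name to a dict whose 'class' entry is instantiated with the definition itself, and the resulting instance to be callable, yielding discovered news. A minimal sketch of a registry entry satisfying that protocol, reusing the hypothetical New dataclass from the sketch after Example #4 (all names here are assumptions):

class StaticDiscovery:
    """Hypothetical discovery definition: yields one New per configured item."""

    def __init__(self, definition: dict):
        self.items = definition.get('items', [])

    def __call__(self):
        for item in self.items:
            yield New(title=item['title'], content=item['content'])


DEFINITIONS = {
    'static_demo': {
        'class': StaticDiscovery,
        'items': [{'title': 'Some title', 'content': 'Some content'}],
    },
}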
Example #9
def process_new_content(new: dict = None, **_):
    """
    Apply NLP processing to the input new content

    Args:
        new: new whose content to process

    Returns: processed new content, used to hydrate the new in subsequent tasks

    """
    LOGGER.info('NLP Processing new %s', new['title'])

    nlp_service = container.get('nlp_service')
    if nlp_service is not None:
        processed_content = nlp_service.process_text(new['content'])
        return nlp_service.doc_to_json_dict(processed_content)
    else:
        LOGGER.warning('NLP service not initialized, skipping NLP processing')
        return None
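doc_to_json_dict and doc_from_json_dict imply that the NLP service serializes spaCy docs into JSON-friendly dicts so they can cross Celery task boundaries. A minimal sketch of such a wrapper, assuming spaCy 3.3+ where Doc.to_json and Doc.from_json are available:

import spacy
from spacy.tokens import Doc


class NLPService:
    """Thin wrapper that moves spaCy docs through JSON-serializable dicts."""

    def __init__(self, model: str = 'en_core_web_sm'):
        self.nlp = spacy.load(model)

    def process_text(self, text: str) -> Doc:
        return self.nlp(text)

    def doc_to_json_dict(self, doc: Doc) -> dict:
        return doc.to_json()

    def doc_from_json_dict(self, doc_json: dict) -> Doc:
        return Doc(self.nlp.vocab).from_json(doc_json)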
Example #10
def publish_hydrated_new(new: dict = None, **_):
    """
    Publish the input hydrated new

    Args:
        new: new to publish
    """
    if new is not None:
        LOGGER.info('Publishing hydrated new %s', new['title'])
        if config.rabbit is not None:
            LOGGER.info('Queue connection initialized, publishing...')

            exchange_publisher: ExchangePublisher = container.get('exchange_publisher')
            exchange_publisher(new)

            LOGGER.info('New published')
        else:
            LOGGER.warning('Queue connection configuration not initialized, skipping publish...')
    else:
        LOGGER.warning('Tasks chain services not initialized, skipping publish...')
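Taken together, Examples #9, #7, #6, #4 and #10 form one processing chain. In the project this is presumably wired through Celery, but the dataflow can be shown with plain calls (a sketch, assuming a new dict that carries title and content):

new = {'title': 'Some title', 'content': 'Some content'}

nlp_doc = process_new_content(new=new)
summary = summarize(nlp_doc=nlp_doc)
sentiment = sentiment_analysis(nlp_doc=nlp_doc)
hydrated = hydrate_new(new=new, nlp_doc=nlp_doc, summary=summary, sentiment=sentiment)
publish_hydrated_new(new=hydrated)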