def main(profile: str): """ Celery worker main entry point Args: profile: profile used to run the app """ load_config(profile, CONFIGS_PATH, config, 'NLP_SERVICE') initialize_summary_service() load() publisher = container.get('exchange_publisher') if not publisher.test_connection(): LOGGER.error('Error connecting to the queue provider. Exiting...') sys.exit(1) add_logstash_handler(LOG_CONFIG, config.logstash.host, config.logstash.port) CELERY_APP.configure(task_queue_name='nlp-worker', broker_config=config.rabbit, worker_concurrency=config.celery.concurrency, result_backend_url=build_redis_url(**config.redis)) apm_client = Client(config={ 'SERVICE_NAME': config.elastic_apm.service_name, 'SECRET_TOKEN': config.elastic_apm.secret_token, 'SERVER_URL': config.elastic_apm.url }) register_instrumentation(apm_client) register_exception_tracking(apm_client) CELERY_APP.run()
def shutdown_worker(*_, **__):
    """
    Shut down the celery worker by shutting down the exchange publisher
    """
    LOGGER.info('Shutting down worker')
    exchange_publisher: ExchangePublisher = container.get('exchange_publisher')
    exchange_publisher.shutdown()

def main(profile: str): """ Celery app main entry point Args: profile: profile used to run the app """ load_config(profile, CONFIGS_PATH, config, 'NEWS_DISCOVERY') load() publisher = container.get('exchange_publisher') if not publisher.test_connection(): LOGGER.error('Error connecting to the queue provider. Exiting...') sys.exit(1) add_logstash_handler(LOG_CONFIG, config.logstash.host, config.logstash.port) CELERY_APP.configure(task_queue_name='news-discovery', broker_config=config.rabbit, worker_concurrency=config.celery.concurrency) apm_client = Client( config={ 'SERVICE_NAME': 'news-discovery-app', 'SECRET_TOKEN': config.elastic_apm.secret_token, 'SERVER_URL': config.elastic_apm.url }) register_instrumentation(apm_client) register_exception_tracking(apm_client) CELERY_APP.run()
def hydrate_new(new: dict = None, nlp_doc: dict = None, summary: str = None,
                sentiment: float = None, **_):
    """
    Hydrate the input new with the named entities and noun chunks from the input
    NLP document and with the input summary and sentiment

    Args:
        new: new to hydrate
        nlp_doc: new NLP information
        summary: new summary
        sentiment: new sentiment

    Returns: hydrated new
    """
    LOGGER.info('Hydrating new %s', new['title'])
    new = from_dict(New, new)

    if summary is not None:
        new.summary = summary

    if sentiment is not None:
        new.sentiment = sentiment

    nlp_service = container.get('nlp_service')
    if nlp_doc is not None:
        doc = nlp_service.doc_from_json_dict(nlp_doc)
        new.entities = list(
            set(map(lambda entity: NamedEntity(text=str(entity), type=entity.label_), doc.ents)))
        new.noun_chunks = list(map(lambda chunk: str(chunk), doc.noun_chunks))

    new.hydrated = True

    return asdict(new)

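# hydrate_new relies on a dict -> dataclass -> dict round trip. A minimal sketch of
# that pattern, assuming from_dict is dacite's, and using a simplified, hypothetical
# dataclass in place of the project's New/NamedEntity models:
from dataclasses import asdict, dataclass, field
from typing import List, Optional

from dacite import from_dict


@dataclass
class ExampleNew:
    title: str
    content: str
    summary: Optional[str] = None
    sentiment: Optional[float] = None
    noun_chunks: List[str] = field(default_factory=list)
    hydrated: bool = False


payload = {'title': 'Some title', 'content': 'Some content'}
example = from_dict(ExampleNew, payload)   # dict -> dataclass
example.summary = 'Some summary'
example.hydrated = True
print(asdict(example))                      # dataclass -> dict, ready to publish
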
def initialize_worker(*_, **__):
    """
    Initialize the celery worker process environment
    """
    LOGGER.info('Initializing worker')
    exchange_publisher: ExchangePublisher = container.get('exchange_publisher')
    exchange_publisher.connect()
    exchange_publisher.initialize()

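# initialize_worker and shutdown_worker read like per-process lifecycle hooks. A
# hedged sketch of how they could be wired to Celery's standard worker signals;
# whether this project connects them exactly this way is an assumption:
from celery.signals import worker_process_init, worker_process_shutdown

worker_process_init.connect(initialize_worker)
worker_process_shutdown.connect(shutdown_worker)
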
def sentiment_analysis(nlp_doc: dict = None, **_):
    """
    Get the sentiment score of the input doc sentences

    Args:
        nlp_doc: doc to analyze sentiment

    Returns: input doc sentences sentiment score
    """
    LOGGER.info('Generating sentiment score')
    nlp_service = container.get('nlp_service')
    sentiment_analyzer = container.get('sentiment_analysis_service')
    if sentiment_analyzer is not None:
        if nlp_doc is not None:
            doc = nlp_service.doc_from_json_dict(nlp_doc)
            return sentiment_analyzer(list(doc.sents))
        else:
            LOGGER.warning('NLP document is missing. Skipping sentiment calculation...')
            return None
    else:
        LOGGER.warning('Sentiment analyzer not initialized. Skipping sentiment calculation...')
        return None

def summarize(nlp_doc: dict = None, **_):
    """
    Generate the summary for the input NLP doc

    Args:
        nlp_doc: document to generate summary

    Returns: summary of the doc sentences
    """
    LOGGER.info('Generating summary')
    nlp_service = container.get('nlp_service')
    summarizer = container.get('summary_service')
    if summarizer is not None:
        if nlp_doc is not None:
            doc = nlp_service.doc_from_json_dict(nlp_doc)
            return summarizer(list(doc.sents))
        else:
            LOGGER.warning('NLP document is missing. Skipping summary generation...')
            return None
    else:
        LOGGER.warning('Summarizer not initialized. Skipping summary generation...')
        return None

def discover_news(definition_name: str):
    """
    Discover news task

    Args:
        definition_name: name of the news discovery definition
    """
    if 'rabbit' in config:
        LOGGER.info('Executing discovery %s', definition_name)
        definition = DEFINITIONS[definition_name]
        definition_instance = definition['class'](definition)
        exchange_publisher: ExchangePublisher = container.get('exchange_publisher')
        for discovered_new in definition_instance():
            exchange_publisher(asdict(discovered_new))
    else:
        LOGGER.error('Worker configuration not initialized')

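# discover_news expects each DEFINITIONS entry to be a dict whose 'class' value is
# instantiated with the definition itself and then iterated for discovered news. A
# hedged sketch of that contract, using an entirely hypothetical definition class
# and entry (the real definitions live elsewhere in the project):
class ExampleDiscoveryDefinition:
    def __init__(self, definition: dict):
        self.definition = definition

    def __call__(self):
        # yield dataclass instances (e.g. New) so asdict() can serialize them
        yield from ()


EXAMPLE_DEFINITIONS = {
    'example_source': {
        'class': ExampleDiscoveryDefinition,
        'url': 'https://example.com/feed',   # hypothetical per-definition settings
    },
}
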
def process_new_content(new: dict = None, **_):
    """
    Apply NLP processing to the input new content

    Args:
        new: new to process content

    Returns: processed new content to hydrate the new in the next tasks
    """
    LOGGER.info('NLP Processing new %s', new['title'])
    nlp_service = container.get('nlp_service')
    if nlp_service is not None:
        processed_content = nlp_service.process_text(new['content'])
        return nlp_service.doc_to_json_dict(processed_content)
    else:
        LOGGER.warning('NLP service not initialized, skipping NLP processing')
        return None

def publish_hydrated_new(new: dict = None, **_):
    """
    Publish the input hydrated new

    Args:
        new: new to publish
    """
    if new is not None:
        LOGGER.info('Publishing hydrated new %s', new['title'])
        if config.rabbit is not None:
            LOGGER.info('Queue connection initialized, publishing...')
            exchange_publisher: ExchangePublisher = container.get('exchange_publisher')
            exchange_publisher(new)
            LOGGER.info('New published')
        else:
            LOGGER.warning('Queue connection configuration not initialized, skipping publish...')
    else:
        LOGGER.warning('Tasks chain services not initialized, skipping publish...')

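# Outside Celery, the dataflow these tasks implement looks roughly like the sequence
# below; the real chain/chord wiring is an assumption not shown in this section, and
# the input new is a hypothetical placeholder:
new = {'title': 'Some title', 'content': 'Some content'}
nlp_doc = process_new_content(new=new)
summary = summarize(nlp_doc=nlp_doc)
sentiment = sentiment_analysis(nlp_doc=nlp_doc)
hydrated = hydrate_new(new=new, nlp_doc=nlp_doc, summary=summary, sentiment=sentiment)
publish_hydrated_new(new=hydrated)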