def tokenizer(self):
    """
    Get the topic tokenizer, building it on first access.

    The instance is cached on ``self._tokenizer``. When no instance is cached
    yet, the JSON document stored under the ``topic_tiler_config`` key of the
    client returned by ``cache_ops.get_redis()`` is read. A missing or empty
    value means every setting falls back to its default.

    Settings read from the config (default in parentheses):

    * ``w`` (20) -- converted with ``int``
    * ``k`` (10) -- converted with ``int``
    * ``stopwords`` ('english') -- passed to ``stopwords.words``
    * ``cutoff_policy`` ('HC')

    :return: The tokenizer
    """
    if self._tokenizer is not None:
        # Already built: skip fetching a Redis client altogether.
        return self._tokenizer

    cfg_str = cache_ops.get_redis().get('topic_tiler_config')
    # An empty dict makes every lookup below resolve to its default, which is
    # exactly what the "no config stored" case needs.
    cfg = dict(json.loads(cfg_str)) if cfg_str else {}

    width = int(cfg.get('w', 20))
    k_size = int(cfg.get('k', 10))
    stop_words = stopwords.words(cfg.get('stopwords', 'english'))
    cutoff_policy = cfg.get('cutoff_policy', 'HC')

    self._tokenizer = TopicTokenizer(cutoff_policy, stop_words, width, k_size)
    return self._tokenizer
def tokenizer(self):
    """
    Get the sentence tokenizer, building it on first access.

    The instance is cached on ``self._tokenizer``. When no instance is cached
    yet, the JSON document stored under the ``sent_tokenizer_config`` key of
    the client returned by ``cache_ops.get_redis()`` is read and its
    ``sent_tokenizer_path`` entry is handed to ``SentenceTokenizer``.

    If no config is stored, or the stored config has no
    ``sent_tokenizer_path`` key, the path defaults to
    ``nltk:tokenizers/punkt/english.pickle``.

    :return: The sentence tokenizer
    """
    if self._tokenizer is not None:
        # Already built: skip fetching a Redis client altogether.
        return self._tokenizer

    default_path = "nltk:tokenizers/punkt/english.pickle"
    cfg_str = cache_ops.get_redis().get('sent_tokenizer_config')
    cfg = dict(json.loads(cfg_str)) if cfg_str else {}

    # Default the key so a partial config does not pass None as the path.
    tok_path = cfg.get('sent_tokenizer_path', default_path)

    self._tokenizer = SentenceTokenizer(tok_path)
    return self._tokenizer
def ner(self):
    """
    Get the NER model, building it on first access.

    The instance is cached on ``self._ner``. When no instance is cached yet,
    the JSON document stored under the ``ner_config`` key of the client
    returned by ``cache_ops.get_redis()`` is read. A missing or empty value
    means every setting falls back to its default.

    Settings read from the config (default in parentheses):

    * ``ner_model`` ('en_core_web_sm') -- first argument to ``NERModel``
    * ``use_gpu`` (False) -- second argument to ``NERModel``

    :return: The NER model
    """
    if self._ner is not None:
        # Already built: skip fetching a Redis client altogether.
        return self._ner

    config_str = cache_ops.get_redis().get('ner_config')
    # An empty dict makes both lookups below resolve to their defaults.
    config = dict(json.loads(config_str)) if config_str else {}

    load_gpu = config.get('use_gpu', False)
    model_type = config.get('ner_model', 'en_core_web_sm')

    self._ner = NERModel(model_type, load_gpu)
    return self._ner
-h --help Show this help screen """
from docopt import docopt
import logging

from nlp_server.cache import cache_ops
from nlp_server.nlp_celery.celery_app import setup_app, set_config
# NOTE(review): the three task classes below are not referenced by name in the
# visible script; presumably they are imported so the tasks get registered
# with the Celery app -- confirm before removing.
from nlp_server.nlp_celery.tasks.ner_task import NERTask
from nlp_server.nlp_celery.tasks.sent_tokenizer_task import SentTokenizerTask
from nlp_server.nlp_celery.tasks.topic_tiler_task import TopicTilerTask

if __name__ == "__main__":
    # Script entry point: build the app, push the command-line configuration,
    # then submit one sample 'NERTask' request.
    #
    # NOTE(review): no logging configuration is visible here. With Python's
    # default root level (WARNING) the info() calls below emit nothing unless
    # setup_app() or another module configures logging -- confirm.
    logging.info("Setting up Application")
    APP = setup_app()

    # docopt parses the command line against this module's docstring.
    DOC = docopt(__doc__, version='NLP Server 0.1')
    CLIENT = cache_ops.get_redis()

    logging.info('Setting Config')
    # Hands the parsed arguments and the client from cache_ops.get_redis() to
    # set_config. NOTE(review): presumably this stores the CLI options in
    # Redis for the workers to read -- confirm against set_config.
    set_config(DOC, CLIENT)

    logging.info("Registering Tasks")
    # Prints the app's `tasks` attribute to stdout.
    print(APP.tasks)

    logging.info("Sending to Celery")
    # Sample request: one sentence plus the list ['PERSON'].
    # NOTE(review): the second argument looks like the entity labels to
    # extract -- confirm against NERTask.
    APP.send_task(
        'NERTask',
        args=[
            'My name is slim Shady and all you other slim shadys can.',
            ['PERSON']
        ])