def uncache_project_if_exists(serviceid):
    """Unpublish any active NER/IR models for *serviceid* and delete its
    cached model directory on local disk, if present.

    Does not touch the database or remote storage (compare the delete
    handler elsewhere in this file, which also updates both).
    """
    model_store = get_model_store()
    manager = ProjectManager()
    query = {"serviceid": serviceid}
    document = manager.find_model(query)
    model_lists = model_store.get_active_models()

    # Fall back to the default "ICE" engine when the project record is
    # missing entirely (get() already defaults when only the key is absent).
    if document is not None:
        engine = get(document, "engine", "ICE")
    else:
        engine = "ICE"

    model_name = get_model_name(serviceid, MODEL_TYPE_NER, engine)
    if model_name in model_lists:
        model_store.unpublish(serviceid, MODEL_TYPE_NER, engine)

    # IR models are not engine-qualified.
    model_name = get_model_name(serviceid, MODEL_TYPE_IR, engine=None)
    if model_name in model_lists:
        model_store.unpublish(serviceid, MODEL_TYPE_IR, engine=None)

    # delete model file
    # BUG FIX: the original referenced `home_dir` without defining it
    # (NameError unless a module-level global happens to exist); derive it
    # the same way the delete handler in this file does.
    home_dir = os.path.expanduser('~')
    model_store_path = os.path.join(home_dir + os.sep + '.verbis/store')
    service_path = os.path.join(model_store_path + os.sep + serviceid)
    if os.path.exists(service_path):
        shutil.rmtree(service_path)
    logger.info("uncache_project_if_exists done.")
def cache_model(config, requested_services):
    """Ensure the model behind each requested service is loaded, reloading
    it when its training is newer than the cached copy.

    config: project configuration dict holding per-model-type "status" and
        "last_trained" values (plus 'custom_entity_model' for NER).
    requested_services: iterable of sequences whose first two items are
        (serviceid, model_type), with model_type "ner" or "ir".
    """
    for service_each in requested_services:
        # Index (don't fully unpack): entries may carry extra fields.
        serviceid = service_each[0]
        model_type = service_each[1]
        engine = ""
        model_class = None
        # Only (re)load models whose training has produced something usable.
        if config[model_type]["status"] in ('trained', 'validated', 'validating'):
            if model_type == "ner":
                # NER model names are qualified by the engine of the
                # configured custom entity model.
                model_class = config['custom_entity_model']
                engine = get_engine(model_class)
                model_name = serviceid + "-" + engine + "-" + model_type
                last_trained = config["ner"]["last_trained"]
            else:
                model_name = serviceid + "-" + model_type
                last_trained = config["ir"]["last_trained"]
            get_model_store().store.check_trained_time_and_reload(
                model_name, last_trained, serviceid, model_type, engine,
                model_class)
def utterance_recase(utterances):
    """Return the utterances with the model store's case normalisation
    applied; the original-case variants returned by change_case are
    discarded."""
    store = get_model_store()
    return [store.change_case(utterance)[0] for utterance in utterances]
def tag(doc):
    """Tag one utterance with named entities for its service.

    doc: request payload; reads "text" and "serviceid".
    Returns the store's tag response with "resolved_to" data stripped by
    remove_resolved_to().
    """
    store = get_model_store()
    text = get(doc, "text", default='')
    # Replace non-breaking spaces so downstream tokenisation sees ordinary
    # whitespace.
    text = text.replace('\xa0', ' ')
    service_id = get(doc, "serviceid", default=None)
    text, original_text = store.change_case(text)

    query = {"serviceid": service_id}
    corpus = datasource_manager.find_datasource_by_service_id(doc["serviceid"])
    config = project_manager.find_model(query)

    datasources_map = {
        "predefined_entities": get(corpus, "predefined_entities", default=[]),
        "entities": get(corpus, "entities", default=[]),
        "patterns": get(corpus, "patterns", default=[]),
        "phrases": get(corpus, "phrases", default=[]),
        "distinct_token_list": get(corpus, "distinct_token_list", default=[]),
        "intents": get(corpus, "intents", default=[])
    }
    projects_map = {
        "custom_entity_model": get(config, "custom_entity_model", default=None),
        "ner_status": get(config, "ner.status", default=[]),
        "language": get(config, 'language', 'EN')
    }

    default_class_name = get(config, 'predefined_entity_model', None)
    custom_class_name = get(config, 'custom_entity_model', None)
    engine = get_engine(custom_class_name)
    last_trained = get(config, "ner.last_trained", default=None)
    default_engine = get_engine(default_class_name)
    model_name = get_model_name(service_id, "ner", engine)

    # Reload only non-CoreNLP engines whose training has completed.
    # (Original recomputed get_engine(custom_class_name) here; it is the
    # value already bound to `engine` above.)
    if engine not in get_all_corenlp_engines():
        if get(config, "ner.status", default=None) in ['trained', 'validated']:
            get_model_store().store.check_trained_time_and_reload(
                model_name, last_trained, service_id, "ner", engine,
                custom_class_name)

    response = store.tag(service_id, text, original_text, engine,
                         default_engine, default_class_name,
                         datasources_map, projects_map)
    response = remove_resolved_to(response)
    return response
def on_post(self, req, resp):
    """Bulk-retag a service's utterances for one tag category.

    Reads "serviceid" and "type" ("phrases" | "patterns" | "predefined")
    from the request body, rewrites the utterances accordingly, persists
    them, and answers with a JSON status message. When analytics logging
    is enabled, records the total request time on completion.
    """
    log_flag = False
    if app_config['BOTANALYTICS_LOG_FLAG'].upper() == "ON":
        log_flag, req_id, botanalytics, start_time = True, str(
            uuid.uuid4()), BotAnalyticsAPI(), datetime.datetime.now()
    doc = req.context['doc'] or {}
    try:
        store = get_model_store()
        serviceid = doc["serviceid"]
        datasource = manager.find_datasource_by_service_id(serviceid)
        phrases = get(datasource, "phrases")
        patterns = get(datasource, "patterns")
        entities = get(datasource, "entities", [])
        predefined_entities = get(datasource, "predefined_entities", [])
        utterances = get(datasource, "utterances")
        project_config = project_manager.find_config_by_id(serviceid)
        default_engine = get_engine(
            project_config['predefined_entity_model'])
        # Renamed from `type`, which shadowed the builtin.
        tag_type = doc["type"]
        if tag_type == "phrases":
            utterances = tag_phrase(phrases, utterances)
        if tag_type == "patterns":
            utterances = tag_pattern(patterns, utterances)
        if tag_type == "predefined":
            utterances = tag_all_predefined(store, patterns, phrases,
                                            entities, utterances,
                                            default_engine,
                                            predefined_entities,
                                            project_config)
        document = {"$set": {"utterances": utterances}}
        manager.update_datasource_by_service_id(serviceid, document)
        resp.data = json.dumps({"msg": "Successfully Updated"})
        resp.set_header('X-Powered-By', 'USTGlobal ICE')
        resp.status = falcon.HTTP_200
    except Exception as ex:
        logger.exception(ex)
        # BUG FIX: original assigned a raw dict holding the exception
        # object to resp.data; falcon expects serialised text/bytes
        # (the success path above already uses json.dumps).
        resp.data = json.dumps({"msg": str(ex)})
        resp.set_header('X-Powered-By', 'USTGlobal ICE')
        resp.status = falcon.HTTP_500
    finally:
        if log_flag:
            end_time = datetime.datetime.now()
            total_action_time = relativedelta(end_time, start_time)
            # Flatten the relativedelta into total milliseconds.
            botanalytics.log(
                requesttype="nerrequests",
                serviceid=doc['serviceid'],
                req_id=req_id,
                action="BULK TAG",
                ner_req_timestamp=start_time.replace(
                    microsecond=0).isoformat(),
                ner_req_end_timestamp=end_time.replace(
                    microsecond=0).isoformat(),
                total_action_time=(
                    total_action_time.hours * 60 * 60 * 1000 +
                    total_action_time.minutes * 60 * 1000 +
                    total_action_time.seconds * 1000) +
                    (total_action_time.microseconds / 1000))
def on_post(self, req, resp):
    """Fully retire a service's models: unpublish active NER/IR models,
    remove model files from local disk and from minio, and reset both
    training statuses to 'new' in the database."""
    doc = req.context['doc'] or {}
    try:
        home_dir = os.path.expanduser('~')
        model_store_path = os.path.join(home_dir + os.sep + '.verbis/store')
        model_store = get_model_store()
        manager = ProjectManager()
        serviceid = doc['serviceid']
        query = {'serviceid': serviceid}
        data = manager.find_model(query)
        engine = get(data, "engine", "ICE")
        active_models = model_store.get_active_models()
        logger.info("model lists - %s" % active_models)

        # NER models are qualified by engine; unpublish if currently active.
        ner_model = get_model_name(serviceid, MODEL_TYPE_NER, engine)
        if ner_model in active_models:
            model_store.unpublish(serviceid, MODEL_TYPE_NER, engine)

        # IR models carry no engine qualifier.
        ir_model = get_model_name(serviceid, MODEL_TYPE_IR, engine=None)
        logger.info(ir_model)
        if ir_model in active_models:
            model_store.unpublish(serviceid, MODEL_TYPE_IR, engine=None)

        service_dir = os.path.join(model_store_path + os.sep + serviceid)
        logger.info("path is %s" % service_dir)
        # delete model file from local
        if os.path.exists(service_dir):
            shutil.rmtree(service_dir)

        # delete model file from minio
        VerbisStore().remove_models_from_remote(serviceid)
        logger.info("files removed successfully")

        # update DB
        reset = {"$set": {"ner.status": 'new', "ir.status": 'new'}}
        manager.update_config(query, reset)
    except AssertionError as ae:
        logger.exception(ae, exc_info=True)
        logger.error(traceback.format_exc())
        raise falcon.HTTPPreconditionFailed(
            'Service publish condition failed', traceback.format_exc())
    except Exception as ex:
        logger.exception(ex, exc_info=True)
        logger.error(traceback.format_exc())
        description = 'Internal Server Error, Please try again later'
        raise falcon.HTTPServiceUnavailable('Service Outage', description,
                                            30)
    resp.set_header('X-Powered-By', 'USTGlobal Verbis')
    resp.status = falcon.HTTP_200
def predict_impl(doc, config, req_id=None):
    """Run NER and/or IR prediction for every service requested in *doc*.

    doc: request payload; reads "text", "serviceid", and (optionally) "pos".
    config: project configuration; reads 'custom_entity_model' and
        'predefined_entity_model'.
    req_id: optional request identifier forwarded to missedUtterences.

    Returns a dict with "text", "entities", "parts_of_speech", and —
    when an IR service was requested — "intent".
    """
    text = doc["text"]
    store = get_model_store()
    entity_tags = None
    response = {"text": text}
    corpus, datasources_map, projects_map = fetch_data_mappings(doc)
    # Pre-compute every text variant once, up front, so both the NER and
    # IR branches below can share them.
    truecased_text, retokenized_text, syn_processed_text, syn_processed_truecased_text, syn_processed_retokenized_text, syn_indexes = predict_preprocessing(
        text, store, datasources_map)
    for index, (serviceid, model_type, pos) in enumerate(get_requested_services(doc)):
        logger.info((serviceid, model_type, pos))
        engine = get_engine(config['custom_entity_model'])
        default_engine = get_engine(config['predefined_entity_model'])
        if model_type == MODEL_TYPE_NER:
            # NOTE: entity_tags produced here feeds the IR branch below,
            # so the order of requested services matters — IR sees tags
            # only if NER ran in an earlier iteration.
            entities, entity_tags, parts_of_speech = ner_entities(
                store, config, serviceid, model_type, engine, truecased_text,
                retokenized_text, syn_processed_truecased_text,
                syn_processed_retokenized_text, syn_indexes, default_engine,
                datasources_map, projects_map, pos)
            response["entities"] = entities
            # Include POS output unless the caller set "pos" to a
            # non-True value.
            if (parts_of_speech is not None) and (get(doc, 'pos', default=True) is True):
                response["parts_of_speech"] = parts_of_speech
        elif model_type == MODEL_TYPE_IR:
            prediction, probabilities = ir_entity_tags(
                store, serviceid, model_type, engine,
                syn_processed_truecased_text, entity_tags, datasources_map)
            logger.info("prediction......................%s" % prediction)
            logger.info("probabilities......................%s" % probabilities)
            response["intent"] = {
                "top_intent": prediction,
                "confidence_level": probabilities
            }
    # NOTE(review): `serviceid` here is the value leaked from the last loop
    # iteration, while missedUtterences gets doc['serviceid'] — confirm the
    # mismatch is intentional for multi-service requests.
    missed_text = missedUtterences(response, doc['serviceid'], req_id,
                                   syn_processed_retokenized_text)
    # NOTE(review): identity (`is not`) comparison — presumably
    # missedUtterences returns the same object when nothing was missed;
    # verify, since an equal-but-distinct string would also trigger this.
    if missed_text is not syn_processed_retokenized_text:
        updateDatasource(serviceid, missed_text)
    # Guarantee both keys exist even when no NER service was requested.
    if 'entities' not in response:
        response['entities'] = []
    if 'parts_of_speech' not in response:
        response['parts_of_speech'] = []
    return response
def retag(serviceid, utterances, new_default_model_class, predefined_entities,
          project_config):
    """Re-run predefined-entity tagging over every utterance with a new
    default model, keeping the existing custom tags.

    Each utterance dict's "mapping" (a JSON string) is rewritten with the
    merged tags, and its ir_trained/ner_trained flags are cleared so the
    models retrain. Returns the mutated *utterances* list.

    project_config is currently unused but kept for interface
    compatibility with existing callers.
    """
    store = get_model_store()
    # Hoisted out of the loop: the engine depends only on the new model
    # class, not on the utterance.
    default_engine = get_engine(new_default_model_class)
    for utterances_each in utterances:
        utterance = get(utterances_each, "utterance")
        case_converted_utterance = get(utterances_each,
                                       "case_converted_utterance")
        mapping = json.loads(get(utterances_each, "mapping"))
        def_tags = store.tag_predefined(MODEL_TYPE_NER, default_engine,
                                        case_converted_utterance, utterance)
        # Keep only the entity types the project still recognises.
        def_tags = [tag for tag in def_tags
                    if str(tag['tag']) in predefined_entities]
        cust_tags = get(mapping, "tags", [])
        # Merge, resolving span overlaps between default and custom tags.
        # (Original also built an unused `doc` dict and a dead
        # `final_tags = []` initialiser; both removed.)
        mapping["tags"] = remove_overlapping(def_tags, cust_tags)
        utterances_each["mapping"] = json.dumps(mapping)
        utterances_each["ir_trained"] = False
        utterances_each["ner_trained"] = False
    return utterances