def public_trained_projects(organisation_name):
    """
    Find public, fully trained (NER and IR) non-master-bot projects
    belonging to the given organisation.

    :param organisation_name: organisation to filter projects by
    :return: cursor of matching projects (name, serviceid), or False if none
    """
    manager = ProjectManager()
    query = {
        "$and": [{
            "organisation_name": organisation_name
        }, {
            "visibility": "public"
        }, {
            "ner.status": "trained"
        }, {
            "ir.status": "trained"
        }, {
            "masterBot": False
        }]
    }
    projection = {"name": 1, "serviceid": 1}
    if manager.exists(query):
        projects_to_import = manager.find(query, projection)
        logger.info("public trained projects found in PR DB.")
        return projects_to_import
    else:
        return False
def user_trained_projects(userid):
    """
    Find the given user's private, fully trained (NER and IR)
    non-master-bot projects.

    :param userid: id of the user who created the projects
    :return: cursor of matching projects (name, serviceid), or False if none
    """
    manager = ProjectManager()
    query = {
        "$and": [{
            "createdBy": ObjectId(userid)
        }, {
            "ner.status": "trained"
        }, {
            "ir.status": "trained"
        }, {
            "visibility": "private"
        }, {
            "masterBot": False
        }]
    }
    projection = {"name": 1, "serviceid": 1}
    if manager.exists(query):
        projects_to_import = manager.find(query, projection)
        logger.info("user trained projects found in PR DB.")
        return projects_to_import
    else:
        return False
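# Usage sketch (an assumption, not part of the original module): both helpers
# above return either a cursor or False, so callers must guard before
# iterating. The function name and arguments here are hypothetical examples.
def importable_projects(organisation_name, userid):
    """Collect every project the user may import, public and private."""
    projects = []
    for result in (public_trained_projects(organisation_name),
                   user_trained_projects(userid)):
        if result:  # result is False when nothing matched
            projects.extend(result)
    return projects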
def uncache_project_if_exists(serviceid):
    model_store = get_model_store()
    manager = ProjectManager()
    query = {"serviceid": serviceid}
    document = manager.find_model(query)
    model_lists = model_store.get_active_models()
    if document is not None:
        engine = get(document, "engine", "ICE")
    else:
        engine = "ICE"
    model_name = get_model_name(serviceid, MODEL_TYPE_NER, engine)
    if model_name in model_lists:
        model_store.unpublish(serviceid, MODEL_TYPE_NER, engine)
    model_name = get_model_name(serviceid, MODEL_TYPE_IR, engine=None)
    if model_name in model_lists:
        model_store.unpublish(serviceid, MODEL_TYPE_IR, engine=None)
    # delete the model files from the local store
    home_dir = os.path.expanduser('~')
    model_store_path = os.path.join(home_dir, '.verbis', 'store')
    project_store_path = os.path.join(model_store_path, serviceid)
    if os.path.exists(project_store_path):
        shutil.rmtree(project_store_path)
    logger.info("uncache_project_if_exists done.")
def change_publish_status():
    '''
    Downgrade any project whose NER/IR status is "published" back to
    "trained", so it is no longer served until republished.
    '''
    try:
        manager = ProjectManager()
        data = manager.find_all_model({})
        for document in data:
            try:
                ner_status = get(document, 'ner.status', "new")
                ir_status = get(document, 'ir.status', "new")
                serviceid = document['serviceid']
                if ner_status == "published" and ir_status == "published":
                    ner_status = 'trained'
                    ir_status = 'trained'
                elif ner_status == "published":
                    ner_status = 'trained'
                elif ir_status == "published":
                    ir_status = 'trained'
                manager.update_ir_ner_status(serviceid, ner_status, ir_status)
            except Exception as e:
                logger.exception(e, exc_info=True)
    except Exception as e:
        logger.exception(e, exc_info=True)
def update_last_access_to_predict_api(serviceid):
    query = {"serviceid": serviceid}
    document = {
        "$set": {
            "lastAccessed": datetime.datetime.utcnow(),
        }
    }
    manager = ProjectManager()
    manager.update_config(query, document)
def on_post(self, req, resp):
    doc = req.context['doc'] or {}
    try:
        home_dir = os.path.expanduser('~')
        model_store_path = os.path.join(home_dir, '.verbis', 'store')
        model_store = get_model_store()
        manager = ProjectManager()
        serviceid = doc['serviceid']
        query = {'serviceid': serviceid}
        data = manager.find_model(query)
        engine = get(data, "engine", "ICE")
        model_name = get_model_name(serviceid, MODEL_TYPE_NER, engine)
        model_lists = model_store.get_active_models()
        logger.info("model lists - %s" % model_lists)
        if model_name in model_lists:
            model_store.unpublish(serviceid, MODEL_TYPE_NER, engine)
        model_name = get_model_name(serviceid, MODEL_TYPE_IR, engine=None)
        logger.info(model_name)
        if model_name in model_lists:
            model_store.unpublish(serviceid, MODEL_TYPE_IR, engine=None)
        project_store_path = os.path.join(model_store_path, serviceid)
        logger.info("path is %s" % project_store_path)
        # delete model files from the local store
        if os.path.exists(project_store_path):
            shutil.rmtree(project_store_path)
        # delete model files from minio
        VerbisStore().remove_models_from_remote(serviceid)
        logger.info("files removed successfully")
        # reset training status in the DB
        document = {"$set": {"ner.status": 'new', "ir.status": 'new'}}
        manager.update_config(query, document)
    except AssertionError as ae:
        logger.exception(ae, exc_info=True)
        logger.error(traceback.format_exc())
        raise falcon.HTTPPreconditionFailed(
            'Service publish condition failed', traceback.format_exc())
    except Exception as ex:
        logger.exception(ex, exc_info=True)
        logger.error(traceback.format_exc())
        description = 'Internal Server Error, Please try again later'
        raise falcon.HTTPServiceUnavailable('Service Outage', description, 30)
    resp.set_header('X-Powered-By', 'USTGlobal Verbis')
    resp.status = falcon.HTTP_200
def validate_project_organisation(project_id, organisation_name):
    manager = ProjectManager()
    query = {
        "$and": [{
            "_id": ObjectId(project_id)
        }, {
            "organisation_name": organisation_name
        }]
    }
    return bool(manager.exists(query))
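# A minimal pytest sketch for validate_project_organisation, following the
# mocker pattern of the other tests in this section. Patching exists() on the
# ProjectManager class (rather than a module path, which is not shown here)
# is an assumption; the ObjectId and organisation name are example values.
def test_validate_project_organisation(mocker):
    from bson import ObjectId
    mocker.patch.object(ProjectManager, 'exists', return_value=True)
    assert validate_project_organisation(str(ObjectId()), "ust") is True
    mocker.patch.object(ProjectManager, 'exists', return_value=False)
    assert validate_project_organisation(str(ObjectId()), "ust") is False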
def validate_service_id_and_cache(doc):
    """
    Cache the model for the requested project if it was not already
    cached at prediction time.

    :param doc: request payload containing the serviceid
    :return: the project config for the serviceid
    """
    manager = ProjectManager()
    query = {"serviceid": get(doc, "serviceid")}
    config = manager.find_model(query)
    if config is None:
        raise Exception("Invalid Service ID.")
    else:
        serviceid_info = get_requested_services(doc)
        cache_model(config, serviceid_info)
        return config
def test_validate_service_id_and_cache_data(mocker):
    doc = dict(
        text="I would like to have an appointment of Dr Merin on 15 / 10 / 2018 for Eliza",
        serviceid="MedicalAssistant-test",
        pos=True,
        intent=True,
        entity=True)
    conf = dict(
        predefined_entity_model="ice_commons.er.engines.spacy_ner.SpacyDefaultNER",
        custom_entity_model="ice_commons.er.engines.mitie_ner.MitieCustomNER",
        serviceid="MedicalAssistant-test",
        language="EN",
        ner=dict(status="new"),
        ir=dict(status="new"))
    manager = mocker.patch(
        'ice_rest.rest.services.parse.predict.ProjectManager',
        return_value=ProjectManager())
    mocker.patch.object(manager.return_value, 'find_model', return_value=conf)
    mocker.patch('ice_rest.rest.services.parse.predict.get_requested_services',
                 return_value=[('MedicalAssistant-test', 'ner', True),
                               ('MedicalAssistant-test', 'ir', None)])
    mocker.patch('ice_rest.rest.services.parse.predict.cache_model',
                 return_value=None)
    config = validate_service_id_and_cache(doc)
    assert config == conf
def delete_project_if_exists(given_serviceid):
    '''
    Delete the project and its datasource for the given serviceid,
    if they exist.
    '''
    manager = ProjectManager()
    query = {"serviceid": given_serviceid}
    if manager.exists(query):
        manager.delete(query)
        logger.info("project deleted from PR DB.")
    ds_manager = DatasourceManager()
    if ds_manager.exists(query):
        ds_manager.delete(query)
        logger.info("project deleted from DS DB.")
def get_published_models():
    manager = ProjectManager()
    ids = manager.get_models_by_status(manager.STATUS_PUBLISHED)
    df = pd.DataFrame(ids)
    results = []
    for index, row in df.iterrows():
        service_id = row['serviceid']
        ner_status = get(row, "ner.status")
        engine = get(row, "engine")
        if ner_status == ProjectManager.STATUS_PUBLISHED:
            results.append((service_id, 'ner', engine))
        ir_status = get(row, "ir.status")
        if ir_status == ProjectManager.STATUS_PUBLISHED:
            results.append((service_id, 'ir', None))
    return results
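# A minimal pytest sketch for get_published_models. The shape of the documents
# returned by get_models_by_status is inferred from the loop above and is an
# assumption; patching on the ProjectManager class avoids guessing the module
# path used by the other tests.
def test_get_published_models(mocker):
    docs = [{
        'serviceid': 'svc-1',
        'engine': 'ICE',
        'ner': {'status': ProjectManager.STATUS_PUBLISHED},
        'ir': {'status': 'trained'},
    }]
    mocker.patch.object(ProjectManager, 'get_models_by_status',
                        return_value=docs)
    # only the published NER model should be reported
    assert get_published_models() == [('svc-1', 'ner', 'ICE')]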
def test_update_last_access_to_predict_api(mocker):
    manager = mocker.patch(
        'ice_rest.rest.services.parse.predict.ProjectManager',
        return_value=ProjectManager())
    mocker.patch.object(manager.return_value, 'update_config',
                        return_value=None)
    assert update_last_access_to_predict_api("") is None
def test_validate_service_id_and_cache_null(mocker):
    manager = mocker.patch(
        'ice_rest.rest.services.parse.predict.ProjectManager',
        return_value=ProjectManager())
    mocker.patch.object(manager.return_value, 'find_model', return_value=None)
    with pytest.raises(Exception) as exception:
        validate_service_id_and_cache({})
    assert str(exception.value) == "Invalid Service ID."
def updateStatus(serviceid, train_ner, train_ir):
    manager = ProjectManager()
    query = {"serviceid": serviceid}
    config = manager.find_model(query)
    if config is not None:
        if train_ner is True:
            document = {
                "$set": {
                    "ner.status": ProjectManager.STATUS_HOLD,
                    "ner.status_message":
                        "Awaiting the completion of entity training."
                }
            }
            manager.update_config(query, document)
        if train_ir is True:
            document = {
                "$set": {
                    "ir.status": ProjectManager.STATUS_HOLD,
                    "ir.status_message":
                        "Awaiting the completion of intent training."
                }
            }
            manager.update_config(query, document)
def db_add_dict(serviceid, text):
    datasource_manager = DatasourceManager()
    project_manager = ProjectManager()
    project_config = project_manager.find_config_by_id(serviceid)
    language_code = get(project_config, "language", "EN")
    corpus = datasource_manager.find_datasource_by_service_id(serviceid)
    distinct_token_list = get(corpus, "distinct_token_list")
    if distinct_token_list is None:
        distinct_token_list = []
    stopword_removed_text = stopword_remover(text, language_code)
    distinct_token_list.extend(
        list(set(remove_single_character_tokens(stopword_removed_text))))
    distinct_token_list = list(set(distinct_token_list))
    document = {
        "$set": {
            "distinct_token_list": distinct_token_list,
        }
    }
    datasource_manager.update_datasource_by_service_id(serviceid, document)
def uncache_project_if_exists():
    '''
    Uncache models whose last date of access is more than
    app_config['MINIO_DAYS'] days ago.
    '''
    try:
        manager = ProjectManager()
        data = manager.find_all_model({})
        for document in data:
            try:
                serviceid = document['serviceid']
                last_accessed = document['lastAccessed']
                # lastAccessed is stored in UTC (see
                # update_last_access_to_predict_api), so compare in UTC
                cutoff = datetime.utcnow() - timedelta(
                    days=int(app_config['MINIO_DAYS']))
                ner_status = document['ner']['status']
                ir_status = document['ir']['status']
                if last_accessed < cutoff and ner_status in [
                        'trained', 'validated'
                ] and ir_status in ['trained', 'validated']:
                    logger.info("removal process starts")
                    requests.post(
                        url="http://localhost:8021/api/parse/cache/remove",
                        json={'serviceid': serviceid},
                        headers={'Content-type': 'application/json'})
                    logger.info(
                        "Model with service id: %s is removed from minio cache "
                        "since it was last accessed more than %s days back. "
                        "Please retrain." %
                        (serviceid, app_config['MINIO_DAYS']))
            except Exception as ex:
                logger.exception(ex, exc_info=True)
                logger.error(traceback.format_exc())
                description = 'Internal Server Error, Please try again later'
                raise falcon.HTTPServiceUnavailable('Service Outage',
                                                    description, 30)
    except Exception as ex:
        logger.exception(ex, exc_info=True)
        logger.error(traceback.format_exc())
        description = 'Internal Server Error, Please try again later'
        raise falcon.HTTPServiceUnavailable('Service Outage', description, 30)
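# A sketch of how this cleanup might be run periodically (an assumption: the
# original scheduling mechanism is not shown here, and the function name and
# interval below are hypothetical). Any standard scheduler would work; this
# example uses a plain daemon thread.
import threading
import time

def schedule_uncache_job(interval_seconds=24 * 60 * 60):
    """Run uncache_project_if_exists once per interval in the background."""
    def _loop():
        while True:
            uncache_project_if_exists()
            time.sleep(interval_seconds)
    threading.Thread(target=_loop, daemon=True).start()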
def put_corenlp_modelname(serviceid, model_name):
    manager = ProjectManager()
    query = {"serviceid": serviceid}
    # store only the file name, not the full path
    document = {"$set": {"corenlp_model_name": model_name.split("/")[-1]}}
    manager.update_config(query, document)
def get_corenlp_modelname(serviceid):
    manager = ProjectManager()
    query = {"serviceid": serviceid}
    ds = manager.find_model(query)
    return get(ds, "corenlp_model_name", default=None)
def get_ner_status(serviceid):
    manager = ProjectManager()
    query = {"serviceid": serviceid}
    ds = manager.find_model(query)
    return get(ds, "ner.status", default=[])
def get_custom_class_name(serviceid):
    manager = ProjectManager()
    query = {"serviceid": serviceid}
    ds = manager.find_model(query)
    return get(ds, "custom_entity_model", default=[])
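# Usage sketch (illustrative only, not part of the original module):
# put_corenlp_modelname stores just the basename, so a round trip through
# get_corenlp_modelname drops the directory part of the path. The serviceid
# and path below are hypothetical example values.
def corenlp_modelname_roundtrip_example():
    put_corenlp_modelname("svc-1", "/tmp/models/ner-model-en.ser.gz")
    assert get_corenlp_modelname("svc-1") == "ner-model-en.ser.gz"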
import logging

from pydash import get

from ice_commons.data.dl.manager import DatasourceManager, ProjectManager
from ice_rest.rest.services.parse.impl.common.missed_utterances_impl import missedUtterences
from ice_rest.rest.services.parse.impl.common.store_utils import get_model_store, get_requested_services
from ice_commons.utility.custom_tokenizer import tokenize_utterance
from ice_commons.core.model_utils import get_engine
from ice_commons.utils import MODEL_TYPE_IR, MODEL_TYPE_NER
import re
from collections import OrderedDict

logger = logging.getLogger(__name__)

project_manager = ProjectManager()
datasource_manager = DatasourceManager()


def get_proba(intent_list):
    # report every known intent at 0% and "No intent" at 100%
    prob = {}
    for intent_each in intent_list:
        if intent_each['name'] != "No intent":
            prob[intent_each['name']] = "0.0%"
    prob["No intent"] = "100.0%"
    return prob


def updateDatasource(serviceid, missed_text):
    """
    :param serviceid:
    :param missed_text:
    :return:
def train(self, train_intent):
    """
    Train the entity recognizer for this service and, if requested,
    put intent training on hold until entity training completes.

    :param train_intent: whether intent training will follow this run
    :return:
    """
    manager = ProjectManager()
    query = {"serviceid": self.serviceid}
    config = manager.find_model(query)
    if config is not None:
        try:
            document = {
                "$set": {
                    "ner.status": ProjectManager.STATUS_TRAINING,
                    "ner.status_message": "Entity training is in progress.",
                    "ner.last_trained": datetime.datetime.utcnow()
                }
            }
            if train_intent is True:
                document = {
                    "$set": {
                        "ner.status": ProjectManager.STATUS_TRAINING,
                        "ner.status_message": "Entity training is in progress.",
                        "ir.status": ProjectManager.STATUS_HOLD,
                        "ir.status_message":
                            "Awaiting the completion of entity training.",
                        "ner.last_trained": datetime.datetime.utcnow()
                    }
                }
            manager.update_config(query, document)
            # starting actual training
            data_manager = DatasourceManager()
            self.logger.info("Starting training of service %s" % self.serviceid)
            corpus = data_manager.find_model(query)
            custom_entity_model = get(config, "custom_entity_model")
            entity_recognizer = self.instantiate_trainer(custom_entity_model)
            trained_utterances = entity_recognizer.train(corpus)
            if entity_recognizer.get_engine() not in get_all_corenlp_engines():
                VerbisStore().save_ner(entity_recognizer,
                                       model_type=MODEL_TYPE_NER)
            # also send the model file to the minio server
            VerbisStore().save_ner_minio(entity_recognizer,
                                         model_type=MODEL_TYPE_NER)
            document = {
                "$set": {
                    "utterances": trained_utterances,
                }
            }
            data_manager.update_datasource(query, document)
            document = {
                "$set": {
                    "ner.status": ProjectManager.STATUS_TRAINED,
                    "ner.status_message":
                        "Entity training completed successfully.",
                    "ner.logs.train": ""
                }
            }
            manager.update_config(query, document)
            self.logger.info(
                "Completed training entity recognizer for service %s" %
                self.serviceid)
        except Exception as ex:
            self.logger.exception(ex, exc_info=True)
            self.logger.error(traceback.format_exc())
            message = str(ex)
            if message == ("Cannot have number of folds n_folds=3 greater "
                           "than the number of samples: 2."):
                message = "Add more utterances for entity training"
            document = {
                "$set": {
                    "ner.status": ProjectManager.STATUS_TRAINING_FAILED,
                    "ner.status_message": message,
                    "ner.logs.train": self.logger.handlers[-1].logs
                }
            }
            manager.update_config(query, document)
    else:
        description = 'Unable to find project_config with given id. ' \
                      'Please check your request params and retry'
        self.logger.error(description)
def deploy(self):
    """
    Train and deploy the intent recognizer for this service.

    :return:
    """
    manager = ProjectManager()
    query = {"serviceid": self.serviceid}
    config = manager.find_model(query)
    if config is not None:
        try:
            trained_data, df = get_ir_dataset(self.serviceid, self.logger)
            self.logger.info("Unique labels %s" % np.unique(df.intent.tolist()))
            group = df.groupby(['intent']).agg('count')
            stats = group.reset_index().to_json(orient="records")
            # the label selector is only useful with more than one intent
            useSelector = len(group) > 1
            self.logger.info(stats)
            document = {
                "$set": {
                    "ir.status": ProjectManager.STATUS_TRAINING,
                    "ir.status_message": "Intent training is in progress.",
                    "ir.dataset.stats": stats,
                    "ir.last_trained": datetime.datetime.utcnow()
                }
            }
            manager.update_config(query, document)
            ir = IntentRecognizer(DEFAULT_CONFIG,
                                  serviceid=self.serviceid,
                                  useSelector=useSelector)
            self.logger.info("Starting fitting for deployment")
            ir.fit(df, df.intent)
            self.logger.info("Fitting for deployment completed")
            VerbisStore().save_ir(ir)
            # also send the model file to the minio server
            # (no engine, .dat extension)
            VerbisStore().save_ir_minio(ir)
            document = {
                "$set": {
                    "utterances": trained_data,
                }
            }
            data_manager = DatasourceManager()
            data_manager.update_datasource(query, document)
            document = {
                "$set": {
                    "ir.status": ProjectManager.STATUS_TRAINED,
                    "ir.status_message":
                        "The Intent model has been successfully trained",
                    "ir.logs.deploy": ""
                }
            }
            manager.update_config(query, document)
        except Exception as e:
            self.logger.error(e)
            message = str(e)
            if message in ("After pruning, no terms remain. Try a lower min_df or a higher max_df.",
                           "max_df corresponds to < documents than min_df"):
                message = ("Sufficient vocabulary to build the model is not "
                           "available. Please add more utterances.")
            elif message == "Invalid type float for labels":
                message = "Add more intents for intent training"
            document = {
                "$set": {
                    "ir.status": ProjectManager.STATUS_TRAINING_FAILED,
                    "ir.status_message": message,
                    "ir.logs.deploy": self.logger.handlers[-1].logs
                }
            }
            manager.update_config(query, document)
            traceback.print_exc()
    else:
        description = 'Unable to find project_config with given id. ' \
                      'Please check your request params and retry'
        self.logger.error(description)