def deploy(self):
    """Train the intent recognizer for ``self.serviceid`` and persist it.

    Looks up the project config for the service, loads the intent dataset,
    fits an ``IntentRecognizer`` on it, stores the fitted model through
    ``VerbisStore`` and records progress in the project config under the
    ``ir.*`` keys. Any exception raised while training is logged and
    written to the config as ``STATUS_TRAINING_FAILED`` instead of being
    propagated to the caller.

    :return: None
    """
    manager = ProjectManager()
    query = {"serviceid": self.serviceid}
    config = manager.find_model(query)

    if config is None:
        description = 'Unable to find project_config with given id.' \
                      'Please check your request params and retry'
        self.logger.error(description)
        return

    # Raw error texts that are replaced by a friendlier status message.
    vocabulary_errors = (
        "After pruning, no terms remain. Try a lower min_df or a higher max_df.",
        "max_df corresponds to < documents than min_df",
    )
    label_error = "Invalid type float for labels"

    try:
        trained_data, df = get_ir_dataset(self.serviceid, self.logger)
        self.logger.info("Unique labels %s" % np.unique(df.intent.tolist()))

        # Per-intent row counts, stored with the config as dataset stats.
        group = df.groupby(['intent']).agg('count')
        stats = group.reset_index().to_json(orient="records")
        # The selector is only enabled when more than one intent is present.
        useSelector = len(group) > 1
        self.logger.info(stats)

        document = {
            "$set": {
                "ir.status": ProjectManager.STATUS_TRAINING,
                "ir.status_message": "Intent training is in progress.",
                "ir.dataset.stats": stats,
                "ir.last_trained": datetime.datetime.utcnow()
            }
        }
        manager.update_config(query, document)

        ir = IntentRecognizer(DEFAULT_CONFIG, serviceid=self.serviceid,
                              useSelector=useSelector)
        self.logger.info("Starting fitting for deployment")
        ir.fit(df, df.intent)
        self.logger.info("Fitting for deployment completed")

        VerbisStore().save_ir(ir)
        # Send the model file to the minio server as well
        # (no engine, .dat extension).
        VerbisStore().save_ir_minio(ir)

        document = {
            "$set": {
                "utterances": trained_data,
            }
        }
        data_manager = DatasourceManager()
        data_manager.update_datasource(query, document)

        document = {
            "$set": {
                "ir.status": ProjectManager.STATUS_TRAINED,
                "ir.status_message": "The Intent model has been successfully trained",
                "ir.logs.deploy": ""
            }
        }
        manager.update_config(query, document)
    except Exception as e:
        self.logger.error(e)
        # Compare and store the exception *text*: an exception object never
        # equals a str, and the status message must be a plain string.
        message = str(e)
        if message in vocabulary_errors:
            message = "Sufficient vocabulary to build the model is not available. Please add more utterances."
        elif message == label_error:
            message = "Add more intents for intent training"

        document = {
            "$set": {
                "ir.status": ProjectManager.STATUS_TRAINING_FAILED,
                "ir.status_message": message,
                # Log text exposed by the last handler attached to the logger.
                "ir.logs.deploy": self.logger.handlers[-1].logs
            }
        }
        manager.update_config(query, document)
        traceback.print_exc()
def train(self, train_intent):
    """Train the entity recognizer for ``self.serviceid`` and persist it.

    Marks the project config as training, trains the recognizer returned by
    ``self.instantiate_trainer`` on the service's datasource, stores the
    model through ``VerbisStore``, writes the trained utterances back to
    the datasource and records the outcome under the ``ner.*`` config keys.
    Any exception raised while training is logged and written to the
    config as ``STATUS_TRAINING_FAILED`` instead of being propagated.

    :param train_intent: when exactly ``True``, the intent status
        (``ir.status``) is additionally set to ``STATUS_HOLD`` while entity
        training runs.
    :return: None
    """
    manager = ProjectManager()
    query = {"serviceid": self.serviceid}
    config = manager.find_model(query)

    if config is None:
        description = 'Unable to find project_config with given id.' \
                      'Please check your request params and retry'
        self.logger.error(description)
        return

    try:
        status_fields = {
            "ner.status": ProjectManager.STATUS_TRAINING,
            "ner.status_message": "Entity training is in progress.",
            "ner.last_trained": datetime.datetime.utcnow()
        }
        if train_intent is True:
            # Intent training is put on hold until entity training is done.
            status_fields["ir.status"] = ProjectManager.STATUS_HOLD
            status_fields["ir.status_message"] = "Awaiting the completion of entity training."
        manager.update_config(query, {"$set": status_fields})

        # starting actual training
        data_manager = DatasourceManager()
        self.logger.info("Starting training of service %s" % self.serviceid)
        corpus = data_manager.find_model(query)
        custom_entity_model = get(config, "custom_entity_model")
        entity_recognizer = self.instantiate_trainer(custom_entity_model)
        trained_utterances = entity_recognizer.train(corpus)

        # NOTE(review): the original indentation was lost; both save calls
        # are assumed to belong to this condition - confirm against history.
        if entity_recognizer.get_engine() not in get_all_corenlp_engines():
            VerbisStore().save_ner(entity_recognizer,
                                   model_type=MODEL_TYPE_NER)
            # send file to minio server
            VerbisStore().save_ner_minio(entity_recognizer,
                                         model_type=MODEL_TYPE_NER)

        document = {
            "$set": {
                "utterances": trained_utterances,
            }
        }
        data_manager.update_datasource(query, document)

        document = {
            "$set": {
                "ner.status": ProjectManager.STATUS_TRAINED,
                "ner.status_message": "Entity training completed successfully.",
                "ner.logs.train": ""
            }
        }
        manager.update_config(query, document)
        self.logger.info(
            "Completed training entity recognizer for service %s" % self.serviceid)
    except Exception as ex:
        self.logger.exception(ex, exc_info=True)
        self.logger.error(traceback.format_exc())

        # Compare and store the exception *text*: an exception object never
        # equals a str, and the status message must be a plain string.
        message = str(ex)
        # Prefix match so the hint applies whatever fold/sample counts
        # appear in the underlying error text.
        if message.startswith("Cannot have number of folds"):
            message = "Add more utterances for entity training"

        document = {
            "$set": {
                "ner.status": ProjectManager.STATUS_TRAINING_FAILED,
                "ner.status_message": message,
                # Log text exposed by the last handler attached to the logger.
                "ner.logs.train": self.logger.handlers[-1].logs
            }
        }
        manager.update_config(query, document)