Esempio n. 1
0
    def deploy(self):
        """Train the intent recognizer for ``self.serviceid`` and persist it.

        Looks up the project config for the service; if none exists, logs an
        error and returns. Otherwise it loads the intent dataset, records a
        "training" status, fits an ``IntentRecognizer``, saves the model
        through ``VerbisStore`` (``save_ir`` and ``save_ir_minio``), writes
        the trained utterances back to the datasource and finally records a
        "trained" status.

        Any exception raised during that sequence is caught: the project
        config is marked as failed with a readable message and the captured
        logs, and the traceback is printed. Nothing is re-raised.

        :return: None
        """
        manager = ProjectManager()
        query = {
            "serviceid": self.serviceid
        }
        config = manager.find_model(query)
        if config is None:
            description = 'Unable to find project_config with given id. ' \
                          'Please check your request params and retry'
            self.logger.error(description)
            return

        try:
            trained_data, df = get_ir_dataset(self.serviceid, self.logger)
            self.logger.info("Unique labels %s" % np.unique(df.intent.tolist()))

            # Per-intent row counts, serialized as JSON records for storage.
            group = df.groupby(['intent']).agg('count')
            stats = group.reset_index().to_json(orient="records")
            # The selector is only enabled when more than one intent exists.
            use_selector = len(group) > 1
            self.logger.info(stats)

            document = {
                "$set": {
                    "ir.status": ProjectManager.STATUS_TRAINING,
                    "ir.status_message": "Intent training is in progress.",
                    "ir.dataset.stats": stats,
                    "ir.last_trained": datetime.datetime.utcnow()
                }
            }
            manager.update_config(query, document)

            ir = IntentRecognizer(DEFAULT_CONFIG, serviceid=self.serviceid,
                                  useSelector=use_selector)
            self.logger.info("Starting fitting for deployment")
            ir.fit(df, df.intent)
            self.logger.info("Fitting for deployment completed")

            VerbisStore().save_ir(ir)
            # Also push the serialized model to the MinIO server.
            VerbisStore().save_ir_minio(ir)

            document = {
                "$set": {
                    "utterances": trained_data,
                }
            }
            data_manager = DatasourceManager()
            data_manager.update_datasource(query, document)

            document = {
                "$set": {
                    "ir.status": ProjectManager.STATUS_TRAINED,
                    "ir.status_message": "The Intent model has been successfully trained",
                    "ir.logs.deploy": ""
                }
            }
            manager.update_config(query, document)
        except Exception as e:
            self.logger.error(e)
            # Compare and store the exception *text*: an exception instance
            # never equals a str, and the status message must be plain text.
            message = str(e) or repr(e)
            if message in (
                    "After pruning, no terms remain. Try a lower min_df or a higher max_df.",
                    "max_df corresponds to < documents than min_df"):
                message = "Sufficient vocabulary to build the model is not available. Please add more utterances."
            elif message == "Invalid type float for labels":
                message = "Add more intents for intent training"
            document = {
                "$set": {
                    "ir.status": ProjectManager.STATUS_TRAINING_FAILED,
                    "ir.status_message": message,
                    # NOTE(review): assumes the last logger handler exposes
                    # the captured output as `.logs` - confirm.
                    "ir.logs.deploy": self.logger.handlers[-1].logs
                }
            }
            manager.update_config(query, document)
            traceback.print_exc()
Esempio n. 2
0
    def train(self, train_intent):
        """Train the entity recognizer for ``self.serviceid`` and persist it.

        Looks up the project config for the service; if none exists, logs an
        error and returns. Otherwise it records a "training" status, loads
        the corpus from the datasource, trains the recognizer returned by
        ``self.instantiate_trainer``, saves the model through ``VerbisStore``,
        writes the trained utterances back to the datasource and records a
        "trained" status.

        Any exception raised during that sequence is caught and logged, and
        the project config is marked as failed with a readable message and
        the captured logs. Nothing is re-raised.

        :param train_intent: when exactly ``True``, the intent status is also
            put on hold while entity training runs.
        :return: None
        """
        manager = ProjectManager()
        query = {"serviceid": self.serviceid}
        config = manager.find_model(query)
        if config is None:
            description = 'Unable to find project_config with given id. ' \
                          'Please check your request params and retry'
            self.logger.error(description)
            return

        try:
            status_fields = {
                "ner.status": ProjectManager.STATUS_TRAINING,
                "ner.status_message": "Entity training is in progress.",
                "ner.last_trained": datetime.datetime.utcnow()
            }
            if train_intent is True:
                # Intent training is expected to follow; park it meanwhile.
                status_fields["ir.status"] = ProjectManager.STATUS_HOLD
                status_fields["ir.status_message"] = \
                    "Awaiting the completion of entity training."
            manager.update_config(query, {"$set": status_fields})

            # starting actual training
            data_manager = DatasourceManager()
            self.logger.info("Starting training of service %s" %
                             self.serviceid)
            corpus = data_manager.find_model(query)
            custom_entity_model = get(config, "custom_entity_model")
            entity_recognizer = self.instantiate_trainer(custom_entity_model)
            trained_utterances = entity_recognizer.train(corpus)

            # The local save is skipped for CoreNLP engines.
            if entity_recognizer.get_engine() not in get_all_corenlp_engines():
                VerbisStore().save_ner(entity_recognizer,
                                       model_type=MODEL_TYPE_NER)
            # Always push the model to the MinIO server.
            VerbisStore().save_ner_minio(entity_recognizer,
                                         model_type=MODEL_TYPE_NER)

            document = {
                "$set": {
                    "utterances": trained_utterances,
                }
            }
            data_manager.update_datasource(query, document)

            document = {
                "$set": {
                    "ner.status": ProjectManager.STATUS_TRAINED,
                    "ner.status_message":
                    "Entity training completed successfully.",
                    "ner.logs.train": ""
                }
            }
            manager.update_config(query, document)

            self.logger.info(
                "Completed training entity recognizer for service %s" %
                self.serviceid)
        except Exception as ex:
            self.logger.exception(ex, exc_info=True)
            self.logger.error(traceback.format_exc())
            # Compare and store the exception *text*: an exception instance
            # never equals a str, and the status message must be plain text.
            message = str(ex) or repr(ex)
            if message == "Cannot have number of folds n_folds=3 greater than the number of samples: 2.":
                message = "Add more utterances for entity training"
            # NOTE(review): when train_intent is True, "ir.status" is left at
            # STATUS_HOLD on failure - confirm a caller resets it.
            document = {
                "$set": {
                    "ner.status": ProjectManager.STATUS_TRAINING_FAILED,
                    "ner.status_message": message,
                    # NOTE(review): assumes the last logger handler exposes
                    # the captured output as `.logs` - confirm.
                    "ner.logs.train": self.logger.handlers[-1].logs
                }
            }
            manager.update_config(query, document)