コード例 #1
0
ファイル: train.py プロジェクト: karansingla06/ICE-NER-NLP
def get_ir_dataset(serviceid, logger):
    """Build the intent-recognition training set for one service.

    Looks up the datasource document for ``serviceid`` and converts each of
    its utterances into a ``(text, intent)`` training pair. For utterances
    flagged ``ner_trained``, every tagged token span is replaced by the
    upper-cased tag label before the text is used.

    Side effects: each utterance row gets ``row["ir_trained"] = True`` and
    its final text is handed to ``db_add_dict(serviceid, text)``.

    :param serviceid: identifier used to query the datasource.
    :param logger: logger to report progress on; when falsy, the module
        logger is used instead.
    :return: tuple ``(trained_utterances, frame)`` where
        ``trained_utterances`` is the list of (mutated) utterance rows and
        ``frame`` is a ``pandas.DataFrame`` with columns ``text`` and
        ``intent``.
    """
    logger = logger or logging.getLogger(__name__)
    data_manager = DatasourceManager()
    # Let logging interpolate lazily instead of formatting eagerly with `%`.
    logger.info("Starting evaluation of service %s", serviceid)

    def get_training_data(row):
        """Return ``(row, text, intent)`` for a single utterance row."""
        mapping = loads(get(row, "mapping"))
        intent = get(mapping, "intent") if mapping is not None else None
        if intent is None:
            intent = "No intent"
        row["ir_trained"] = True
        text = get(row, "case_converted_utterance")

        # Equality test (not truthiness) kept on purpose so the set of flag
        # values that trigger masking is unchanged.
        if get(row, "ner_trained") == True:  # noqa: E712
            tokens = text.split()
            # A mapping without tags (or a mapping that decoded to None)
            # used to crash in enumerate(); treat it as "nothing to mask".
            tags = get(mapping, 'tags') or []
            masked = []
            prev_end = 0
            for tag in tags:
                # NOTE(review): start/end are used as token indices with an
                # exclusive end - confirm against whatever writes the tags.
                start = get(tag, 'start')
                end = get(tag, 'end')
                label = get(tag, 'tag')
                # Untagged tokens between the previous tag and this one.
                gap_from = max(prev_end, 0)
                if start > gap_from:
                    masked.extend(tokens[gap_from:start])
                # The label stands in for the tagged span, but only when
                # the span starts inside the utterance.
                if 0 <= start < len(tokens):
                    masked.append(label.upper())
                prev_end = end
            # Trailing tokens after the last tag.
            if prev_end < len(tokens):
                masked.extend(tokens[prev_end:])
            text = ' '.join(masked)

        db_add_dict(serviceid, text)
        return row, text, intent

    query = {
        "serviceid": serviceid
    }
    corpus = data_manager.find_model(query)
    utterances = get(corpus, "utterances")
    results = [get_training_data(utterance) for utterance in utterances]
    trained_utterances = [row for row, _, _ in results]
    training_data = [(text, intent) for _, text, intent in results]
    return trained_utterances, pd.DataFrame(training_data, columns=["text", "intent"])
コード例 #2
0
def get_predefined_entities(serviceid):
    """Return the ``predefined_entities`` stored for ``serviceid``.

    Queries the datasource by ``serviceid`` and reads the
    ``predefined_entities`` field, falling back to an empty list via the
    ``default`` argument of ``get``.

    :param serviceid: identifier used to query the datasource.
    :return: the stored ``predefined_entities`` value, or ``[]``.
    """
    datasource = DatasourceManager().find_model({"serviceid": serviceid})
    return get(datasource, "predefined_entities", default=[])
コード例 #3
0
ファイル: train.py プロジェクト: karansingla06/ICE-NER-NLP
    def train(self, train_intent):
        """Train the entity recognizer for ``self.serviceid``.

        Steps, in order:

        1. Mark the project config as "training" (and, when requested, put
           the intent status on hold).
        2. Train the recognizer built by ``self.instantiate_trainer`` from
           the config's ``custom_entity_model`` on the service datasource.
        3. Persist the model through ``VerbisStore`` and write the trained
           utterances back to the datasource.
        4. Record success, or the failure message and captured logs, on the
           project config.

        Training errors are logged and recorded on the config; they are not
        re-raised. If no config exists for the service, an error is logged
        and nothing else happens.

        :param train_intent: when exactly ``True``, ``ir.status`` is set to
            ``STATUS_HOLD`` while entity training runs.
        :return: None
        """
        manager = ProjectManager()
        query = {"serviceid": self.serviceid}
        config = manager.find_model(query)
        if config is None:
            description = 'Unable to find project_config with given id.' \
                          'Please check your request params and retry'
            self.logger.error(description)
            return

        try:
            in_progress = {
                "ner.status": ProjectManager.STATUS_TRAINING,
                "ner.status_message": "Entity training is in progress.",
                "ner.last_trained": datetime.datetime.utcnow(),
            }
            if train_intent is True:
                # Intent training has to wait for the entity model.
                in_progress["ir.status"] = ProjectManager.STATUS_HOLD
                in_progress["ir.status_message"] = \
                    "Awaiting the completion of entity training."
            manager.update_config(query, {"$set": in_progress})

            # starting actual training
            data_manager = DatasourceManager()
            self.logger.info("Starting training of service %s",
                             self.serviceid)
            corpus = data_manager.find_model(query)
            custom_entity_model = get(config, "custom_entity_model")
            entity_recognizer = self.instantiate_trainer(
                custom_entity_model)
            trained_utterances = entity_recognizer.train(corpus)

            # Engines listed by get_all_corenlp_engines() skip save_ner().
            if entity_recognizer.get_engine(
            ) not in get_all_corenlp_engines():
                VerbisStore().save_ner(entity_recognizer,
                                       model_type=MODEL_TYPE_NER)
            # Every engine's model is additionally sent to the minio server.
            VerbisStore().save_ner_minio(entity_recognizer,
                                         model_type=MODEL_TYPE_NER)

            data_manager.update_datasource(
                query, {"$set": {"utterances": trained_utterances}})

            manager.update_config(query, {
                "$set": {
                    "ner.status": ProjectManager.STATUS_TRAINED,
                    "ner.status_message":
                    "Entity training completed successfully.",
                    "ner.logs.train": ""
                }
            })

            self.logger.info(
                "Completed training entity recognizer for service %s",
                self.serviceid)
        except Exception as ex:
            self.logger.exception(ex)
            self.logger.error(traceback.format_exc())

            # Compare and store the exception *text*: an exception object
            # never equals a str, and the raw object is not a plain value
            # that can be written into the status document.
            message = str(ex)
            # Match on the prefix so the hint is shown for any fold/sample
            # count, not only the literal "n_folds=3 ... samples: 2." case.
            if message.startswith("Cannot have number of folds"):
                message = "Add more utterances for entity training"

            # Do not let a missing handler or a handler without `.logs`
            # mask the original failure and leave the status at "training".
            handlers = self.logger.handlers
            train_logs = getattr(handlers[-1], "logs", "") if handlers else ""

            manager.update_config(query, {
                "$set": {
                    "ner.status": ProjectManager.STATUS_TRAINING_FAILED,
                    "ner.status_message": message,
                    "ner.logs.train": train_logs
                }
            })