Ejemplo n.º 1
0
def predict_impl(doc, config, req_id=None):
    """Run every requested NER / IR model over ``doc["text"]`` and build the
    prediction response.

    Args:
        doc: request document; must contain "text" and "serviceid", and
            drives which services are requested (via get_requested_services).
        config: project configuration holding the model class names.
        req_id: optional request id forwarded to missed-utterance logging.

    Returns:
        dict with "text", plus "entities" / "parts_of_speech" (always present,
        defaulting to []) and, when an IR model ran, "intent".
    """
    text = doc["text"]
    store = get_model_store()
    entity_tags = None
    response = {"text": text}
    corpus, datasources_map, projects_map = fetch_data_mappings(doc)
    (truecased_text, retokenized_text, syn_processed_text,
     syn_processed_truecased_text, syn_processed_retokenized_text,
     syn_indexes) = predict_preprocessing(text, store, datasources_map)

    # Hoisted out of the loop: both engines depend only on `config`, which
    # does not change per requested service.
    engine = get_engine(config['custom_entity_model'])
    default_engine = get_engine(config['predefined_entity_model'])

    for serviceid, model_type, pos in get_requested_services(doc):
        logger.info((serviceid, model_type, pos))

        if model_type == MODEL_TYPE_NER:
            entities, entity_tags, parts_of_speech = ner_entities(
                store, config, serviceid, model_type, engine, truecased_text,
                retokenized_text, syn_processed_truecased_text,
                syn_processed_retokenized_text, syn_indexes, default_engine,
                datasources_map, projects_map, pos)
            response["entities"] = entities
            if (parts_of_speech
                    is not None) and (get(doc, 'pos', default=True) is True):
                response["parts_of_speech"] = parts_of_speech

        elif model_type == MODEL_TYPE_IR:
            # NOTE: relies on `entity_tags` produced by an earlier NER pass in
            # this same loop (stays None if NER was not requested first).
            prediction, probabilities = ir_entity_tags(
                store, serviceid, model_type, engine,
                syn_processed_truecased_text, entity_tags, datasources_map)
            logger.info("prediction......................%s", prediction)
            logger.info("probabilities......................%s", probabilities)
            response["intent"] = {
                "top_intent": prediction,
                "confidence_level": probabilities
            }
            missed_text = missedUtterences(response, doc['serviceid'], req_id,
                                           syn_processed_retokenized_text)
            # BUG FIX: was `is not` (object identity) — an equal-but-distinct
            # string returned by missedUtterences would wrongly trigger an
            # update. Compare by value instead.
            if missed_text != syn_processed_retokenized_text:
                updateDatasource(serviceid, missed_text)
    # Guarantee both keys exist even when no NER service was requested.
    if 'entities' not in response:
        response['entities'] = []
    if 'parts_of_speech' not in response:
        response['parts_of_speech'] = []
    return response
Ejemplo n.º 2
0
def tag(doc):
    """Tag ``doc["text"]`` with entities for the document's service.

    Loads the service's datasource and model config, optionally reloads a
    stale custom NER model, then delegates tagging to the model store.

    Args:
        doc: request document with "text" and "serviceid".

    Returns:
        The store's tag response with any "resolved_to" fields stripped.
    """
    store = get_model_store()
    # Normalize non-breaking spaces before any tokenization.
    text = get(doc, "text", default='').replace('\xa0', ' ')
    service_id = get(doc, "serviceid", default=None)
    text, original_text = store.change_case(text)
    query = {"serviceid": service_id}

    corpus = datasource_manager.find_datasource_by_service_id(doc["serviceid"])
    config = project_manager.find_model(query)
    datasources_map = {
        "predefined_entities": get(corpus, "predefined_entities", default=[]),
        "entities": get(corpus, "entities", default=[]),
        "patterns": get(corpus, "patterns", default=[]),
        "phrases": get(corpus, "phrases", default=[]),
        "distinct_token_list": get(corpus, "distinct_token_list", default=[]),
        "intents": get(corpus, "intents", default=[])
    }
    projects_map = {
        "custom_entity_model": get(config, "custom_entity_model",
                                   default=None),
        "ner_status": get(config, "ner.status", default=[]),
        "language": get(config, 'language', 'EN')
    }

    default_class_name = get(config, 'predefined_entity_model', None)
    custom_class_name = get(config, 'custom_entity_model', None)
    engine = get_engine(custom_class_name)
    last_trained = get(config, "ner.last_trained", default=None)
    default_engine = get_engine(default_class_name)
    model_name = get_model_name(service_id, "ner", engine)

    # Reuse the engine / store already computed above instead of re-deriving
    # them (was: get_engine(custom_class_name) and get_model_store() again).
    # CoreNLP engines are never cached/reloaded here; others are refreshed
    # when the stored model is older than the last training run.
    if engine not in get_all_corenlp_engines():
        if get(config, "ner.status", default=None) in ['trained', 'validated']:
            store.store.check_trained_time_and_reload(
                model_name, last_trained, service_id, "ner", engine,
                custom_class_name)

    response = store.tag(service_id, text, original_text, engine,
                         default_engine, default_class_name, datasources_map,
                         projects_map)

    response = remove_resolved_to(response)

    return response
Ejemplo n.º 3
0
    def on_post(self, req, resp):
        """Bulk-retag a service's utterances by phrase, pattern, or
        predefined-entity matching, then persist them to the datasource.

        Expects the request document to carry "serviceid" and "type"
        ("phrases" | "patterns" | "predefined"). Writes a JSON status
        payload and, when analytics logging is enabled, records request
        timing in the finally block.
        """
        log_flag = False
        if app_config['BOTANALYTICS_LOG_FLAG'].upper() == "ON":
            log_flag, req_id, botanalytics, start_time = True, str(
                uuid.uuid4()), BotAnalyticsAPI(), datetime.datetime.now()
        doc = req.context['doc'] or {}
        try:
            store = get_model_store()
            serviceid = doc["serviceid"]
            datasource = manager.find_datasource_by_service_id(serviceid)
            phrases = get(datasource, "phrases")
            patterns = get(datasource, "patterns")
            entities = get(datasource, "entities", [])
            predefined_entities = get(datasource, "predefined_entities", [])
            utterances = get(datasource, "utterances")
            project_config = project_manager.find_config_by_id(serviceid)
            default_engine = get_engine(
                project_config['predefined_entity_model'])
            # Renamed from `type` to avoid shadowing the builtin.
            tag_type = doc["type"]
            if tag_type == "phrases":
                utterances = tag_phrase(phrases, utterances)
            if tag_type == "patterns":
                utterances = tag_pattern(patterns, utterances)
            if tag_type == "predefined":
                utterances = tag_all_predefined(store, patterns, phrases,
                                                entities, utterances,
                                                default_engine,
                                                predefined_entities,
                                                project_config)

            document = {"$set": {"utterances": utterances}}
            manager.update_datasource_by_service_id(serviceid, document)
            resp.data = json.dumps({"msg": "Successfully Updated"})
            resp.set_header('X-Powered-By', 'USTGlobal ICE')
            resp.status = falcon.HTTP_200
        except Exception as ex:
            logger.exception(ex)
            # BUG FIX: resp.data was assigned a raw dict holding the exception
            # object — not JSON-serializable and not a valid falcon payload.
            # Serialize a string message like the success path does.
            resp.data = json.dumps({"msg": str(ex)})
            resp.set_header('X-Powered-By', 'USTGlobal ICE')
            resp.status = falcon.HTTP_500
        finally:
            if log_flag:
                end_time = datetime.datetime.now()
                total_action_time = relativedelta(end_time, start_time)
                botanalytics.log(requesttype="nerrequests",
                                 serviceid=doc.get('serviceid'),
                                 req_id=req_id,
                                 action="BULK TAG",
                                 ner_req_timestamp=start_time.replace(
                                     microsecond=0).isoformat(),
                                 ner_req_end_timestamp=end_time.replace(
                                     microsecond=0).isoformat(),
                                 total_action_time=(
                                     total_action_time.hours * 60 * 60 * 1000 +
                                     total_action_time.minutes * 60 * 1000 +
                                     total_action_time.seconds * 1000) +
                                 (total_action_time.microseconds / 1000))
Ejemplo n.º 4
0
def cache_model(config, requested_services):
    """Ensure the model for each requested service is loaded in the store.

    For every (serviceid, model_type, ...) tuple whose training status is
    complete or in validation, ask the model store to reload the model if
    its on-disk copy is newer than the cached one.

    Args:
        config: project configuration with per-model-type status and
            last-trained timestamps.
        requested_services: iterable of tuples whose first two elements are
            (serviceid, model_type).
    """
    # Indentation normalized to 4 spaces (original body used 5/8 mix).
    for service_each in requested_services:
        serviceid = service_each[0]
        model_type = service_each[1]
        engine = ""
        model_class = None
        # Membership test replaces the original triple `or` comparison.
        if config[model_type]["status"] in ('trained', 'validated',
                                            'validating'):
            if model_type == "ner":
                # NER models are engine-specific, so the engine is part of
                # the cached model's name.
                model_class = config['custom_entity_model']
                engine = get_engine(model_class)
                model_name = serviceid + "-" + engine + "-" + model_type
                last_trained = config["ner"]["last_trained"]
            else:
                model_name = serviceid + "-" + model_type
                last_trained = config["ir"]["last_trained"]
            get_model_store().store.check_trained_time_and_reload(
                model_name, last_trained, serviceid, model_type, engine,
                model_class)
def retag(serviceid, utterances, new_default_model_class, predefined_entities,
          project_config):
    """Re-apply predefined-entity tags to every utterance in place.

    Runs the default (predefined) tagger over each utterance, keeps only
    tags belonging to ``predefined_entities``, merges them with the existing
    custom tags (custom tags win on overlap), and flags each utterance as
    needing IR and NER retraining.

    Args:
        serviceid: owning service id (kept for interface compatibility).
        utterances: list of utterance dicts carrying "utterance",
            "case_converted_utterance" and a JSON-encoded "mapping".
        new_default_model_class: class name used to resolve the tagger engine.
        predefined_entities: entity labels eligible to be kept.
        project_config: project configuration (kept for interface
            compatibility).

    Returns:
        The same ``utterances`` list, mutated in place.
    """
    store = get_model_store()
    # Hoisted out of the loop: the engine depends only on the model class.
    default_engine = get_engine(new_default_model_class)

    for utterances_each in utterances:
        utterance = get(utterances_each, "utterance")
        case_converted_utterance = get(utterances_each,
                                       "case_converted_utterance")
        mapping = json.loads(get(utterances_each, "mapping"))
        # (Removed a dead local `doc` dict the original built but never used.)
        def_tags = store.tag_predefined(MODEL_TYPE_NER, default_engine,
                                        case_converted_utterance, utterance)
        # Keep only tags whose label is in the configured predefined set.
        def_tags = [
            tag for tag in def_tags if str(tag['tag']) in predefined_entities
        ]
        cust_tags = get(mapping, "tags", [])
        mapping["tags"] = remove_overlapping(def_tags, cust_tags)
        utterances_each["mapping"] = json.dumps(mapping)
        # Tags changed, so both models must be retrained before predictions
        # reflect the new tagging.
        utterances_each["ir_trained"] = False
        utterances_each["ner_trained"] = False
    return utterances