Example #1
0
def public_trained_projects(organisation_name):
    """Fetch an organisation's public, fully trained, non-master projects.

    :param organisation_name: organisation whose projects are looked up
    :return: matching projects (name, serviceid only) or False when none
    """
    manager = ProjectManager()
    # Every condition must hold: same organisation, publicly visible,
    # both NER and IR models trained, and not a master bot.
    conditions = [
        {"organisation_name": organisation_name},
        {"visibility": "public"},
        {"ner.status": "trained"},
        {"ir.status": "trained"},
        {"masterBot": False},
    ]
    query = {"$and": conditions}
    projection = {"name": 1, "serviceid": 1}
    if not manager.exists(query):
        return False
    projects_to_import = manager.find(query, projection)
    logger.info("public trained projects found in PR DB")
    return projects_to_import
Example #2
0
def user_trained_projects(userid):
    """Fetch a user's private, fully trained, non-master projects.

    :param userid: hex string id of the creating user
    :return: matching projects (name, serviceid only) or False when none
    """
    manager = ProjectManager()
    # Private projects created by this user with both models trained,
    # excluding master bots.
    conditions = [
        {"createdBy": ObjectId(userid)},
        {"ner.status": "trained"},
        {"ir.status": "trained"},
        {"visibility": "private"},
        {"masterBot": False},
    ]
    query = {"$and": conditions}
    projection = {"name": 1, "serviceid": 1}
    if not manager.exists(query):
        return False
    projects_to_import = manager.find(query, projection)
    logger.info("user trained projects found in PR DB.")
    return projects_to_import
def uncache_project_if_exists(serviceid):
    """Unpublish any cached NER/IR models for *serviceid* and delete its
    local model directory, if present.

    :param serviceid: service identifier of the project to evict
    :return: None
    """
    model_store = get_model_store()
    manager = ProjectManager()
    query = {"serviceid": serviceid}
    document = manager.find_model(query)
    model_lists = model_store.get_active_models()
    # Fall back to the default engine when no project document exists.
    if document is not None:
        engine = get(document, "engine", "ICE")
    else:
        engine = "ICE"

    model_name = get_model_name(serviceid, MODEL_TYPE_NER, engine)
    if model_name in model_lists:
        model_store.unpublish(serviceid, MODEL_TYPE_NER, engine)

    # IR models are stored without an engine qualifier.
    model_name = get_model_name(serviceid, MODEL_TYPE_IR, engine=None)
    if model_name in model_lists:
        model_store.unpublish(serviceid, MODEL_TYPE_IR, engine=None)

    # Delete the local model files.
    # BUG FIX: the original passed a single pre-concatenated string to
    # os.path.join ("a + os.sep + b"), defeating its purpose; join the
    # path components properly instead.
    model_dir = os.path.join(home_dir, '.verbis/store', serviceid)
    if os.path.exists(model_dir):
        shutil.rmtree(model_dir)

    logger.info("uncache_project_if_exists done.")
Example #4
0
def change_publish_status():
    '''
    Demote every project's NER/IR status from 'published' back to
    'trained', leaving other statuses untouched.

    FIX: the original docstring wrongly described the 60-day uncache job;
    this function only rewrites publish statuses.
    '''
    try:
        manager = ProjectManager()
        query = {}
        data = manager.find_all_model(query)
        for document in data:
            try:
                ner_status = get(document, 'ner.status', "new")
                ir_status = get(document, 'ir.status', "new")
                serviceid = document['serviceid']

                # The original three-way elif chain is equivalent to two
                # independent checks; debug prints removed.
                if ner_status == "published":
                    ner_status = 'trained'
                if ir_status == "published":
                    ir_status = 'trained'

                # Status is rewritten for every document, changed or not,
                # matching the original behavior.
                manager.update_ir_ner_status(serviceid, ner_status, ir_status)

            except Exception as e:
                # Keep processing remaining documents on a per-doc failure.
                print("exception : ", e)
    except Exception as e:
        print("exception : ", e)
Example #5
0
def update_last_access_to_predict_api(serviceid):
    """Stamp the project's ``lastAccessed`` field with the current UTC time.

    :param serviceid: service identifier of the project to touch
    :return: None
    """
    update = {
        "$set": {
            "lastAccessed": datetime.datetime.utcnow(),
        }
    }
    ProjectManager().update_config({"serviceid": serviceid}, update)
    def on_post(self, req, resp):
        """Falcon handler: evict a service's cached models and reset status.

        Expects a JSON body containing 'serviceid'. Unpublishes the NER and
        IR models from the model store, removes local (and remote/minio)
        model files, and resets ner/ir status to 'new' in the project DB.
        """
        # Request payload parsed by upstream middleware; default to {}.
        doc = req.context['doc'] or {}

        try:
            home_dir = os.path.expanduser('~')
            # NOTE(review): the argument is one pre-joined string, so
            # os.path.join is effectively a no-op here.
            model_store_path = os.path.join(home_dir + os.sep +
                                            '.verbis/store')
            model_store = get_model_store()
            manager = ProjectManager()
            serviceid = doc['serviceid']
            query = {'serviceid': serviceid}
            data = manager.find_model(query)
            engine = get(data, "engine", "ICE")
            model_name = get_model_name(serviceid, MODEL_TYPE_NER, engine)

            model_lists = model_store.get_active_models()
            logger.info("model lists -  %s" % model_lists)
            if model_name in model_lists:
                model_store.unpublish(serviceid, MODEL_TYPE_NER, engine)

            # IR models are keyed without an engine qualifier.
            model_name = get_model_name(serviceid, MODEL_TYPE_IR, engine=None)
            logger.info(model_name)
            if model_name in model_lists:
                model_store.unpublish(serviceid, MODEL_TYPE_IR, engine=None)

            logger.info("path is %s" %
                        os.path.join(model_store_path + os.sep + serviceid))
            # delete model file from local
            if os.path.exists(
                    os.path.join(model_store_path + os.sep + serviceid)):
                # print "path exists, so do rm."
                shutil.rmtree(
                    os.path.join(model_store_path + os.sep + serviceid))
                # delete model file from minio
                # NOTE(review): remote (minio) cleanup only runs when the
                # local directory existed — confirm this is intended.
                VerbisStore().remove_models_from_remote(serviceid)
                logger.info("files removed successfully")

            # update DB
            document = {"$set": {"ner.status": 'new', "ir.status": 'new'}}
            manager.update_config(query, document)

        except AssertionError as ae:
            logger.exception(ae, exc_info=True)
            logger.error(traceback.format_exc())
            raise falcon.HTTPPreconditionFailed(
                'Service publish condition failed', traceback.format_exc())
        except Exception as ex:
            logger.exception(ex, exc_info=True)
            logger.error(traceback.format_exc())
            description = 'Internal Server Error, Please try again later'
            raise falcon.HTTPServiceUnavailable('Service Outage', description,
                                                30)

        resp.set_header('X-Powered-By', 'USTGlobal Verbis')
        resp.status = falcon.HTTP_200
def validate_project_organisation(project_id, organisation_name):
    """Check that the project belongs to the given organisation.

    :param project_id: hex string id of the project (``_id``)
    :param organisation_name: organisation name to match
    :return: True when a project with both attributes exists, else False
    """
    manager = ProjectManager()
    query = {
        "$and": [
            {"_id": ObjectId(project_id)},
            {"organisation_name": organisation_name},
        ]
    }
    # Idiom fix: collapse `if ...: return True / else: return False`.
    return bool(manager.exists(query))
Example #8
0
def validate_service_id_and_cache(doc):
    """Resolve the project config for ``doc['serviceid']`` and warm the
    model cache for the requested services.

    :param doc: request payload containing at least 'serviceid'
    :return: the project config document
    :raises Exception: when no project matches the service id
    """
    manager = ProjectManager()
    config = manager.find_model({"serviceid": get(doc, "serviceid")})
    # Guard clause instead of the original if/else nesting.
    if config is None:
        raise Exception("Invalid Service ID.")
    serviceid_info = get_requested_services(doc)
    cache_model(config, serviceid_info)
    return config
def test_validate_service_id_and_cache_data(mocker):
    """validate_service_id_and_cache returns the config found in the DB."""
    doc = {
        "text": "I would like to have an appointment of Dr Merin on "
                "15 / 10 / 2018 for Eliza",
        "serviceid": "MedicalAssistant-test",
        "pos": True,
        "intent": True,
        "entity": True,
    }
    conf = {
        "predefined_entity_model":
            "ice_commons.er.engines.spacy_ner.SpacyDefaultNER",
        "custom_entity_model":
            "ice_commons.er.engines.mitie_ner.MitieCustomNER",
        "serviceid": "MedicalAssistant-test",
        "language": "EN",
        "ner": {"status": "new"},
        "ir": {"status": "new"},
    }
    manager = mocker.patch(
        'ice_rest.rest.services.parse.predict.ProjectManager',
        return_value=ProjectManager())
    mocker.patch.object(manager.return_value, 'find_model', return_value=conf)
    mocker.patch(
        'ice_rest.rest.services.parse.predict.get_requested_services',
        return_value=[('MedicalAssistant-test', 'ner', True),
                      ('MedicalAssistant-test', 'ir', None)])
    mocker.patch(
        'ice_rest.rest.services.parse.predict.cache_model',
        return_value=None)
    assert validate_service_id_and_cache(doc) == conf
def delete_project_if_exists(given_serviceid):
    '''
    Remove the project from both the project (PR) and datasource (DS)
    stores when present.
    '''
    query = {"serviceid": given_serviceid}

    pr_manager = ProjectManager()
    if pr_manager.exists(query):
        pr_manager.delete(query)
        logger.info("project deleted from PR DB.")

    ds_store = DatasourceManager()
    if ds_store.exists(query):
        ds_store.delete(query)
        logger.info("project deleted from DS DB.")
Example #11
0
def get_published_models():
    """List every published model as (serviceid, model_type, engine).

    :return: list of tuples; model_type is 'ner' or 'ir'; engine is None
             for IR models (they are stored without an engine qualifier).
    """
    manager = ProjectManager()
    documents = manager.get_models_by_status(manager.STATUS_PUBLISHED)

    results = []
    # FIX: the original round-tripped the documents through a pandas
    # DataFrame only to iterate rows — pure overhead. Iterate directly.
    # Assumes get_models_by_status yields dict-like documents (pydash `get`
    # handled the same paths before) — TODO confirm.
    for document in documents:
        service_id = document['serviceid']
        engine = get(document, "engine")
        if get(document, "ner.status") == ProjectManager.STATUS_PUBLISHED:
            results.append((service_id, 'ner', engine))
        if get(document, "ir.status") == ProjectManager.STATUS_PUBLISHED:
            results.append((service_id, 'ir', None))
    return results
Example #12
0
def test_update_last_access_to_predict_api(mocker):
    """The helper returns None once update_config is stubbed out."""
    manager = mocker.patch(
        'ice_rest.rest.services.parse.predict.ProjectManager',
        return_value=ProjectManager())
    mocker.patch.object(manager.return_value, 'update_config',
                        return_value=None)
    result = update_last_access_to_predict_api("")
    assert result is None
Example #13
0
def test_validate_service_id_and_cache_null(mocker):
    """An unknown service id must raise 'Invalid Service ID.'."""
    manager = mocker.patch(
        'ice_rest.rest.services.parse.predict.ProjectManager',
        return_value=ProjectManager())
    mocker.patch.object(manager.return_value, 'find_model',
                        return_value=None)
    with pytest.raises(Exception) as exception:
        validate_service_id_and_cache({})
    assert str(exception.value) == "Invalid Service ID."
Example #14
0
def updateStatus(serviceid, train_ner, train_ir):
    """Mark the requested model types as on-hold ahead of training.

    :param serviceid: project service id
    :param train_ner: when True, put the NER model on hold
    :param train_ir: when True, put the IR model on hold
    :return: None; no-op when the project does not exist
    """
    manager = ProjectManager()
    query = {"serviceid": serviceid}
    if manager.find_model(query) is None:
        return

    # Table-driven form of the original duplicated $set blocks.
    pending = []
    if train_ner is True:
        pending.append(("ner", "Awaiting the completion of entity training."))
    if train_ir is True:
        pending.append(("ir", "Awaiting the completion of intent training."))

    for prefix, message in pending:
        update = {
            "$set": {
                "%s.status" % prefix: ProjectManager.STATUS_HOLD,
                "%s.status_message" % prefix: message,
            }
        }
        manager.update_config(query, update)
Example #15
0
def db_add_dict(serviceid, text):
    """Merge the distinct tokens of *text* into the datasource's
    ``distinct_token_list``.

    :param serviceid: project service id
    :param text: raw utterance text to tokenize and merge
    :return: None; the datasource document is updated in place
    """
    ds_manager = DatasourceManager()
    project_manager = ProjectManager()
    project_config = project_manager.find_config_by_id(serviceid)
    # Language drives stopword removal; default to English.
    language_code = get(project_config, "language", "EN")

    corpus = ds_manager.find_datasource_by_service_id(serviceid)
    existing_tokens = get(corpus, "distinct_token_list") or []

    stopword_removed_text = stopword_remover(text, language_code)
    new_tokens = remove_single_character_tokens(stopword_removed_text)

    # FIX: a single set union replaces the original's double
    # list(set(...)) dedupe and its mutation of the fetched list.
    # (Element order was already nondeterministic via set() before.)
    distinct_token_list = list(set(existing_tokens) | set(new_tokens))

    document = {
        "$set": {
            "distinct_token_list": distinct_token_list,
        }
    }
    ds_manager.update_datasource_by_service_id(serviceid, document)
Example #16
0
def uncache_project_if_exists():
    '''
    Evict trained/validated models whose last access predates the
    configured MINIO_DAYS retention window, via the cache-remove endpoint.

    NOTE(review): this shadows the one-argument uncache_project_if_exists
    defined elsewhere in the project — confirm which one callers import.
    '''
    try:
        manager = ProjectManager()
        documents = manager.find_all_model({})
        for document in documents:
            try:
                serviceid = document['serviceid']
                last_accessed = document['lastAccessed']
                # Renamed from `days_diff`: this is a cutoff datetime,
                # not a day count.
                retention_days = int(app_config['MINIO_DAYS'])
                cutoff = datetime.today() - timedelta(days=retention_days)
                ner_status = document['ner']['status']
                ir_status = document['ir']['status']

                trained_states = ('trained', 'validated')
                if (last_accessed < cutoff
                        and ner_status in trained_states
                        and ir_status in trained_states):
                    logger.info("removal process starts")
                    requests.post(
                        url="http://localhost:8021/api/parse/cache/remove",
                        json={'serviceid': serviceid},
                        headers={'Content-type': 'application/json'})
                    # BUG FIX: the message hard-coded "90 days" regardless
                    # of the configured MINIO_DAYS value.
                    logger.info(
                        "Model with service id: %s is removed from minio "
                        "cache since it was last accessed %d days back. "
                        "Please retrain." % (serviceid, retention_days))

            except Exception as ex:
                # NOTE(review): raising here aborts the whole sweep on the
                # first bad document (preserved from the original).
                logger.exception(ex, exc_info=True)
                logger.error(traceback.format_exc())
                description = 'Internal Server Error, Please try again later'
                raise falcon.HTTPServiceUnavailable('Service Outage',
                                                    description, 30)
    except Exception as ex:
        logger.exception(ex, exc_info=True)
        logger.error(traceback.format_exc())
        description = 'Internal Server Error, Please try again later'
        raise falcon.HTTPServiceUnavailable('Service Outage', description, 30)
def put_corenlp_modelname(serviceid, model_name):
    """Store the basename of *model_name* as the project's CoreNLP model."""
    basename = model_name.split("/")[-1]
    update = {"$set": {"corenlp_model_name": basename}}
    ProjectManager().update_config({"serviceid": serviceid}, update)
def get_corenlp_modelname(serviceid):
    """Return the project's stored CoreNLP model name, or None when unset."""
    document = ProjectManager().find_model({"serviceid": serviceid})
    return get(document, "corenlp_model_name", default=None)
def get_ner_status(serviceid):
    """Return the project's NER status, or [] when the path is absent."""
    document = ProjectManager().find_model({"serviceid": serviceid})
    return get(document, "ner.status", default=[])
def get_custom_class_name(serviceid):
    """Return the project's custom entity model path, or [] when unset."""
    document = ProjectManager().find_model({"serviceid": serviceid})
    return get(document, "custom_entity_model", default=[])
Example #21
0
import logging
from pydash import get
from ice_commons.data.dl.manager import DatasourceManager, ProjectManager
from ice_rest.rest.services.parse.impl.common.missed_utterances_impl import missedUtterences
from ice_rest.rest.services.parse.impl.common.store_utils import get_model_store, get_requested_services
from ice_commons.utility.custom_tokenizer import tokenize_utterance
from ice_commons.core.model_utils import get_engine
from ice_commons.utils import MODEL_TYPE_IR, MODEL_TYPE_NER
import re
from collections import OrderedDict

# Module-level logger plus shared manager singletons reused by handlers.
logger = logging.getLogger(__name__)
project_manager = ProjectManager()
datasource_manager = DatasourceManager()


def get_proba(intent_list):
    """Build a probability map assigning 100% to "No intent" and 0% to
    every other intent present in *intent_list*.

    :param intent_list: iterable of dicts, each carrying a 'name' key
    :return: dict mapping intent name -> percentage string
    """
    prob = {
        entry['name']: "0.0%"
        for entry in intent_list
        if entry['name'] != "No intent"
    }
    prob["No intent"] = "100.0%"
    return prob


def updateDatasource(serviceid, missed_text):
    """

    :param serviceid:
    :param missed_text:
    :return:
Example #22
0
    def train(self, train_intent):
        """Train the custom entity (NER) model for this service.

        :param train_intent: when True, additionally mark the IR model as
            on-hold so intent training can follow entity training.
        :return: None; progress and outcomes are written to the project DB.
        """
        manager = ProjectManager()
        query = {"serviceid": self.serviceid}
        config = manager.find_model(query)
        if config is not None:
            try:
                # Default status update: NER training in progress.
                document = {
                    "$set": {
                        "ner.status": ProjectManager.STATUS_TRAINING,
                        "ner.status_message":
                        "Entity training is in progress.",
                        "ner.last_trained": datetime.datetime.utcnow()
                    }
                }
                if (train_intent is True):
                    # Intent training is queued behind entity training.
                    document = {
                        "$set": {
                            "ner.status": ProjectManager.STATUS_TRAINING,
                            "ner.status_message":
                            "Entity training is in progress.",
                            "ir.status": ProjectManager.STATUS_HOLD,
                            "ir.status_message":
                            "Awaiting the completion of entity training.",
                            "ner.last_trained": datetime.datetime.utcnow()
                        }
                    }
                manager.update_config(query, document)

                # starting actual training
                data_manager = DatasourceManager()
                self.logger.info("Starting training of service %s" %
                                 self.serviceid)
                corpus = data_manager.find_model(query)
                custom_entity_model = get(config, "custom_entity_model")
                entity_recognizer = self.instantiate_trainer(
                    custom_entity_model)
                trained_utterances = entity_recognizer.train(corpus)
                # CoreNLP-backed engines are not saved to the local store;
                # every engine is pushed to minio below.
                if entity_recognizer.get_engine(
                ) not in get_all_corenlp_engines():
                    VerbisStore().save_ner(entity_recognizer,
                                           model_type=MODEL_TYPE_NER)
                ###############MINIOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO##################
                # send file to minio server
                VerbisStore().save_ner_minio(entity_recognizer,
                                             model_type=MODEL_TYPE_NER)
                document = {
                    "$set": {
                        "utterances": trained_utterances,
                    }
                }
                data_manager.update_datasource(query, document)

                document = {
                    "$set": {
                        "ner.status": ProjectManager.STATUS_TRAINED,
                        "ner.status_message":
                        "Entity training completed successfully.",
                        "ner.logs.train": ""
                    }
                }
                manager.update_config(query, document)

                self.logger.info(
                    "Completed training entity recognizer for service %s" %
                    self.serviceid)
            except (RuntimeError, Exception) as ex:
                self.logger.exception(ex, exc_info=True)
                self.logger.error(traceback.format_exc())
                # NOTE(review): comparing the exception object to a string
                # is always False; likely meant str(ex) == ... — confirm.
                if ex == "Cannot have number of folds n_folds=3 greater than the number of samples: 2.":
                    ex = "Add more utterances for entity training"
                document = {
                    "$set": {
                        "ner.status": ProjectManager.STATUS_TRAINING_FAILED,
                        "ner.status_message": ex,
                        "ner.logs.train": self.logger.handlers[-1].logs
                    }
                }
                manager.update_config(query, document)
        else:
            description = 'Unable to find project_config with given id.' \
                          'Please check your request params and retry'
            self.logger.error(description)
Example #23
0
    def deploy(self):
        """Train and deploy the intent-recognition (IR) model for this
        service, persisting it locally and to minio and recording status
        in the project DB.

        :return: None; status/progress is written to the project DB.
        """
        manager = ProjectManager()
        query = {
            "serviceid": self.serviceid
        }
        config = manager.find_model(query)
        if config is not None:
            try:
                trained_data, df = get_ir_dataset(self.serviceid, self.logger)
                self.logger.info("Unique labels %s" % np.unique(df.intent.tolist()))

                # Per-intent utterance counts, serialized as JSON records.
                group = df.groupby(['intent']).agg('count')
                stats = group.reset_index().to_json(orient="records")
                # Model selection only makes sense with multiple intents.
                useSelector = False
                if (len(group) > 1):
                    useSelector = True
                self.logger.info(stats)

                document = {
                    "$set": {
                        "ir.status": ProjectManager.STATUS_TRAINING,
                        "ir.status_message": "Intent training is in progress.",
                        "ir.dataset.stats": stats,
                        "ir.last_trained": datetime.datetime.utcnow()
                    }
                }
                manager.update_config(query, document)

                ir = IntentRecognizer(DEFAULT_CONFIG, serviceid=self.serviceid, useSelector=useSelector)
                self.logger.info("Starting fitting for deployment")
                ir.fit(df, df.intent)
                self.logger.info("Fitting for deployment completed")

                VerbisStore().save_ir(ir)
                ###############MINIOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO##################
                # send file to minio server
                # no engine. .dat - extension
                VerbisStore().save_ir_minio(ir)
                document = {
                    "$set": {
                        "utterances": trained_data,
                    }
                }
                data_manager = DatasourceManager()
                data_manager.update_datasource(query, document)

                document = {
                    "$set": {
                        "ir.status": ProjectManager.STATUS_TRAINED,
                        "ir.status_message": "The Intent model has been successfully trained",
                        "ir.logs.deploy": ""
                    }
                }
                manager.update_config(query, document)
            except (RuntimeError, ValueError, Exception) as e:
                self.logger.error(e)
                message = e
                # NOTE(review): comparing the exception object to strings
                # is always False; likely meant str(e) — confirm.
                if (e == "After pruning, no terms remain. Try a lower min_df or a higher max_df."
                        or e == "max_df corresponds to < documents than min_df"):
                    message = "Sufficient vocabulary to build the model is not available. Please add more utterances."
                elif e == "Invalid type float for labels":
                    message = "Add more intents for intent training"
                document = {
                    "$set": {
                        "ir.status": ProjectManager.STATUS_TRAINING_FAILED,
                        "ir.status_message": message,
                        "ir.logs.deploy": self.logger.handlers[-1].logs
                    }
                }
                manager.update_config(query, document)
                traceback.print_exc()
        else:
            description = 'Unable to find project_config with given id.' \
                          'Please check your request params and retry'
            self.logger.error(description)