def __init__(self):
    # flat training file exported from mongo
    self.train_file_location = Config.get_config_val(
        key="flatfile", key_1depth="location") + Config.get_config_val(
        key="flatfile", key_1depth="mongo_train_fileName")
    # serialized response classifier model file
    self.filename = Config.get_config_val(
        key="model", key_1depth="file", key_2depth="location"
    ) + Config.get_config_val(
        key="model", key_1depth="file", key_2depth="response_classifier")
import logging

import redis

from modules.utils.yaml_parser import Config

# module-level logger (assumed here; the project may provide its own logging helper)
logger = logging.getLogger(__name__)


class CacheService:
    # retrieve caching properties from config
    cache_ttl = int(Config.get_config_val(key="cache", key_1depth="ttl"))
    cache_max_size = int(
        Config.get_config_val(key="cache", key_1depth="max_size"))
    cache = redis.from_url(
        Config.get_config_val(key="cache", key_1depth="redis_url")
    )
    # instead use a wrapper and retrieve from os.environ.get("REDIS_URL")
    # create a cache service
    # cache = TTLCache(maxsize=cache_max_size, ttl=cache_ttl)

    @classmethod
    def get_object(cls, key):
        """
        retrieves the object from cache
        :param key: cache key to look up
        :return: cached value decoded as utf-8, or None if missing/expired
        """
        value = None
        try:
            if cls.cache.exists(key) == 1:
                value = cls.cache.get(key).decode("utf-8")
            else:
                value = None
        except KeyError as ke:
            logger.error("Either key expired or not present : {0}".format(ke))
            value = None
        except Exception as e:
            logger.error(
                "internal exception while using cache : {0}".format(e))
            value = None
        return value

    @classmethod
    def set_object(cls, key, value):
        """stores the value in cache under key, expiring after cache_ttl seconds"""
        logger.info("key : {0}, value : {1}".format(key, value))
        cls.cache.set(key, value, ex=cls.cache_ttl)

    @classmethod
    def remove_objects(cls, key):
        """removes the value for key from cache, ignoring keys that are already gone"""
        try:
            logger.info("removing value from cache for : {0}".format(key))
            cls.cache.delete(key)
        except KeyError as ke:
            logger.error(
                "already removed from cache for key : {0}. Exception is : {1}".
                format(key, ke))
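# Hypothetical usage sketch (not part of the original module): assumes a reachable
# Redis instance at cache.redis_url; the key/value names below are illustrative only.
if __name__ == "__main__":
    CacheService.set_object(key="session:1234", value="user-42")
    print(CacheService.get_object(key="session:1234"))   # -> "user-42", or None once the TTL expires
    CacheService.remove_objects(key="session:1234")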
def predict_response(cls, token, broker_id, lang, query):
    """
    This returns the prediction for a given query
    :param token: authentication session token
    :param broker_id: broker for which query is requested
    :param lang: language for which model was trained
    :param query: question being asked
    :return: response object
    """
    response = None
    logger.info("*************** inside prediction service : predict_response *******************")
    try:
        # get user from token
        user = AuthService.get_logged_in_user(token=token)
        if user is not None:
            logger.info(user)
            broker = BrokerDao.get_broker_by_id(broker_id=broker_id)
            if broker is not None:
                logger.info(broker)
                # model/vectorizer types configured for the classifier
                model_type = Config.get_config_val(key="model", key_1depth="classifier", key_2depth="model")
                vector_type = Config.get_config_val(key="model", key_1depth="vectorizer", key_2depth="vector")
                logger.info(model_type)
                logger.info(vector_type)
                classifier_instance = TrainedClassifierDao.get_trained_classifier_obj_from_db(
                    user=user, broker=broker, model_type=model_type,
                    vector_type=vector_type, lang=lang)
                if classifier_instance is not None:
                    response = classifier_instance.predict(
                        user=user, broker=broker, model_type=model_type,
                        vector_type=vector_type, lang=lang, query=query)
                else:
                    response = "could not find trained_classifier"
            else:
                response = "Could not find broker"
        else:
            response = "Unauthorized access/ session expired. Please re-login"
    except Exception as e:
        logger.error("error : {0}".format(e))
        response = "Error occurred"
        logger.error(response)
    return response
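# Hypothetical caller sketch (not from the original source): assumes predict_response is a
# classmethod on a prediction service class, here named PredictionService, and that the
# token and broker_id values come from the API layer; all literals are illustrative.
answer = PredictionService.predict_response(
    token="session-token-from-login",
    broker_id="<broker-object-id>",
    lang="en-US",
    query="What are your working hours?")
logger.info(answer)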
def main():
    initialize()

    # Read telegram token from config
    telegram_token = Config.get_config_val(key="auth", key_1depth="telegram", key_2depth="token")
    # telegram_token = config['telegram-token']

    # Create the Updater and pass it your bot's token.
    # Make sure to set use_context=True to use the new context based callbacks
    # Post version 12 this will no longer be necessary
    updater = Updater(telegram_token, use_context=True)
    dp = updater.dispatcher

    # Add conversation handler with the states QUERY and CANCEL
    conv_handler = ConversationHandler(
        entry_points=[CommandHandler('start', start)],
        states={
            QUERY: [MessageHandler(Filters.text, query)],
            CANCEL: [CommandHandler('cancel', cancel)],
        },
        fallbacks=[CommandHandler('cancel', cancel)],
        allow_reentry=True)

    dp.add_handler(conv_handler)

    # log all errors
    dp.add_error_handler(error)

    updater.start_polling()
    updater.idle()
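# Hypothetical sketch of the callbacks and state constants wired into the ConversationHandler
# above (the original handler implementations are not shown); reply texts and the echo
# behaviour in query() are assumptions for illustration.
QUERY, CANCEL = range(2)


def start(update, context):
    update.message.reply_text("Hi! Ask me a question, or send /cancel to stop.")
    return QUERY


def query(update, context):
    # a real handler would forward update.message.text to the prediction service
    update.message.reply_text("You asked: {0}".format(update.message.text))
    return QUERY


def cancel(update, context):
    update.message.reply_text("Bye!")
    return ConversationHandler.END


def error(update, context):
    # python-telegram-bot (v12 context-based) exposes the exception on context.error
    logger.warning('Update "%s" caused error "%s"', update, context.error)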
def __init__(self):
    # column names
    self.col_lang = Config.get_config_val(key="df_columns", key_1depth="col_lang")
    self.col_category = Config.get_config_val(key="df_columns", key_1depth="col_category")
    self.col_query = Config.get_config_val(key="df_columns", key_1depth="col_query")
    self.col_response = Config.get_config_val(key="df_columns", key_1depth="col_response")
    self.col_variables = Config.get_config_val(key="df_columns", key_1depth="col_variables")
    self.col_input_circumstance = Config.get_config_val(key="df_columns", key_1depth="col_input_circumstance")
    self.col_output_circumstance = Config.get_config_val(key="df_columns", key_1depth="col_output_circumstance")

    self.train_file_location = Config.get_config_val(key="flatfile", key_1depth="location") + Config.get_config_val(
        key="flatfile", key_1depth="mongo_train_fileName")

    self.trained_classifier_obj = None
    self.train_list = []
    self.df_train_flatfile = pd.DataFrame()
def __init__(self, vector_type, model_type, train_file_location, trained_classifier):
    # training file location
    self.train_file_location = train_file_location
    self.trained_classifier = trained_classifier

    # language column used to build the dictionary of per-language predictors
    self.col_lang = Config.get_config_val(key="df_columns", key_1depth="col_lang")
    # predictor column (input text)
    self.col_query = Config.get_config_val(key="df_columns", key_1depth="col_query")
    # target column
    self.col_category = Config.get_config_val(key="df_columns", key_1depth="col_category")
    # derived column with label encoding
    self.col_category_numeric = self.col_category + "_numeric"
    # corresponding columns which will be extracted from the predicted category
    self.col_response = Config.get_config_val(key="df_columns", key_1depth="col_response")
    self.col_variables = Config.get_config_val(key="df_columns", key_1depth="col_variables")
    self.col_input_circumstance = Config.get_config_val(
        key="df_columns", key_1depth="col_input_circumstance")
    self.col_output_circumstance = Config.get_config_val(
        key="df_columns", key_1depth="col_output_circumstance")

    # additional data about model and vector
    self.model_type = model_type
    self.model_category = model_factory.get_model(model_type)
    self.model = None
    self.vector_type = vector_type
    self.vector = vectorizer_factory.get_vector(vector_type)

    # train/test data populated later during training
    self.train_df = None
    self.unique_train_df = None
    self.X_train = None
    self.X_train_vect = None
    self.X_test = None
    self.X_test_vect = None
    self.y_train = None
    self.y_test = None
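# Hypothetical sketch of what the model/vectorizer factories referenced above might return
# (the real model_factory/vectorizer_factory implementations are not shown here); the type
# strings and the scikit-learn choices are assumptions for illustration only.
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC


class model_factory:
    @staticmethod
    def get_model(model_type):
        # returns the model class (the "model category"); it is instantiated later during training
        return {"svm": LinearSVC, "naive_bayes": MultinomialNB}.get(model_type)


class vectorizer_factory:
    @staticmethod
    def get_vector(vector_type):
        # returns a ready-to-fit vectorizer instance
        return {"count": CountVectorizer(), "tfidf": TfidfVectorizer()}.get(vector_type)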
def __init__(self, unique_train_df, model, vector, use_decision_function, decision_boundary):
    """
    constructor to initialize class params
    :param unique_train_df: unique classes
    :param model: model used for training
    :param vector: vectorizer used for training
    :param use_decision_function: whether to use decision function or not
    :param decision_boundary: threshold for decision function
    """
    self.model = model
    self.vector = vector
    self.use_decision_function = use_decision_function
    self.decision_boundary = decision_boundary
    self.unique_train_df = unique_train_df
    logger.info(self.unique_train_df.head(1))

    # column names
    self.col_lang = Config.get_config_val(key="df_columns", key_1depth="col_lang")
    self.col_category = Config.get_config_val(key="df_columns", key_1depth="col_category")
    # derived column with label encoding
    self.col_category_numeric = self.col_category + "_numeric"
    self.col_query = Config.get_config_val(key="df_columns", key_1depth="col_query")
    self.col_response = Config.get_config_val(key="df_columns", key_1depth="col_response")
    self.col_variables = Config.get_config_val(key="df_columns", key_1depth="col_variables")
    self.col_input_circumstance = Config.get_config_val(
        key="df_columns", key_1depth="col_input_circumstance")
    self.col_output_circumstance = Config.get_config_val(
        key="df_columns", key_1depth="col_output_circumstance")
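# Hypothetical sketch of how use_decision_function/decision_boundary might gate a prediction
# (the original predict logic is not shown); _predict_category is an illustrative helper name
# and the thresholding rule is an assumption.
def _predict_category(self, query):
    query_vect = self.vector.transform([query])
    if self.use_decision_function:
        # distance from the separating hyperplane; below the boundary the query is
        # treated as "unknown" rather than being forced into the closest class
        scores = self.model.decision_function(query_vect)
        if scores.max() < self.decision_boundary:
            return None
    predicted_numeric = self.model.predict(query_vect)[0]
    match = self.unique_train_df[
        self.unique_train_df[self.col_category_numeric] == predicted_numeric]
    return match[self.col_category].iloc[0]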
from mongoengine import *
from modules.utils.yaml_parser import Config
import json
from datetime import datetime

url = Config.get_config_val(key="mongodb", key_1depth="url")
db = Config.get_config_val(key="mongodb", key_1depth="db")

connect(db, host=url)


class User(Document):
    first_name = StringField(required=True, max_length=100)
    last_name = StringField(required=True, max_length=100)
    email = StringField(required=True, max_length=255)
    password = StringField(required=True, max_length=100)
    age = IntField(required=False)
    gender = StringField(required=False, max_length=1)
    created_on = DateTimeField(required=True)
    telegram_oAuth_token = StringField(required=False, max_length=100)


# TODO to use it later. Not yet used. Also extend it from Document
class Language:
    lang_code = StringField(required=True, max_length=8)


class Broker(Document):
    user_id = ReferenceField(User)
    broker_name = StringField(required=True, max_length=100)
    default_lang = StringField(required=True, max_length=8, default="en-US")  # TODO replace with Language
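# Hypothetical usage sketch (not part of the original models file): field values are
# illustrative, and it assumes the connect() call above has already succeeded.
if __name__ == "__main__":
    user = User(first_name="Ada", last_name="Lovelace", email="ada@example.com",
                password="<hashed-password>", created_on=datetime.now()).save()
    broker = Broker(user_id=user, broker_name="demo-broker").save()
    print(Broker.objects(user_id=user).count())   # -> 1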
def bulk_insert_documents(self):
    # load the old file
    old_train_file_location = Config.get_config_val(key="flatfile", key_1depth="location") + Config.get_config_val(
        key="flatfile", key_1depth="mongo_train_fileName")
    consumer_ques = pd.read_csv(old_train_file_location)

    # first change the column names
    consumer_ques.rename(
        columns={'question-category': Config.get_config_val(key="df_columns", key_1depth="col_category"),
                 'question': Config.get_config_val(key="df_columns", key_1depth="col_query"),
                 'answer': Config.get_config_val(key="df_columns", key_1depth="col_response")},
        inplace=True)

    # in order to create 1 row per category, we will have to split data based on every category.
    # 1. extract unique categories in data
    categories = consumer_ques[Config.get_config_val(key="df_columns", key_1depth="col_category")].unique()

    # 2. iterate over each category
    for cat in categories:
        print('category : {0}'.format(cat))
        trainObj = None

        # 3. split data per category
        df = consumer_ques[consumer_ques[Config.get_config_val(key="df_columns", key_1depth="col_category")] == cat]

        # 4. extract query
        training_queries = df[[Config.get_config_val(key="df_columns", key_1depth="col_query")]].values.T.tolist()[0]

        # 4.1 extract language
        lang = df[Config.get_config_val(key="df_columns", key_1depth="col_lang")].unique()[0]

        # 4.2 extract category - category is already extracted in "cat"

        # 5. create train object
        trainObj = Train(category=cat, lang=lang, training_queries=training_queries)

        # 6. create circumstance
        circumstance = Circumstance(
            input_circumstance=df[Config.get_config_val(key="df_columns",
                                                        key_1depth="col_input_circumstance")].unique()[0],
            output_circumstance=df[Config.get_config_val(key="df_columns",
                                                         key_1depth="col_output_circumstance")].unique()[0])
        # circumstance = {
        #     'input_circumstance' : df['input_circumstance'].unique()[0],
        #     'output_circumstance' : df['output_circumstance'].unique()[0]
        # }
        trainObj.circumstance = circumstance

        # 7. create response
        responseList = []
        textList = []
        textList.append(df[Config.get_config_val(key="df_columns", key_1depth="col_response")].unique()[0])
        response = Response(text=textList, custom='')
        # responseObj = {
        #     'text' : textList,
        #     'custom' : ''
        # }
        # responseList.append(responseObj)
        trainObj.response.append(response)

        # 8. create variables
        variables = json.loads(df[Config.get_config_val(key="df_columns", key_1depth="col_variables")].unique()[0])
        for var in variables:
            varObj = Variables(name=var.get('name'), type=var.get('type'), value=var.get('value'),
                               io_type=var.get('io_type'))
            trainObj.variables.append(varObj)

        # 9. save the object
        trainObj.save()
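# Hypothetical invocation sketch (not from the original source): assumes bulk_insert_documents
# sits on a one-off migration/DAO class, here named TrainDataMigration, and that the CSV at
# flatfile.location + flatfile.mongo_train_fileName contains the columns renamed above.
if __name__ == "__main__":
    TrainDataMigration().bulk_insert_documents()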