class UpdateProfile:
    """Store a profile-update record in the configured users-history collection.

    Connection settings and database/collection names are read from the
    ``mongo`` section of the application configuration.
    """

    def __init__(self):
        self.log = LoggerUtil(self.__class__.__name__).get()
        self.config = ConfigUtil.get_config_instance()
        self.db_utils = DBUtils()

    def get_client(self):
        """Create a database client from the ``mongo`` configuration section.

        :return: whatever ``DBUtils.get_client`` returns for the configured
            address, port and credentials.
        """
        mongo_cfg = self.config['mongo']
        address = mongo_cfg['address']
        port = mongo_cfg['port']
        auth_db = mongo_cfg['auth_db']
        is_auth_enabled = mongo_cfg['is_auth_enabled']
        username = mongo_cfg['username']
        password = mongo_cfg['password']
        return self.db_utils.get_client(address=address,
                                        port=port,
                                        username=username,
                                        password=password,
                                        auth_db=auth_db,
                                        is_auth_enabled=is_auth_enabled)

    @staticmethod
    def _build_document(source, destination, time, date, num_seats_req,
                        phone_num, email, preferences):
        """Return the record as a plain dict, ready to be inserted."""
        return {
            'source': source,
            'destination': destination,
            'time': time,
            'date': date,
            'num_seats_req': num_seats_req,
            'phone_num': phone_num,
            'email': email,
            'preferences': preferences,
        }

    @staticmethod
    def insert_query(source, destination, time, date, num_seats_req,
                     phone_num, email, preferences):
        """Return the record serialized as a JSON string.

        Kept for backward compatibility. ``update`` does not use this string
        form, because the collection expects a mapping, not text.

        :return: JSON text with the keys ``source``, ``destination``,
            ``time``, ``date``, ``num_seats_req``, ``phone_num``, ``email``
            and ``preferences``.
        """
        return json.dumps(
            UpdateProfile._build_document(source=source,
                                          destination=destination,
                                          time=time,
                                          date=date,
                                          num_seats_req=num_seats_req,
                                          phone_num=phone_num,
                                          email=email,
                                          preferences=preferences))

    def update(self, source, destination, time, date, num_seats_req,
               phone_num, email, preferences):
        """Insert one record into the users-history collection.

        Failures raised by the insert are logged and not re-raised, so this
        method returns ``None`` whether or not the write succeeded.
        """
        client = self.get_client()
        mongo_cfg = self.config['mongo']
        users_database_name = mongo_cfg['users_database']
        users_hist_collection_name = mongo_cfg['users_hist_collection_name']
        users_hist_collection = client[users_database_name][
            users_hist_collection_name]

        # Insert the dict itself. The JSON string produced by insert_query()
        # is not a document, so inserting it always failed and the error was
        # only logged.
        document = self._build_document(source=source,
                                        destination=destination,
                                        time=time,
                                        date=date,
                                        num_seats_req=num_seats_req,
                                        phone_num=phone_num,
                                        email=email,
                                        preferences=preferences)
        try:
            # NOTE(review): Collection.insert is deprecated in PyMongo 3 and
            # removed in PyMongo 4 (insert_one replaces it) - confirm the
            # driver version in use before upgrading.
            users_hist_collection.insert(document)
            self.log.info(
                "Updated profile for user with email : {}".format(email))
        except Exception as e:
            self.log.error("Error : {}".format(e))
class UserRegistration:
    """Write newly registered users to the configured users collection."""

    def __init__(self):
        self.log = LoggerUtil(type(self).__name__).get()
        self.config = ConfigUtil.get_config_instance()
        self.db_utils = DBUtils()

    def get_client(self):
        """Build a database client from the ``mongo`` configuration section.

        :return: the object returned by ``DBUtils.get_client``.
        """
        mongo_cfg = self.config['mongo']
        setting_names = ('address', 'port', 'auth_db', 'is_auth_enabled',
                         'username', 'password')
        connection_args = {key: mongo_cfg[key] for key in setting_names}
        return self.db_utils.get_client(**connection_args)

    @staticmethod
    def insert_query(name, email, contact, password):
        """Assemble the document stored for a new user.

        The ``auth`` flag is always set to ``True``.

        NOTE(review): the password is placed in the document exactly as
        received - confirm that callers hash it before calling.

        :return: dict with keys ``name``, ``email``, ``contact``,
            ``password`` and ``auth``.
        """
        return {
            'name': name,
            'email': email,
            'contact': contact,
            'password': password,
            'auth': True,
        }

    def add_user(self, **kwargs):
        """Insert a user built from ``name``, ``email``, ``contact``, ``password``.

        All four keyword arguments are required; a missing one raises
        ``KeyError``. Errors raised by the insert itself are logged and
        swallowed.
        """
        required = ('name', 'email', 'contact', 'password')
        name, email, contact, password = (kwargs[field] for field in required)

        mongo_client = self.get_client()
        mongo_cfg = self.config['mongo']
        db_name = mongo_cfg['users_database']
        collection_name = mongo_cfg['add_users_collection']
        target_collection = mongo_client[db_name][collection_name]

        document = self.insert_query(name, email, contact, password)
        try:
            target_collection.insert(document)
            self.log.info("Added user with username : {}".format(email))
        except Exception as err:
            self.log.error("Error : {}".format(err))
class LinguisticModel:
    """Train several text classifiers, persist them and pick the most accurate.

    Each trainer's ``main`` is expected to return a dict holding at least a
    ``"model"`` entry and a ``"metrics"`` entry with an ``"accuracy"`` value;
    ``save_models`` and ``get_best_model`` read exactly those keys.
    """

    # Model names that ``save_models`` will persist; anything else is logged
    # as an error and skipped.
    _STANDARD_MODEL_NAMES = (
        "naive_bayes",
        "logistic_regression",
        "decision_tree",
        "random_forest",
        "extra_tree",
        "adaboost",
        "mlp",
    )

    def __init__(self):
        self.log = LoggerUtil(self.__class__.__name__).get()
        self.config = ConfigUtil.get_config_instance()
        self.read_data = ReadData()
        self.bayes = NaiveBayes()
        self.lr = LRModel()
        self.dt = DecisionTree()
        self.rf = RandomForest()
        self.et = ExtraTree()
        self.adaboost = Adaboost()
        self.mlp = MLP()

    @staticmethod
    def split_data(df, labels):
        """Split samples and labels into shuffled, label-stratified train/test sets.

        :return: ``(x_train, x_test, y_train, y_test)`` as produced by
            ``train_test_split`` with its default split sizes.
        """
        x_train, x_test, y_train, y_test = train_test_split(df,
                                                            labels,
                                                            stratify=labels,
                                                            shuffle=True)
        return x_train, x_test, y_train, y_test

    @staticmethod
    def custom_word_tokenizer(tokens):
        """Identity tokenizer: return ``tokens`` unchanged.

        Presumably the documents are already tokenized upstream - confirm
        against ``ReadData.prepare_data``.
        """
        return tokens

    def vectorize_data(self, data):
        """Fit a TF-IDF vectorizer on ``data`` and return it.

        The vectorizer uses the identity tokenizer above, sublinear term
        frequency scaling, no lowercasing, and keeps at most 2500 features.

        :param data: the documents to fit on.
        :return: the fitted ``TfidfVectorizer``.
        """
        vectorizer = TfidfVectorizer(tokenizer=self.custom_word_tokenizer,
                                     sublinear_tf=True,
                                     analyzer='word',
                                     lowercase=False,
                                     max_features=2500)
        vectorizer.fit(data)
        return vectorizer

    @staticmethod
    def get_tagged_dataset(df, n=1250):
        """Return up to ``n`` messages whose label is false, with matching labels.

        The label array is sized to the number of rows actually returned, so
        the two results always have equal length even when fewer than ``n``
        rows are available.

        :param df: frame with a ``labels`` column (assumed boolean, since it
            is negated with ``~``) and a ``message`` column.
        :param n: maximum number of rows to return.
        :return: ``(messages, labels)`` where ``labels`` is an all-``False``
            boolean array.
        """
        messages = df[~df["labels"]]["message"].head(n=n)
        return messages, zeros(len(messages), dtype=bool)

    def train_models(self, x_train_features, x_test_features, y_train,
                     y_test):
        """Train every configured classifier on the same features.

        :return: dict mapping model name to the value returned by that
            trainer's ``main``.
        """
        # Order matters only for reproducing the original training sequence.
        trainers = (
            ("naive_bayes", self.bayes),
            ("logistic_regression", self.lr),
            ("decision_tree", self.dt),
            ("random_forest", self.rf),
            ("extra_tree", self.et),
            ("adaboost", self.adaboost),
            ("mlp", self.mlp),
        )
        return {
            model_name: trainer.main(x_train=x_train_features,
                                     y_train=y_train,
                                     x_test=x_test_features,
                                     y_test=y_test)
            for model_name, trainer in trainers
        }

    def save_models(self, model_dict, models_path, vectorizer):
        """Persist the vectorizer and every standard model under ``models_path``.

        The vectorizer is written to ``vectorizer.mdl`` and each model to
        ``<model_name>.mdl``. Entries with an unrecognised name are logged
        and skipped.
        """
        joblib.dump(vectorizer, models_path + "/" + "vectorizer.mdl")
        for model_name, stat_dict in model_dict.items():
            if model_name in self._STANDARD_MODEL_NAMES:
                joblib.dump(stat_dict["model"],
                            models_path + "/" + model_name + ".mdl")
            else:
                self.log.error("Non-Standard Model referenced")

    def get_best_model(self, model_dict):
        """Return the model object with the highest reported accuracy.

        Entries without a ``"metrics"`` key are ignored. On a tie the entry
        that appears first in ``model_dict`` wins.

        :param model_dict: mapping of model name to a dict with ``"model"``
            and ``"metrics"`` (containing ``"accuracy"``).
        :return: the ``"model"`` value of the best entry.
        :raises KeyError: if no entry provides metrics.
        """
        best_model_name = None
        best_acc = None
        for model_name, model_stat in model_dict.items():
            if "metrics" not in model_stat:
                continue
            accuracy = model_stat["metrics"]["accuracy"]
            # Track the running maximum; without updating best_acc the last
            # model with a positive accuracy would win instead of the best.
            if best_acc is None or accuracy > best_acc:
                best_acc = accuracy
                best_model_name = model_name

        if best_model_name is None:
            raise KeyError("no model with accuracy metrics found")

        self.log.info(
            "Best model determined is : {}".format(best_model_name))
        return model_dict[best_model_name]["model"]

    def main(self):
        """Run the full pipeline: load, vectorize, train, save, select.

        :return: ``(best_model, vectorizer)``.
        """
        models_path = self.config["models_path"]
        data_df = self.read_data.prepare_data(n_rows=3000)
        messages, labels = data_df["message"], data_df["labels"]

        # NOTE(review): the vectorizer is fitted on all rows before the
        # train/test split, so test documents influence the learned
        # vocabulary/IDF - confirm this is intended.
        vectorizer = self.vectorize_data(messages)
        x_train, x_test, y_train, y_test = self.split_data(messages, labels)
        x_train_features = vectorizer.transform(x_train)
        x_test_features = vectorizer.transform(x_test)

        model_dict = self.train_models(x_train_features, x_test_features,
                                       y_train, y_test)
        self.save_models(model_dict=model_dict,
                         models_path=models_path,
                         vectorizer=vectorizer)
        best_model = self.get_best_model(model_dict)
        return best_model, vectorizer