class Classifier():
    """Classify a piece of text with a previously trained Naive Bayes model.

    On construction the class builds the ``MachineLearning`` and
    ``Preprocessing`` helpers from one shared configuration and loads the
    persisted model through ``algo.load_model()``, so that ``classify`` can
    be called repeatedly without reloading it.
    """

    def __init__(self, *args, **kwargs):
        """Build the helpers and load the stored model.

        Args:
            *args: Accepted for call compatibility; not used.
            **kwargs: Configuration forwarded unchanged to both
                ``MachineLearning`` and ``Preprocessing``. When no keyword
                argument is given, the built-in default paths below are used.
        """
        # ``**kwargs`` is always a dict (empty when nothing is passed), never
        # ``None``; test for emptiness so the defaults are actually reachable.
        if not kwargs:
            config = {
                'text_dir': 'data/dataset/doc',
                'dataset': 'data/matrix',
                'bag_of_words': 'data/bag_of_words',
                'train_model': 'data/model/doc.model',
                'is_unicode': False
            }
        else:
            config = kwargs

        self.ml = MachineLearning(**config)

        # Algorithm used for prediction. ``self.ml.DecisionTree(...)`` is an
        # alternative exposed by the same helper.
        self.algo = self.ml.NiaveBayes()

        self.prepro = Preprocessing(**config)

        # Load the persisted model once, up front.
        self.model = self.algo.load_model()

    def classify(self, question="hello ai"):
        """Predict the label for ``question``, print it and return it.

        Args:
            question: Raw text to classify.

        Returns:
            Whatever ``MachineLearning.to_label`` yields for the prediction.
        """
        # Convert the text into the feature vector expected by the model.
        # NOTE(review): the trailing ``1`` is presumably a frequency
        # threshold -- confirm against Preprocessing.loading_single_doc.
        mat = self.prepro.loading_single_doc(question, 'doc_freq', 1)
        prediction = self.algo.predict(self.model, [mat])
        label = self.ml.to_label(prediction, 'data/bag_of_words/label_match.pickle')
        print(label)
        return label
def classify(config, text):
    """Classify ``text`` with three stored models and report each label.

    Args:
        config: Settings dict forwarded to ``Preprocessing`` and
            ``MachineLearning``; the keys ``'threshold'`` and
            ``'label_match'`` are also read directly here.
        text: Raw text to classify.

    Returns:
        dict with the keys ``'NB'`` (Naive Bayes), ``'NN'`` (neural network)
        and ``'DT'`` (decision tree), each holding the label produced by
        ``MachineLearning.to_label`` for that model's prediction.
    """
    # Turn the text into the frequency vector shared by all three models.
    preprocessor = Preprocessing(**config)
    features = preprocessor.loading_single_doc(text, 'doc_freq', config['threshold'])

    # Only three algorithms are wired in at the moment.
    learner = MachineLearning(**config)

    # --- Naive Bayes ---
    bayes = learner.NiaveBayes()
    bayes_model = bayes.load_model()
    bayes_guess = bayes.predict(bayes_model, [features])

    # --- Neural network ---
    network_settings = {
        'hidden_layer_sizes': (250, 100),
        'learning_rate': 0.012,
        'momentum': 0.5,
        'random_state': 0,
        'max_iter': 200,
        'activation': 'tanh',
    }
    network = learner.NeuralNetwork(**network_settings)
    network_model = network.load_model()
    network_guess = network.predict(network_model, [features])

    # --- Decision tree ---
    tree = learner.DecisionTree(criterion='gini', prune='depth',
                                max_depth=30, min_criterion=0.05)
    tree_model = tree.load_model()
    # NOTE: the tree is fed the raw vector wrapped in an array; a
    # prepro.normalize_dataset(...) step for this input is currently disabled.
    tree_guess = tree.predict(tree_model, np.array([features]))

    # Map every prediction to its label, in the order NB, NN, DT.
    label_file = config['label_match']
    guesses = (('NB', bayes_guess), ('NN', network_guess), ('DT', tree_guess))
    return {tag: learner.to_label(guess, label_file) for tag, guess in guesses}
training """ ml = MachineLearning(**config) # split dataset -> train set, test set training_set, test_set = ml.split_dataset(dataset_sample, 2) # choose your algorithm algo = ml.NiaveBayes() # algo = ml.DecisionTree(criterion='gini', prune='depth', max_depth=30, min_criterion=0.05) # algo = ml.NeuralNetwork(hidden_layer_sizes=(250, 100), learning_rate=0.012, momentum=0.5, random_state=0, max_iter=200, activation='tanh') # train or load model model = algo.train(training_set) # model = algo.load_model() """ end """ """ classify or predict """ # make a prediction predictions = algo.predict(model, test_set) # Prediction accuracy acc = ml.accuracy(predictions, test_set) print('training_set', len(training_set)) print('predictions, prediction_details', predictions, acc) print('label', ml.to_label(predictions, 'data/bag_of_words/label_match.pickle')) """ end """
dataset_sample = FileUtil.load_csv(dataset_path) # dataset_sample = prepro.normalize_dataset(dataset_sample) # print(dataset_sample) # split dataset -> train set, test set training_set, test_set = ml.split_dataset(dataset_sample, 1) # train model = algo.train(training_set) print('==== model ===',model) # make a prediction predictions = algo.predict(model, test_set) # Prediction accuracy acc = ml.accuracy(predictions, test_set) print('predictions, prediction_details', predictions, acc) print('label', ml.to_label(predictions,'data/bag_of_words/label_match.pickle')) print('==== Chatbot train completed! ====') elif args.mode == 'chat': print ("Start chatting with the bot !") model = algo.load_model() print('==== model loaded ===',model) sessionid = 'Liza' while True: question = input('') # preprocess mat = prepro.loading_single_doc(question, 'doc_freq', 1) prediction = algo.predict(model, [mat]) label = ml.to_label(prediction, 'data/bag_of_words/label_match.pickle') answer = get_answer(label) print('prediction', label)