예제 #1
0
class Classifier:
    """Single-question text classifier backed by a stored Naive Bayes model.

    Construction builds the ``MachineLearning`` and ``Preprocessing`` helpers
    from one shared configuration and loads the model once, so ``classify``
    can then be called repeatedly.
    """

    # Settings applied when the caller passes no keyword arguments.
    _DEFAULT_CONFIG = {
        'text_dir': 'data/dataset/doc',
        'dataset': 'data/matrix',
        'bag_of_words': 'data/bag_of_words',
        'train_model': 'data/model/doc.model',
        'is_unicode': False,
    }
    # Label lookup file used unless the configuration names another one.
    _DEFAULT_LABEL_MATCH = 'data/bag_of_words/label_match.pickle'

    def __init__(self, *args, **kwargs):
        """Build the helpers and load the model.

        Args:
            *args: Accepted for backward compatibility; not used.
            **kwargs: Configuration expanded as keyword arguments for both
                ``MachineLearning`` and ``Preprocessing``. When no keyword
                arguments are given, the built-in default configuration is
                used instead. An optional ``'label_match'`` entry selects
                the label lookup file passed to ``to_label``.
        """
        config = self._resolve_config(kwargs)
        self.label_match = config.get('label_match', self._DEFAULT_LABEL_MATCH)

        self.ml = MachineLearning(**config)
        # Naive Bayes is the algorithm in use; ``self.ml`` is where another
        # one would be selected (the original noted DecisionTree as an option).
        self.algo = self.ml.NiaveBayes()
        self.prepro = Preprocessing(**config)
        self.model = self.algo.load_model()

    @staticmethod
    def _resolve_config(kwargs):
        """Return a copy of ``kwargs``, or of the defaults when it is empty.

        ``**kwargs`` is always a dict and never ``None``, so emptiness is the
        only reliable signal that the caller supplied no configuration.
        """
        if kwargs:
            return dict(kwargs)
        return dict(Classifier._DEFAULT_CONFIG)

    def classify(self, question="hello ai"):
        """Predict the label for ``question``, print it and return it.

        Args:
            question: Text to classify.

        Returns:
            Whatever ``to_label`` produces for the single prediction.
        """
        # 'doc_freq' and 1 are passed through unchanged from the original
        # call; their exact meaning is defined by Preprocessing.
        mat = self.prepro.loading_single_doc(question, 'doc_freq', 1)
        prediction = self.algo.predict(self.model, [mat])
        label = self.ml.to_label(prediction, self.label_match)
        print(label)
        return label
예제 #2
0
    def classify(config, text):
        """Label ``text`` with each of the three stored models.

        The text is converted to a feature row by ``Preprocessing`` and then
        passed to the Naive Bayes, neural-network and decision-tree models
        loaded through ``MachineLearning``.

        Args:
            config: Mapping expanded as keyword arguments for both helpers;
                its ``'threshold'`` and ``'label_match'`` entries are also
                read here.
            text: The document to classify.

        Returns:
            A dict with keys ``'NB'``, ``'NN'`` and ``'DT'`` holding the
            label produced by each model.
        """
        # Feature extraction: text -> frequency row.
        extractor = Preprocessing(**config)
        row = extractor.loading_single_doc(text, 'doc_freq',
                                           config['threshold'])

        # Only these three algorithms are wired up at the moment.
        learner = MachineLearning(**config)

        # Naive Bayes
        bayes = learner.NiaveBayes()
        bayes_guess = bayes.predict(bayes.load_model(), [row])

        # Neural network
        network_options = {
            'hidden_layer_sizes': (250, 100),
            'learning_rate': 0.012,
            'momentum': 0.5,
            'random_state': 0,
            'max_iter': 200,
            'activation': 'tanh',
        }
        network = learner.NeuralNetwork(**network_options)
        network_guess = network.predict(network.load_model(), [row])

        # Decision tree: receives the row wrapped in a NumPy array. The
        # original left a normalize_dataset() step disabled, so none is
        # applied here either.
        tree = learner.DecisionTree(criterion='gini', prune='depth',
                                    max_depth=30, min_criterion=0.05)
        tree_model = tree.load_model()
        tree_guess = tree.predict(tree_model, np.array([row]))

        # Translate every prediction to its label, keyed by algorithm tag.
        guesses = (('NB', bayes_guess), ('NN', network_guess),
                   ('DT', tree_guess))
        return {
            tag: learner.to_label(guess, config['label_match'])
            for tag, guess in guesses
        }
예제 #3
0
    training
  """
    ml = MachineLearning(**config)
    # split dataset -> train set, test set
    training_set, test_set = ml.split_dataset(dataset_sample, 2)
    # choose your algorithm
    algo = ml.NiaveBayes()
    # algo = ml.DecisionTree(criterion='gini', prune='depth', max_depth=30, min_criterion=0.05)
    # algo = ml.NeuralNetwork(hidden_layer_sizes=(250, 100), learning_rate=0.012, momentum=0.5, random_state=0, max_iter=200, activation='tanh')
    # train or load model
    model = algo.train(training_set)
    # model = algo.load_model()
    """
    end
  """
    """
    classify or predict
  """
    # make a prediction
    predictions = algo.predict(model, test_set)
    # Prediction accuracy
    acc = ml.accuracy(predictions, test_set)

    print('training_set', len(training_set))
    print('predictions, prediction_details', predictions, acc)
    print('label',
          ml.to_label(predictions, 'data/bag_of_words/label_match.pickle'))
    """
    end
  """
예제 #4
0
    dataset_sample = FileUtil.load_csv(dataset_path)
    # dataset_sample = prepro.normalize_dataset(dataset_sample)
    # print(dataset_sample)

    # split dataset -> train set, test set
    training_set, test_set = ml.split_dataset(dataset_sample, 1)
    # train
    model = algo.train(training_set)
    print('==== model ===',model)
    # make a prediction
    predictions = algo.predict(model, test_set)
    # Prediction accuracy
    acc = ml.accuracy(predictions, test_set)

    print('predictions, prediction_details', predictions, acc)
    print('label', ml.to_label(predictions,'data/bag_of_words/label_match.pickle'))
    print('==== Chatbot train completed! ====')

  elif args.mode == 'chat':
    print ("Start chatting with the bot !")
    model = algo.load_model()
    print('==== model loaded ===',model)
    sessionid = 'Liza'
    while True:
      question 	= input('')
      # preprocess
      mat = prepro.loading_single_doc(question, 'doc_freq', 1)
      prediction = algo.predict(model, [mat])
      label = ml.to_label(prediction, 'data/bag_of_words/label_match.pickle')
      answer = get_answer(label)
      print('prediction', label)