class Classifier():
    """Text classifier that loads a pre-trained Naive Bayes model once at
    construction time and predicts a label for incoming questions.

    Keyword arguments override the default data/model paths; with no keyword
    arguments a built-in default configuration is used.
    """

    def __init__(self, *args, **kwargs):
        # BUG FIX: ``kwargs`` is always a dict (never None), so the original
        # ``kwargs == None`` test could never succeed and the default
        # configuration below was unreachable — a no-argument construction
        # silently passed an empty config through.  Test emptiness instead.
        if not kwargs:
            config = {
                'text_dir': 'data/dataset/doc',
                'dataset': 'data/matrix',
                'bag_of_words': 'data/bag_of_words',
                'train_model': 'data/model/doc.model',
                'is_unicode': False
            }
        else:
            config = kwargs

        self.ml = MachineLearning(**config)
        # Chosen algorithm: Naive Bayes.  A decision tree is also available:
        # self.algo = self.ml.DecisionTree(criterion='gini', prune='depth',
        #                                  max_depth=50, min_criterion=0.05)
        self.algo = self.ml.NiaveBayes()
        self.prepro = Preprocessing(**config)
        # Load the persisted model once so classify() can reuse it per call.
        self.model = self.algo.load_model()

    def classify(self, question="hello ai"):
        """Return the predicted label for *question*.

        The question is converted to a document-frequency feature vector
        (threshold 1) before being fed to the loaded model.
        """
        mat = self.prepro.loading_single_doc(question, 'doc_freq', 1)
        prediction = self.algo.predict(self.model, [mat])
        label = self.ml.to_label(prediction, 'data/bag_of_words/label_match.pickle')
        print(label)
        return label
def __init__(self, *args, **kwargs):
    """Build the classifier: resolve configuration, pick the algorithm,
    and load the persisted model.

    Keyword arguments override the default data/model paths; with no
    keyword arguments a built-in default configuration is used.
    """
    # BUG FIX: ``kwargs`` is always a dict (never None), so the original
    # ``kwargs == None`` comparison was always False and the defaults
    # below were unreachable.  Use truthiness to detect "no overrides".
    if not kwargs:
        config = {
            'text_dir': 'data/dataset/doc',
            'dataset': 'data/matrix',
            'bag_of_words': 'data/bag_of_words',
            'train_model': 'data/model/doc.model',
            'is_unicode': False
        }
    else:
        config = kwargs

    self.ml = MachineLearning(**config)
    # Chosen algorithm: Naive Bayes.  A decision tree is also available:
    # self.algo = self.ml.DecisionTree(criterion='gini', prune='depth',
    #                                  max_depth=50, min_criterion=0.05)
    self.algo = self.ml.NiaveBayes()
    self.prepro = Preprocessing(**config)
    # Load the persisted model once so later predictions can reuse it.
    self.model = self.algo.load_model()
def classify(config, text):
    """Text classification: predict a label for *text* with each of the
    three available algorithms and return all three answers."""
    # Turn the raw text into a document-frequency feature vector.
    features = Preprocessing(**config).loading_single_doc(
        text, 'doc_freq', config['threshold'])

    # Only 3 algorithms are wired up at the moment.
    engine = MachineLearning(**config)

    # Naive Bayes
    bayes = engine.NiaveBayes()
    bayes_pred = bayes.predict(bayes.load_model(), [features])

    # Artificial neural network
    network = engine.NeuralNetwork(
        hidden_layer_sizes=(250, 100), learning_rate=0.012, momentum=0.5,
        random_state=0, max_iter=200, activation='tanh')
    network_pred = network.predict(network.load_model(), [features])

    # Decision tree — note it is fed a numpy matrix rather than a plain list.
    tree = engine.DecisionTree(criterion='gini', prune='depth',
                               max_depth=30, min_criterion=0.05)
    tree_pred = tree.predict(tree.load_model(), np.array([features]))

    # Map each raw prediction back to its human-readable label and report
    # (1) Naive Bayes (2) Neural Network (3) Decision Tree.
    return {
        'NB': engine.to_label(bayes_pred, config['label_match']),
        'NN': engine.to_label(network_pred, config['label_match']),
        'DT': engine.to_label(tree_pred, config['label_match']),
    }
'is_unicode': False } prepro = Preprocessing(**config) # preposessing dataset_matrix = prepro.loading_data(config['text_dir'], 'doc_freq', 'all', 1) #load dataset from file (feature data) filename = "doc_freq_1.csv" dataset_path = FileUtil.dataset_path(config, filename) dataset_sample = FileUtil.load_csv(dataset_path) prepro_time = time.time() - whole_st ml = MachineLearning(**config) # choose your algorithm nb_algo = ml.NiaveBayes() nn_algo = ml.NeuralNetwork(hidden_layer_sizes=(250, 100), learning_rate=0.012, momentum=0.5, random_state=0, max_iter=200, activation='tanh') dt_algo = ml.DecisionTree(criterion='gini', prune='depth', max_depth=30, min_criterion=0.05) nb_result = perform_algo(ml, nb_algo, dataset_sample) nn_result = perform_algo(ml, nn_algo, dataset_sample)
preposessing """ prepro = Preprocessing(**config) # dataset_matrix = prepro.loading_data(config['text_dir'], 'doc_freq', 'all', 25) #load dataset from file (feature data) filename = "doc_freq_25.csv" dataset_path = FileUtil.dataset_path(config, filename) dataset_sample = FileUtil.load_csv(dataset_path) # dataset_sample = prepro.normalize_dataset(dataset_sample) # use with decision tree only """ end """ """ training """ ml = MachineLearning(**config) # split dataset -> train set, test set training_set, test_set = ml.split_dataset(dataset_sample, 2) # choose your algorithm algo = ml.NiaveBayes() # algo = ml.DecisionTree(criterion='gini', prune='depth', max_depth=30, min_criterion=0.05) # algo = ml.NeuralNetwork(hidden_layer_sizes=(250, 100), learning_rate=0.012, momentum=0.5, random_state=0, max_iter=200, activation='tanh') # train or load model model = algo.train(training_set) # model = algo.load_model() """ end """ """ classify or predict """
def _summarize_results(nb_result, nn_result, dt_result, prepro_time,
                       total_execution_time):
    """Build the flat report dict from the three per-algorithm summaries.

    Each ``*_result`` is expected to carry 'acc', 'acc_train' and
    'exec_time' keys (as produced by MLManager.perform_algo).
    """
    return {
        'com_time': round(total_execution_time, 2),
        'text_extract_time': round(prepro_time, 2),
        'figure_on_testing_data': {
            'NB': nb_result['acc'],
            'NN': nn_result['acc'],
            'DT': dt_result['acc'],
        },
        'figure_on_training_data': {
            'NB': nb_result['acc_train'],
            'NN': nn_result['acc_train'],
            'DT': dt_result['acc_train'],
        },
        'on_testing_data': {
            'NB': {'accuracy': nb_result['acc'], 'time': nb_result['exec_time']},
            'NN': {'accuracy': nn_result['acc'], 'time': nn_result['exec_time']},
            'DT': {'accuracy': dt_result['acc'], 'time': dt_result['exec_time']},
        },
        'on_training_data': {
            'NB': {'accuracy': nb_result['acc_train'], 'time': nb_result['exec_time']},
            'NN': {'accuracy': nn_result['acc_train'], 'time': nn_result['exec_time']},
            'DT': {'accuracy': dt_result['acc_train'], 'time': dt_result['exec_time']},
        },
    }


def get_results(path_textfile, params, config, start_time):
    """This function performs features extraction from client's data source.

    Train model based on extracted features.
    Get Accuracy of each algorithm (e.g: Naive Bayes, Neural Network) based on
    evaluation criteria e.g: LOO, 5 folds or 10 folds.

    Returns a report dict on success; implicitly returns None when feature
    extraction fails (preserved from the original behavior).
    """
    # Normalize the unicode flag: any explicitly supplied value (even a
    # falsy one) turns the flag on.  NOTE(review): this coerces
    # ``is_unicode=False`` to True — looks intentional ("key present means
    # unicode data"), but confirm against the callers.
    config['is_unicode'] = config.get('is_unicode', None) is not None

    #logfile = '/Users/lion/Documents/py-workspare/slash-ml/logfile.log'
    #logging.basicConfig(filename=logfile, level=logging.DEBUG)

    # NOTE(review): purpose of this marker key is unclear from here —
    # presumably consumed downstream; verify before removing.
    config['passion'] = "passion"

    # Perform features extraction
    is_successful_fextract = MLManager.extract_features(path_textfile, config)
    if not is_successful_fextract:
        return None

    whole_st = time.time()

    # Preprocessing: build the feature matrix from the extracted text.
    prepro = Preprocessing(**config)
    params_prepro = params['PR']
    # Called for its side effect: writes the feature CSV loaded below.
    prepro.loading_data(config['text_dir'], params_prepro['method'],
                        'all', params_prepro['threshold'])

    # Remove sub-directory from "data/dataset/text"
    FileUtil.remove_file(config['text_dir'], ignore_errors=True)

    # Load the dataset (feature data) back from the generated file.
    filename = "doc_freq_" + str(params_prepro['threshold']) + ".csv"
    dataset_path = FileUtil.dataset_path(config, filename)
    dataset_sample = FileUtil.load_csv(dataset_path)

    prepro_time = time.time() - whole_st

    ml = MachineLearning(**config)

    # Instantiate the three algorithms from the caller-supplied parameters.
    nb_algo = ml.NiaveBayes()

    params_nn = params['NN']
    nn_algo = ml.NeuralNetwork(
        hidden_layer_sizes=params_nn['hidden_layer_sizes'],
        learning_rate=params_nn['learning_rate'],
        momentum=params_nn['momentum'],
        random_state=params_nn['random_state'],
        max_iter=params_nn['max_iter'],
        activation=params_nn['activation'])

    params_dt = params['DT']
    dt_algo = ml.DecisionTree(
        criterion=params_dt['criterion'], prune='depth',
        max_depth=params_dt['max_depth'],
        min_criterion=params_dt['min_criterion'])

    # Train/evaluate each algorithm on the same sample.
    nb_result = MLManager.perform_algo(ml, nb_algo, dataset_sample)
    nn_result = MLManager.perform_algo(ml, nn_algo, dataset_sample)
    dt_result = MLManager.perform_algo(ml, dt_algo, dataset_sample)
    print(nb_result, nn_result, dt_result)

    total_execution_time = time.time() - whole_st
    return _summarize_results(nb_result, nn_result, dt_result,
                              prepro_time, total_execution_time)
if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('--mode',type=str,default ='chat',help='There are two mode (chat, train, train_c, test and none), The defaul value is chat.') # parser.add_argument("--benchmark", help="run benchmark",action="store_true") # parser.add_argument('--mode',type=float,default =0.2,help='There two mode?(train and chat)') args = parser.parse_args() config = { 'text_dir': 'data/dataset/chatbot', 'dataset': 'data/matrix', 'bag_of_words': 'data/bag_of_words', 'train_model': 'data/model/train.model', 'is_unicode': False } ml = MachineLearning(**config) # choose your algorithm # algo = ml.NiaveBayes() algo = ml.DecisionTree(criterion='gini', prune='depth', max_depth=50, min_criterion=0.05) prepro = Preprocessing(**config) # -- mode if args.mode == 'train' : # preposessing dataset_matrix = prepro.loading_data(config['text_dir'], 'doc_freq', 'all', 1) #load dataset from file (feature data) filename = "doc_freq_1.csv" dataset_path = FileUtil.dataset_path(config, filename) dataset_sample = FileUtil.load_csv(dataset_path) # dataset_sample = prepro.normalize_dataset(dataset_sample) # print(dataset_sample)