Code example #1
# Assumed imports: confusion_matrix is sklearn's; Naive_Bayes is the
# project-local classifier (the import path follows code examples #4 and #8).
from sklearn.metrics import confusion_matrix
from naive_bayes import Naive_Bayes

def get_confusion_matrix():
    #    db = DoubleBayes()
    #    predicted, actual = db.generate_predictions(weighted=True)
    nb = Naive_Bayes()
    predicted, actual = nb.generate_predictions()
    cnf_matrix = confusion_matrix(actual, predicted)
    return cnf_matrix
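
A quick way to sanity-check the matrix returned above: in sklearn's confusion_matrix, rows are actual labels and columns are predicted labels, so correct predictions sit on the diagonal and overall accuracy is the trace divided by the total count. A minimal usage sketch, assuming the function above is in scope:

import numpy as np

cm = get_confusion_matrix()
print(cm)                                    # rows: actual, columns: predicted
print("accuracy:", np.trace(cm) / cm.sum())  # diagonal = correct predictions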
Code example #2
# Assumed import: this is a Flask view function.
from flask import request, jsonify

def post():
    if request.is_json:
        data = request.get_json()
    else:
        # get_data() returns bytes; decode so jsonify can serialize it
        return jsonify({'request': request.get_data(as_text=True)})
    assert len(data) == 3  # expect exactly the three keys read below
    try:
        age = int(data['age'])
        condition_list = list(data['conditions'])
        state = str(data['state'])
    except KeyError:
        return jsonify(dict())
    nb = Naive_Bayes(age, state, condition_list)
    return jsonify({'probability': nb.get_probability()})
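
For reference, a client-side sketch of what this route expects: a JSON body with exactly the three keys the handler reads. The URL path and example values below are assumptions; only the key names and the 'probability' response field come from the handler itself.

import requests

payload = {"age": 45, "conditions": ["asthma", "diabetes"], "state": "MD"}
# Hypothetical URL; the route's path is not shown in the snippet above.
resp = requests.post("http://localhost:5000/predict", json=payload)
print(resp.json())  # e.g. {"probability": ...}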
Code example #3
File: classifier.py  Project: azpoliak/ai_homework4
    def train(self, training_data):
        """
        Data should be an n x (m+1) numpy matrix where n is the
        number of examples and m is the number of features
        (recall that the first element of the vector is the label).

        I recommend implementing the specific algorithms in a
        separate module and then determining which method to call
        based on classifier_type. E.g. if you had a module called
        neural_nets:

        if self.classifier_type == 'neural_net':
            import neural_nets
            neural_nets.train_neural_net(self.params, training_data)

        Note that your training algorithms should be modifying the parameters,
        so make sure that your methods are actually modifying self.params.

        You should print the accuracy, precision, and recall on the training data.
        """

        if self.classifier_type == 'neural_network':
            # change num_input, num_output based upon the data
            self.nn = Neural_Network(
                "neural_network",
                weights=[],
                num_input=self.params['num_input'],
                num_hidden=1000,
                num_output=self.params['num_output'],
                alt_weight=self.params['one'] == '1',
                momentum=self.params['two'] == '1',
            )
            self.nn.train(training_data)
        elif self.classifier_type == 'naive_bayes':
            self.nb = Naive_Bayes("naive_bayes")
            self.nb.train(training_data)
        elif self.classifier_type =='decision_tree':
            self.dt = Decision_Tree("decision_tree", pruning=self.params['one']=='1',
                    info_gain_ratio=self.params['two']=='1')
            self.dt.train(training_data)
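
The docstring's n x (m+1) convention is worth seeing concretely: the label occupies column 0 and the features fill the remaining m columns. A toy illustration in numpy (the values are made up):

import numpy as np

# Two examples, two features each; column 0 holds the label.
training_data = np.array([[0, 1.0, 2.0],
                          [1, 0.5, 1.5]])
labels = training_data[:, 0]     # [0., 1.]
features = training_data[:, 1:]  # the m feature columns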
Code example #4
from decision_tree import Decision_Tree
from naive_bayes import Naive_Bayes
import load_data as ld
import pdb

# nb = Decision_Tree("decision_tree", pruning=False, info_gain_ratio=True)
nb = Naive_Bayes("naive_bayes")

# monks3 = ld.load_monks(1)
# monks3 = ld.load_monks(2)
# monks3 = ld.load_monks(3)
# monks3 = ld.load_iris(.75)
monks3 = ld.load_congress_data(0.75)

# nb.train(monks3[0])
"""tot, hit = 0, 0
for person in monks3[1]:
  predict = nb.predict(person)
  if predict == person[0]:
  	hit += 1
  tot += 1"""

# classify =  nb.train(monks3[0])

nb.train(monks3[0])
# nb.train(monks3[0])
# nb.train(monks3[0])
# nb.train(monks3[0])
# pdb.set_trace()
# nb.test(monks3[1])
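
The commented-out loop above (the same pattern appears in code example #8) is the evaluation step. A cleaned-up version, assuming Naive_Bayes.predict returns the predicted label for one row:

tot, hit = 0, 0
for person in monks3[1]:           # monks3[1] is the held-out split
    if nb.predict(person) == person[0]:
        hit += 1
    tot += 1
print(hit, tot, hit / float(tot))  # raw counts and accuracy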
Code example #5
File: main.py  Project: ottofabian/PGM_Project
    hmm.fit(data_train)
    print(f"Duration of training: {time.time() - start_time}")

    # evaluation hmm
    # -------------------------------------------------------------------------
    # plot confusion matrix, calculate precision, recall, f1-score
    hmm.evaluate(data_test)
    # show misclassifications
    features_test, labels_test = separate_labels_from_features(data_test)
    predictions = hmm.predict(features_test)
    show_misclassifications(data_test, predictions)

elif model_type == "NB":
    # fit naive bayes model
    # -------------------------------------------------------------------------
    nb = Naive_Bayes()
    if load_entities:
        data_train_featurized = feature_maker.get_ner_features_nltk(data_train)
    else:
        data_train_featurized = feature_maker.get_pos_features_nltk(data_train)

    data_train_featurized = flatten(data_train_featurized)
    start_time = time.time()
    nb.fit_nltk(data_train_featurized)
    print(f"Duration of training: {time.time() - start_time}")

    # evaluation naive bayes
    # -------------------------------------------------------------------------
    if load_entities:
        data_test_featurized = feature_maker.get_ner_features_nltk(data_test)
    else:
        data_test_featurized = feature_maker.get_pos_features_nltk(data_test)
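
The POS-vs-NER switch is written out twice above, once per split; a small helper would express the choice once. This is a refactoring sketch, not code from the project:

def featurize(split):
    # load_entities toggles between NER and POS features, as above.
    if load_entities:
        return feature_maker.get_ner_features_nltk(split)
    return feature_maker.get_pos_features_nltk(split)

data_train_featurized = flatten(featurize(data_train))
data_test_featurized = featurize(data_test)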
Code example #6
File: classifier.py  Project: azpoliak/ai_homework4
class Classifier:

    def __init__(self, classifier_type, **kwargs):
        """
        Initializer. Classifier_type should be a string which refers
        to the specific algorithm the current classifier is using.
        Use keyword arguments to store parameters
        specific to the algorithm being used. E.g. if you were 
        making a neural net with 30 input nodes, hidden layer with
        10 units, and 3 output nodes your initialization might look
        something like this:

        neural_net = Classifier(weights = [], num_input=30, num_hidden=10, num_output=3)

        Here I have the weight matrices being stored in a list called weights (initially empty).
        """
        self.classifier_type = classifier_type
        self.params = kwargs
        """
        The kwargs you inputted just becomes a dictionary, so we can save
        that dictionary to be used in other methods.
        """


    def train(self, training_data):
        """
        Data should be an n x (m+1) numpy matrix where n is the
        number of examples and m is the number of features
        (recall that the first element of the vector is the label).

        I recommend implementing the specific algorithms in a
        separate module and then determining which method to call
        based on classifier_type. E.g. if you had a module called
        neural_nets:

        if self.classifier_type == 'neural_net':
            import neural_nets
            neural_nets.train_neural_net(self.params, training_data)

        Note that your training algorithms should be modifying the parameters,
        so make sure that your methods are actually modifying self.params.

        You should print the accuracy, precision, and recall on the training data.
        """

        if self.classifier_type == 'neural_network':
            # change num_input, num_output based upon the data
            self.nn = Neural_Network(
                "neural_network",
                weights=[],
                num_input=self.params['num_input'],
                num_hidden=1000,
                num_output=self.params['num_output'],
                alt_weight=self.params['one'] == '1',
                momentum=self.params['two'] == '1',
            )
            self.nn.train(training_data)
        elif self.classifier_type == 'naive_bayes':
            self.nb = Naive_Bayes("naive_bayes")
            self.nb.train(training_data)
        elif self.classifier_type =='decision_tree':
            self.dt = Decision_Tree("decision_tree", pruning=self.params['one']=='1',
                    info_gain_ratio=self.params['two']=='1')
            self.dt.train(training_data)

    def predict(self, data):
        """
        Predict class of a single data vector
        Data should be a 1 x (m+1) numpy matrix where m is the number of features
        (recall that the first element of the vector is the label).

        I recommend implementing the specific algorithms in a
        separate module and then determining which method to call
        based on classifier_type.

        This method should return the predicted label.
        """

    def test(self, test_data):
        """
        Data should be an n x (m+1) numpy matrix where n is the
        number of examples and m is the number of features
        (recall that the first element of the vector is the label).

        You should print the accuracy, precision, and recall on the test data.
        """
        
        # pdb.set_trace()
        # Accuracy, recall, and precision counters; label 0 is treated
        # as the positive ("relevant") class.
        relevant_and_retrieved, relevant, retrieved, total, hit = 0, 0, 0, 0, 0
        for person in test_data:
            predict = 0
            if self.classifier_type == 'neural_network':
                predict = self.nn.predict(person)
            elif self.classifier_type == 'naive_bayes':
                predict = self.nb.predict(person)
            elif self.classifier_type == 'decision_tree':
                predict = self.dt.predict(person)
            if predict == person[0]:
                if predict == 0:
                    relevant_and_retrieved += 1
                hit += 1
            if person[0] == 0:
                relevant += 1
            if predict == 0:
                retrieved += 1
            total += 1
        accuracy = hit/float(total)
        recall = relevant_and_retrieved/float(relevant)
        precision = relevant_and_retrieved/float(retrieved)
        print "Accuracy: ", accuracy
        print "Precision ", precision
        print "Recall: " , recall
Code example #7
def train(instances, algorithm, high_idx, learn_rate, iterate, peg_lambda,
          k_val, T, clus_lambda, K, clus_iter):

    if (algorithm == "perceptron"):
        classifier = Perceptron(instances, high_idx, learn_rate)
        #iterate the training
        for i in range(iterate):
            classifier.train(instances)
        return classifier

    elif (algorithm == "averaged_perceptron"):
        classifier2 = AveragePerceptron(instances, high_idx, learn_rate)
        for i in range(iterate):
            classifier2.train(instances)
        return classifier2

    elif (algorithm == "pegasos"):
        classifier3 = Pegasos(instances, high_idx, peg_lambda)
        for i in range(iterate):
            classifier3.train(instances)
        return classifier3

    elif (algorithm == "margin_perceptron"):
        classifier4 = PerceptronMargin(instances, high_idx, learn_rate,
                                       iterate)
        for i in range(iterate):
            classifier4.train(instances)
        return classifier4

    elif (algorithm == "knn"):
        classifier5 = KNN(instances, k_val, high_idx)
        for i in range(iterate):
            classifier5.train(instances)
        return classifier5

    elif (algorithm == "distance_knn"):
        classifier6 = Distance_KNN(instances, k_val, high_idx)
        for i in range(iterate):
            classifier6.train(instances)
        return classifier6

    elif (algorithm == "adaboost"):
        classifier7 = Adaboost(instances, T, high_idx)
        for i in range(iterate):
            classifier7.train(instances)
        return classifier7
    elif (algorithm == "lambda_means"):
        classifier8 = Lambda_Means2(instances, high_idx, clus_lambda,
                                    clus_iter)
        for i in range(iterate):
            #print "##################Training", i+1, "out of", iterate,"###############"
            classifier8.train(instances)
        return classifier8
    elif (algorithm == "nb_clustering"):
        classifier9 = Naive_Bayes(instances, high_idx, K)
        for i in range(iterate):
            #print "##################Training", i+1, "out of", iterate,"###############"
            classifier9.train(instances)
        return classifier9
    elif (algorithm == "mc_perceptron"):
        classifier10 = MC_Perceptron(instances, high_idx)
        for i in range(iterate):
            classifier10.train(instances)
        return classifier10

    else:
        return None
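
The chain above is correct but repetitive: every branch builds a classifier and then runs the same training loop. A table-driven sketch of the same dispatch, assuming the same constructors and parameters as above (not the project's code):

def train_table(instances, algorithm, high_idx, learn_rate, iterate,
                peg_lambda, k_val, T, clus_lambda, K, clus_iter):
    factories = {
        "perceptron": lambda: Perceptron(instances, high_idx, learn_rate),
        "averaged_perceptron": lambda: AveragePerceptron(instances, high_idx, learn_rate),
        "pegasos": lambda: Pegasos(instances, high_idx, peg_lambda),
        "knn": lambda: KNN(instances, k_val, high_idx),
        "adaboost": lambda: Adaboost(instances, T, high_idx),
        "nb_clustering": lambda: Naive_Bayes(instances, high_idx, K),
        # ... remaining algorithms follow the same pattern
    }
    factory = factories.get(algorithm)
    if factory is None:
        return None
    classifier = factory()
    for _ in range(iterate):
        classifier.train(instances)
    return classifier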
Code example #8
from naive_bayes import Naive_Bayes
import load_data as ld
import pdb

nb = Naive_Bayes("naive_bayes")

congress = ld.load_congress_data(0.75)

# iris = ld.load_iris(.75)

classify = nb.train(congress[0])

# nb.train(iris[0])
# pdb.set_trace()
# nb.test(congress[1])

tot, hit = 0, 0
for person in congress[1]:
    predict = nb.predict(person)
    if predict == person[0]:
        hit += 1
    tot += 1

print(hit, tot, hit / float(tot))
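
The same accuracy can also be computed with sklearn, which makes it easy to reuse confusion_matrix from code example #1. A sketch, assuming the predictions are collected into lists first:

from sklearn.metrics import accuracy_score, confusion_matrix

actual = [person[0] for person in congress[1]]
predicted = [nb.predict(person) for person in congress[1]]
print(accuracy_score(actual, predicted))
print(confusion_matrix(actual, predicted))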