Example #1
def train(self):
    if self.uni_gram:
        self.nb_uni = NaiveBayes.NaiveBayes(self.train_tweets)
        self.nb_uni.train()
    if self.bi_gram:
        self.nb_bi = NaiveBayes.NaiveBayes(self.train_tweets, bi_gram=True)
        self.nb_bi.train()
Example #2
def main():
    parser = argparse.ArgumentParser(description="Parse values.")
    # the original mixed '-argN' flags with bare destination names,
    # which argparse rejects; plain '--name' flags are used instead
    parser.add_argument('--trainPath', type=str, required=True)
    parser.add_argument('--testPath', type=str, required=True)
    parser.add_argument('--n', type=int, required=True)
    parser.add_argument('--lamda', type=float, required=True)
    args = parser.parse_args()

    trainPath = args.trainPath
    testPath = args.testPath
    n = args.n
    lamda = args.lamda

    nbModel = NaiveBayes()

    inout = io.IO()

    trainSet = inout.readDocuments(trainPath, n)
    testSet = inout.readDocuments(testPath, n)

    nbModel.train(trainSet)

    for doc in testSet:
        bestLanguage = nbModel.mostLikelyLanguage(doc.text, lamda)
        # doc.id is assumed here; the original printed the builtin id
        print(doc.id + "|" + bestLanguage)
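A hypothetical invocation, assuming the script above is saved as main.py:

python main.py --trainPath train.txt --testPath test.txt --n 3 --lamda 0.5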
Example #3
def main():
    """
    main method
    :return:
    """
    # get data
    train_df, test_df = generate_df("data/review_polarity/txt_sentoken")

    # separate training data into data, train_labels
    train_labels = pd.DataFrame(train_df["category"])
    train_df = train_df["text"]

    # create model
    nb = NaiveBayes.NaiveBayes()

    # train
    nb.fit(train_df, train_labels)

    # predict
    output = nb.predict(test_df)

    # check accuracy
    df = pd.DataFrame()
    df['guess'] = output['guess']
    df['actual'] = test_df['category']

    df['correct'] = df['guess'] == df['actual']

    print(df)
    print(np.mean(df['correct']))
Example #4
def output_test_file(input_filename, output_filename):
    #class, gender, and ticket fare
    #   KNN_classifier = KNN(5, [test_columns.Pclass,test_columns.Sex,test_columns.Fare])
    train_data = load_data('train.csv', 'train')

    bin_data(train_data)
    #   attributes = [ x for x,y in enumerate(att_values) if (y != 'skip' and x != 0)]
    #   DecisionTreeClassifier = DecisionTree(train_data, attributes,'')
    NBClassifier = NaiveBayes([
        test_columns.PassengerId, test_columns.Sex, test_columns.Fare,
        test_columns.Pclass, test_columns.Age
    ])

    test_data = load_data(input_filename, 'test')
    output_file_object = csv.writer(open(output_filename, 'w', newline=''))
    output_file_object.writerow(["Survived", "PassengerID"])

    #   for row in test_data:
    #      if row[test_columns.Sex] == 'female':
    #         row[test_columns.Sex] = 0.0
    #      else:
    #         row[test_columns.Sex] = 1.0

    bin_data(test_data)
    for row in test_data:
        if NBClassifier.predict(row) == 1:
            output_file_object.writerow(["1", row[0]])
        else:
            output_file_object.writerow(["0", row[0]])
Example #5
def __init__(self, filename, classifier='NaiveBayes'):
    # the classifier argument is currently unused; NaiveBayes is always used
    self.classifier = NB.NaiveBayes()
    self.filename = filename
    data = pd.read_csv(filename, header=None,
                       delimiter="\t", quoting=3)  # quoting=3 is csv.QUOTE_NONE
    self.corpus = data[1]
    self.labels = data[0]
    self.build_vocab(self.corpus)
Example #6
def tarea1(entrenamiento, prueba):
    # entrenamiento = training set, prueba = test set
    d = Main()
    (t_0, t_1) = d.split(entrenamiento)
    nb = NaiveBayes.NaiveBayes(entrenamiento, t_1, t_0, prueba)
    nb.plot()
    b = Bayes.Bayes(entrenamiento, t_1, t_0, prueba)
    b.plot()
Example #7
def testNaiveBayes():
    X = np.mat(np.loadtxt(r"data\iris\iris.txt", delimiter=","))
    numbers = np.mat([0] * 4)

    nb = NaiveBayes(1)
    nb.train(X, numbers)
    result = nb.predict(X)

    # fraction of rows whose last column (the label) disagrees with the prediction
    print(X[(X[:, -1] != result).A.flatten(), :].shape[0] / X.shape[0])
Example #8
    def test(self):
        """Test na sztucznych danych."""

        def getfeatures(text):
            """Funkcja do testów."""
            return list(set(text.split()))

        bayes = NaiveBayes.NaiveBayes(getfeatures)

        bayes.feature_count = {('terms,', 'C1'): 1, ('considers', 'C2'): 1,
                    ('independently', 'C3'): 1, ('each', 'C1'): 1,
                    ('that', 'C1'): 1, ('the', 'C3'): 1, ('on', 'C1'): 1,
                    ('features', 'C1'): 1, ('and', 'C3'): 1, ('is', 'C2'): 1,
                    ('feature.', 'C2'): 1, ('For', 'C2'): 1, ('fruit', 'C2'): 1,
                    ('features,', 'C2'): 1, ('classifier', 'C2'): 1,
                    ('(or', 'C2'): 2, ('these', 'C1'): 1, ('the', 'C2'): 2,
                    ('particular', 'C2'): 1, ('may', 'C2'): 1,
                    ('Bayes', 'C2'): 1, ('all', 'C2'): 1, ('feature', 'C2'): 1,
                    ('apple', 'C3'): 1, ('naive', 'C2'): 1, ('depend', 'C1'): 1,
                    ('other', 'C2'): 2, ('if', 'C3'): 1,
                    ('contribute', 'C3'): 1, ('any', 'C2'): 1,
                    ('these', 'C2'): 1, ('4"', 'C3'): 1,
                    ('classifier', 'C1'): 1, ('other', 'C1'): 1,
                    ('of', 'C1'): 1, ('assumes', 'C1'): 1,
                    ('Bayes', 'C1'): 1, ('Even', 'C1'): 1,
                    ('presence', 'C1'): 1, ('the', 'C1'): 2,
                    ('a', 'C2'): 3, ('upon', 'C1'): 1,
                    ('that', 'C3'): 1, ('example,', 'C2'): 1,
                    ('properties', 'C3'): 1, ('this', 'C3'): 1,
                    ('to', 'C2'): 1, ('In', 'C1'): 1,
                    ('round,', 'C3'): 1, ('about', 'C3'): 1,
                    ('absence)', 'C2'): 2, ('of', 'C2'): 3,
                    ('diameter.', 'C3'): 1,
                    ('existence', 'C1'): 1, ('be', 'C3'): 1,
                    ('considered', 'C3'): 1, ('a', 'C1'): 1,
                    ('it', 'C3'): 1, ('an', 'C3'): 1,
                    ('or', 'C1'): 1, ('if', 'C1'): 1,
                    ('presence', 'C2'): 1, ('is', 'C3'): 1,
                    ('to', 'C3'): 2, ('unrelated', 'C2'): 1,
                    ('red,', 'C3'): 1, ('probability', 'C3'): 1,
                    ('naive', 'C1'): 1, ('class', 'C2'): 1,
                    ('in', 'C3'): 1, ('simple', 'C1'): 1}

        bayes.class_count = {'C1': 2, 'C2': 3, 'C3': 2}

        feat_cats = [
            ('of', 'C2'), ('to', 'C3'), ('features', 'C1'),
            ('Bayes', 'C1'), ('of', 'C1'),
            ('to', 'C5'), ('features', 'C3'), ('Bayes', 'C2')]
        probs = [0.0, 0.0, -0.6931,
                 -0.6931, -0.6931,
                 -1e+300, -7.6009, -1.0986]

        for idx in range(len(feat_cats)):
            self.assertAlmostEqual(
                featprob(bayes, feat_cats[idx][0], feat_cats[idx][1]),
                probs[idx], 4)
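The expected values above are consistent with featprob returning log(count(feature, class) / class_count[class]) for seen pairs. The smoothing for unseen features and classes is not shown, so the sketch below is an assumption calibrated to reproduce the eight expected values, not the original implementation:

import math

def featprob(bayes, feature, category):
    # Sketch only: log-probability of a feature given a class.
    if category not in bayes.class_count:
        return -1e+300  # assumed sentinel for an unseen category ('C5' above)
    count = bayes.feature_count.get((feature, category), 0)
    if count == 0:
        return math.log(5e-4)  # about -7.6009, matching ('features', 'C3')
    return math.log(count / bayes.class_count[category])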
Example #9
def mainWIthAllFlower():
    irisData = datasets.load_iris()
    newDataset = concateTargetWithDataset(irisData.data, irisData.target)

    naiveBayes = NaiveBayes.NaiveBayes()
    crossValidator = CrossValidator.CrossValidator(algo=naiveBayes,
                                                   dataset=newDataset,
                                                   nbFolds=10)
    _scoresByFold, meanAccuracy, _rocData = crossValidator.score()
    print('Accuracy: %.2f%%' % meanAccuracy)
Example #10
def create_classifier():
    dir_pos = os.path.join(BASE_DIR, "pos")
    dir_neg = os.path.join(BASE_DIR, "neg")

    nbc = nb.NaiveBayes(positive_corpus=dir_pos, negative_corpus=dir_neg)

    # train the two categories
    nbc.train()

    return nbc
Example #11
def test_model_nb(dataset):
    X_con, X_cat, Y, test_con, test_cat, test_y = testImport.read_data(
        dataset, 0)

    model = NaiveBayes.NaiveBayes()
    model.fit(X_con, X_cat, Y)
    y_hat = model.predict(test_con, test_cat)

    ac = evaluate_acc_NB(test_y, y_hat)
    print(ac)
Example #12
def _init_classifiers(self):
    # Initialize classifier objects
    self.fenc = FreemanEncoder()
    self.knn = KNN.KNN()
    self.HMM = HMM.HMM()
    self.NaiveBayes = NaiveBayes.NaiveBayes()
    self.RandomForest = RandomForest.RandomForests()
    self.SVM = svm.SVM_SVC()
    self.LogisticReg = LogisticReg.LogisticReg()
    self.AdaBoost = adaboost.AdaBoost()
    self.GBRT = gbrt.GBRT()

    # Train initially on the default data set if no model is saved already.
    # Note: the original code called pickle.load() and discarded the result;
    # here the loaded models are assigned to the classifier attributes.

    # Initialize KNN; there is no saved model for KNN
    self.knn.knn_train(CharRecognitionGUI_support.training_dataset, 1.0)

    # Initialize HMM
    self.HMM.training(CharRecognitionGUI_support.training_dataset)

    # Initialize Naive Bayes
    try:
        self.NaiveBayes = pickle.load(open("./Models/naivebayes_model.p", "rb"))
    except IOError:
        self.NaiveBayes.training(CharRecognitionGUI_support.training_dataset)

    # Initialize Random Forest
    try:
        self.RandomForest = pickle.load(open("./Models/random_forest.p", "rb"))
    except IOError:
        self.RandomForest.training(CharRecognitionGUI_support.training_dataset)

    # Initialize SVM
    try:
        self.SVM = pickle.load(open("./Models/svm.p", "rb"))
    except IOError:
        self.SVM.training(CharRecognitionGUI_support.training_dataset)

    # Initialize Logistic Regression
    try:
        self.LogisticReg = pickle.load(open("./Models/logistic_model.p", "rb"))
    except IOError:
        self.LogisticReg.training(CharRecognitionGUI_support.training_dataset)

    # Initialize AdaBoost
    try:
        self.AdaBoost = pickle.load(open("./Models/AdaBoostClassifier.p", "rb"))
    except IOError:
        self.AdaBoost.training(CharRecognitionGUI_support.training_dataset)

    # Initialize GBRT
    try:
        self.GBRT = pickle.load(open("./Models/GradientBoostingClassifier.p", "rb"))
    except IOError:
        self.GBRT.training(CharRecognitionGUI_support.training_dataset)
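The repeated try/except blocks above could be factored into a small helper; a minimal sketch, assuming each pickle file stores the trained classifier object:

import pickle

def load_or_train(path, classifier, dataset):
    # Hypothetical helper: return a previously pickled classifier if the
    # file exists, otherwise train the given classifier on the dataset.
    try:
        with open(path, "rb") as f:
            return pickle.load(f)
    except (IOError, OSError):
        classifier.training(dataset)
        return classifier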
Example #13
def funcPCA():
    nb = NaiveBayes.NaiveBayes()
    data = nb.convert(0)
    pca = PCA(n_components=512)
    print("\nNaive Bayes after PCA reduces the data dimension to 512\n")
    featureMatrix = np.zeros([len(data.train), 1024])
    for i, image in enumerate(data.train):
        featureMatrix[i] = image.inp_data
    featureMatrix = pca.fit(featureMatrix).transform(featureMatrix)
    for i, image in enumerate(data.train):
        data.train[i].inp_data = featureMatrix[i]
    featureMatrix = np.zeros([len(data.test), 1024])
    for i, image in enumerate(data.test):
        featureMatrix[i] = image.inp_data

    # note: PCA is re-fitted on the test set here; the standard approach
    # would reuse the train-fitted PCA via pca.transform
    featureMatrix = pca.fit(featureMatrix).transform(featureMatrix)

    for i, image in enumerate(data.test):
        data.test[i].inp_data = featureMatrix[i]
    print("\nCalculating the likelihood and prior\n")
    likelihood, prior = nb.train(data)
    train_accuracy = nb.classify(data, likelihood, prior)
    train_accuracy = float("{:.2f}".format(train_accuracy))
    print("\nThe training error rate is:", train_accuracy, "%\n")

    test_accuracy = nb.test(data, likelihood, prior)
    test_accuracy = float("{:.2f}".format(test_accuracy))
    print("\nThe testing error rate is:", test_accuracy, "%\n")

    print("\nKNN after PCA reduces the data dimension to 512\n")
    print("Please note: KNN can take around 15 minutes to run\n")
    knn = KNearestNeighbours.KNearestNeighbours()
    data = knn.convert(0)
    print("\nEvaluating the testing error in KNN using different k values\n")
    testErrors = []
    trainErrors = []
    for k in range(1, 11):
        testErrors.append(knn.classify(data, k))
        print("\nThe testing error rate for k =", k, "is:", testErrors[-1], "\n")

    print("\nEvaluating the training error in KNN using different k values\n")

    for k in range(1, 11):
        trainErrors.append(knn.train(data, k))
        print("\nThe training error rate for k =", k, "is:", trainErrors[-1], "\n")
Example #14
def mainTestBrainCancer():
    cancerData = datasets.load_breast_cancer()
    newDataset = concateTargetWithDataset(cancerData.data, cancerData.target)

    naiveBayes = NaiveBayes.NaiveBayes()
    crossValidator = CrossValidator.CrossValidator(algo=naiveBayes,
                                                   dataset=newDataset,
                                                   nbFolds=10)
    _scoresByFold, meanAccuracy, rocData = crossValidator.score()
    print('Accuracy: %.2f%%' % meanAccuracy)

    roc = ROC.ROC()
    roc.rocCurve(rocData)
    roc.showROC()
Example #15
def main(data, method, P, **kwargs):
    partitions = list(split(data, P)) if P > 1 else [data]

    metrics = list()
    for i in range(P):
        if method == "DT":
            model = DecisionTree(kwargs['RENDER_TREE'])
        elif method == "RF":
            model = RandomForest(kwargs['T'], kwargs['M'], kwargs['bagging'])
        elif method == "KNN":
            model = KNN(kwargs['K'], kwargs['scaling'])
        elif method == "NB":
            model = NaiveBayes()
        elif method == "BST":
            model = Boost(kwargs['T'])
        test = partitions[i]
        train = []

        # every partition except the i-th one is used for training
        for j in range(0, i):
            train += partitions[j]

        for j in range(i + 1, P):
            train += partitions[j]

        result = model.fit_transform(train, test)

        actual = [t['class'] for t in result]
        predicted = [t['assigned'] for t in result]

        metrics.append(Performance(actual, predicted))
        print(metrics[-1])
        print()

    if P != 1:
        print('-----------------------------')
        print("Accuracy:", round(
            mean([v for v in (m.accuracy() for m in metrics) if v <= 1]) * 100, 2), "%")
        print("Precision:", round(
            mean([v for v in (m.precision() for m in metrics) if v <= 1]) * 100, 2), "%")
        print("Recall:", round(
            mean([v for v in (m.recall() for m in metrics) if v <= 1]) * 100, 2), "%")
        print("F1-Measure:", round(
            mean([v for v in (m.f1() for m in metrics) if v <= 1]) * 100, 2), "%")
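The two loops that gather the training folds can be collapsed into one expression; an equivalent sketch:

from itertools import chain

# every partition except the i-th becomes the training set
train = list(chain.from_iterable(partitions[:i] + partitions[i + 1:]))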
Example #16
def mainWhitoutLastFlower():
    irisData = datasets.load_iris()
    irisData.data = irisData.data[50:]
    irisData.target = irisData.target[50:]
    newDataset = concateTargetWithDataset(irisData.data, irisData.target)

    naiveBayes = NaiveBayes.NaiveBayes()
    crossValidator = CrossValidator.CrossValidator(algo=naiveBayes,
                                                   dataset=newDataset,
                                                   nbFolds=10)
    _scoresByFold, meanAccuracy, rocData = crossValidator.score()
    print('Accuracy: %.2f%%' % meanAccuracy)

    roc = ROC.ROC()
    roc.rocCurve(rocData)
    roc.showROC()
Example #17
def main():
    """
    Loads data into partitions, creates a Naive Bayes model based on the train
    data, runs the model on the test data, and evaluates its accuracy.
    """
    opts = util.parse_args()
    train_partition, test_partition = util.read_arff(opts.filename)

    nb_model = NaiveBayes(train_partition)

    examples = test_partition.data
    total = len(examples)
    total_correct = 0

    K = test_partition.K
    confusion_matrix = np.zeros((K, K), int)
    for example in examples:
        y_hat = nb_model.classify(example.features)
        y = example.label
        confusion_matrix[y][y_hat] += 1

        if y_hat == y:
            total_correct += 1

    accuracy = round(total_correct / total, 6)
    accuracy_str = "Accuracy: " + str(accuracy) + " ("
    correct_str = str(total_correct) + " out of " + str(total) + " correct)"
    print(accuracy_str + correct_str)
    stretch = 8
    prediction_labels = "   "
    top_row = "   "
    table = ""
    for y_hat in range(K):
        prediction_labels += " " * (stretch -
                                    len(str(y_hat + 1))) + str(y_hat + 1)
        top_row += "-" * stretch
    for y in range(K):
        table += " " + str(y + 1) + "|"
        for y_hat in range(K):
            entry = str(confusion_matrix[y][y_hat])
            table += " " * (stretch - len(entry)) + entry
        table += "\n"
    print("\n\n        prediction")
    print(prediction_labels)
    print(top_row)
    print(table)
Example #18
def learnClassifer(self):
    model = NaiveBayes()
    attributes = []
    for j in range(len(self.featureFactory.datatable)):
        instance = {}  # avoid shadowing the builtin dict
        instance['cases'] = 1
        instance['attributes'] = {}
        line = self.featureFactory.datatable[j]
        for i in range(len(line)):
            instance['attributes'][str(i)] = line[i]
            attributes.append(str(i))
        instance['label'] = self.featureFactory.classes[j]
        model.add_instances(instance)
    model.set_real(attributes)
    model.train()
    self.model = model
    # pickle.dumps() already returns bytes in Python 3; the original
    # .encode('string_escape') call is Python 2 only
    return pickle.dumps(model)
Example #19
def test_both():
    log_res = []
    nb_res = []
    for i in range(1, 5):
        x, y, x_test, y_test = testImport.read_data(i, 1)
        x_con, x_cat, y_, xt_con, xt_cat, yt = testImport.read_data(i, 0)

        log = LogRegression.Log_Regression(1, 0.005, 25000)
        nb = NaiveBayes.NaiveBayes()

        log.fit(x, y)
        nb.fit(x_con, x_cat, y)

        log_per = evaluate_acc(y_test, log.predict(x_test))
        nb_per = evaluate_acc_NB(yt, nb.predict(xt_con, xt_cat))
        log_res.append(log_per)
        nb_res.append(nb_per)
    print(log_res)
    print(nb_res)
Example #20
def test_nb_smaller_d():
    d_list = [200, 100, 50, 40, 30, 25, 20, 10, 7, 5, 4, 3]
    for i in range(1, 5):
        smaller_d = []
        x_con, x_cat, y, xt_con, xt_cat, yt = testImport.read_data(i, 0)
        for d in d_list:
            if x_con is not None:
                x_con, xt_con = less_features(x_con, xt_con, d)
            if x_cat is not None:
                x_cat, xt_cat = less_features(x_cat, xt_cat, d)
            model = NaiveBayes.NaiveBayes()
            model.fit(x_con, x_cat, y)
            smaller_d.append(evaluate_acc_NB(yt, model.predict(xt_con,
                                                               xt_cat)))
        plt.plot(d_list, smaller_d)
        plt.xlabel('d (number of features)')
        plt.ylabel('performance')
    plt.legend(['ionosphere', 'census', 'poker', 'credit'])
    plt.savefig('nb_testing/smaller_d')
Example #21
def mainWhitoutMiddleFlower():
    irisData = datasets.load_iris()
    irisData.data = [
        # keep classes 0 and 2 (indices 0-49 and 100-149) to match the
        # label filter below; the original off-by-one kept index 50 and
        # dropped index 100
        instance for index, instance in enumerate(irisData.data)
        if index < 50 or index >= 100
    ]
    irisData.target = list(filter(lambda label: label != 1, irisData.target))
    newDataset = concateTargetWithDataset(irisData.data, irisData.target)

    naiveBayes = NaiveBayes.NaiveBayes()
    crossValidator = CrossValidator.CrossValidator(algo=naiveBayes,
                                                   dataset=newDataset,
                                                   nbFolds=10)
    _scoresByFold, meanAccuracy, rocData = crossValidator.score()
    print('Accuracy: %.2f%%' % meanAccuracy)

    roc = ROC.ROC()
    roc.rocCurve(rocData)
    roc.showROC()
Example #22
def main():

    opts = util.parse_args()
    train_partition = util.read_arff(opts.train_filename)
    test_partition = util.read_arff(opts.test_filename)

    # Create the Naive Bayes model
    nb_model = NaiveBayes(train_partition)
    m = len(test_partition.labels)  # assumes .labels holds the distinct class labels
    confusion_matrix = np.zeros((m, m))  # initialize the confusion matrix
    correct = 0
    for x in test_partition.data:
        y_hat = nb_model.classify(x.features)
        y = x.label
        confusion_matrix[y][y_hat] += 1
        if y == y_hat:
            correct += 1

    print('Accuracy: ' + str(round(correct / test_partition.n, 6)) + ' (' +
          str(correct) + ' out of ' + str(test_partition.n) + ' correct)')
    print(confusion_matrix)
Example #23
def main(argv):
    testname = ''

    HlayerSize = 100
    HlayerCount = 2

    nsplits = 3

    usage = ('test.py -t <testname> --hls <hidden layer size> '
             '--hlc <hidden layer count> --nsplits <nsplits>')
    # Note: the original getopt spec ("hi:o:") did not match the options
    # handled below; multi-character flags must be long options in getopt.
    try:
        opts, args = getopt.getopt(argv, "ht:", ["tname=", "hls=", "hlc=", "nsplits="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-t", "--tname"):
            testname = arg
        elif opt == "--hls":
            HlayerSize = int(arg)
        elif opt == "--hlc":
            HlayerCount = int(arg)
        elif opt == "--nsplits":
            nsplits = int(arg)

    Hlayer = [HlayerSize] * HlayerCount

    NB = nb.NaiveBayes(testname=testname, subtestname='naivebayes')
    X, Y = NB.loadMatrixFromFile()
    res.getResults(NB)

    RF = rf.RandomForest(testname=testname, subtestname='randomforest')
    X, Y = RF.loadMatrixFromFile()
    res.getResults(RF)

    NN = nn.NeuralNetwork(testname=testname, subtestname='neuralnetwork',
                          HlayerSizes=Hlayer, nsplits=nsplits)
    X, Y = NN.loadMatrixFromFile()
    res.getResults(NN)
Example #24
def main():
    # Process the data
    opts = util.parse_args()
    train_partition = util.read_arff(opts.train_filename)
    test_partition  = util.read_arff(opts.test_filename)

    # sanity check
    print("num train =", train_partition.n, ", num classes =", train_partition.K)
    print("num test  =", test_partition.n, ", num classes =", test_partition.K)

    nb_model = NaiveBayes(train_partition)

    y_real = []  # list of real y's
    y_h = []  # list of predicted y's
    for example in test_partition.data:  # loop over the test examples
        y_hat = nb_model.classify(example.features)  # classify each example's features
        y_real.append(int(example.label))  # append the test label to y_real
        y_h.append(y_hat)  # append the predicted label to y_h

    ln = len(nb_model.classes)
    l = len(test_partition.data)
    confusion_matrix = np.zeros((ln, ln))  # confusion matrix of zeros of the right size
    for i in range(l):
        y_r = y_real[i]
        pred_y = y_h[i]
        confusion_matrix[y_r][pred_y] += 1  # count each (actual, predicted) pair

    n = 0  # number of correctly classified points
    for i in range(ln):
        n += confusion_matrix[i][i]  # sum the diagonal

    accuracy = n / l  # compute accuracy

    # print the results
    print("Accuracy", round(accuracy, 7), "(", int(n), " out of ", l, " correct)")
    print("Confusion Matrix:")
    print(confusion_matrix)
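The diagonal sum can also be written in one step with numpy:

# equivalent to summing confusion_matrix[i][i] over all classes
n = np.trace(confusion_matrix)
accuracy = n / confusion_matrix.sum()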
Example #25
def test_nb_smaller_n():
    n_list = [
        1000, 500, 400, 300, 250, 200, 150, 100, 75, 50, 40, 30, 20, 15, 10
    ]
    for i in range(1, 5):
        smaller_n = []
        x_con, x_cat, y, xt_con, xt_cat, yt = testImport.read_data(i, 0)
        for n in n_list:
            if x_con is not None and x_cat is not None:
                x_con, x_cat, y = less_cases_separate(x_con, x_cat, y, n)
            elif x_con is not None:
                x_con, y = less_cases_together(x_con, y, n)
            else:
                x_cat, y = less_cases_together(x_cat, y, n)
            model = NaiveBayes.NaiveBayes()
            model.fit(x_con, x_cat, y)
            smaller_n.append(evaluate_acc_NB(yt, model.predict(xt_con,
                                                               xt_cat)))
        plt.plot(n_list, smaller_n)
        plt.xlabel('N')
        plt.ylabel('performance')
    plt.legend(['ionosphere', 'census', 'poker', 'credit'])
    plt.savefig('nb_testing/smaller_n')
Example #26
def cross_eval(directory, parts, verbose=False):
    """Performs cross-validation."""
    correct = 0
    total = 0

    for i in range(1, parts + 1):
        testlist = []
        trainlist = []
        for j in range(1, parts + 1):
            if i == j:
                testlist.extend(glob.glob("%s/part%d/*" % (directory, j)))
            else:
                trainlist.extend(glob.glob("%s/part%d/*" % (directory, j)))

        classifier = NaiveBayes.NaiveBayes(getwords)

        if verbose:
            print(i, "\tTraining classifier")
        for doc in trainlist:
            train(classifier, doc, category(doc))

        if verbose:
            print("\tClassifying")
        for doc in testlist:
            bestcat = classify(classifier, doc)
            if verbose:
                print("\t", doc, ":", bestcat, "-", end=" ")
            if bestcat == category(doc):
                if verbose:
                    print("correct")
                correct += 1
            else:
                if verbose:
                    print("wrong")
            total += 1

    return correct / total
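A hypothetical call, assuming the directory contains part1 … partN subfolders of documents as the glob pattern implies:

accuracy = cross_eval("data", 5, verbose=True)
print("cross-validation accuracy:", accuracy)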
Example #27
def test():

    nb_samples = 2000
    nb_rounds = 10
    x = np.zeros((nb_rounds))
    y = np.zeros((nb_rounds))
    x_time = 0.0
    y_time = 0.0
    for i in range(nb_rounds):
        bnbdata_X, bnbdata_Y = make_classification(n_samples=nb_samples,
                                                   n_features=20,
                                                   n_informative=20,
                                                   n_classes=5,
                                                   n_redundant=0)
        bnbdata_X = binarize(bnbdata_X)  # binarize returns a copy; the original discarded the result

        bnb = MultinomialNB()
        start_time = time.time()
        y_pred_official = bnb.fit(bnbdata_X, bnbdata_Y).predict(bnbdata_X)
        finish_time = time.time()
        y_time += (finish_time - start_time)

        mnb = nb.NaiveBayes(num_class=5)  # match n_classes=5 above; the original passed 20
        start_time = time.time()
        mnb.fit(bnbdata_X, bnbdata_Y)
        y_pred_scratch = mnb.predict(bnbdata_X)
        finish_time = time.time()
        x_time += (finish_time - start_time)

        print("mnb: ", (bnbdata_Y != y_pred_scratch).sum(), "bnb: ",
              (bnbdata_Y != y_pred_official).sum())
        y[i] = (bnbdata_Y != y_pred_official).sum()
        x[i] = (bnbdata_Y != y_pred_scratch).sum()
    print("mnb_ave_time: ", x_time / nb_rounds, "bnb_avg_time: ",
          y_time / nb_rounds)
    return np.var(x), np.var(y), np.average(x), np.average(y)
Example #28
    def choix_classifieurs(self, X_train, y_train, X_test, y_test):

        print(
            " \n\t\t--- Searching for the best classifier for each method ---\n\n"
        )

        # Choose the classifiers

        print(" --- Searching for Naive Bayes ---\n")
        # Naive Bayes
        nB = nb.NaiveBayes()
        clfNB = nB.choixNB(X_train, y_train, X_test, y_test)

        # Decision tree
        print(" --- Searching for Decision Tree ---\n")
        tree = dt.DecisionTree()
        clfTree, _ = tree.recherche_param(X_train, y_train, X_test, y_test)

        # K nearest neighbors
        print(
            "\n --- No parameter search for K nearest neighbors ---\n"
        )
        kNN = knn.KNN()

        # SVM
        print(" --- Searching for the SVM ---\n")
        sVM = svm.SVM()
        clfSVM = sVM.hyperParameter(X_train, y_train)

        # Perceptron
        print(" --- Searching for the Perceptron ---\n")
        perceptron = perceptr.Perceptr()
        clfPerceptr = perceptron.rechercheHypParm(X_train, y_train, X_test,
                                                  y_test)

        return (clfNB, clfTree, kNN, clfPerceptr, clfSVM)
Example #29
    # Initialize the covariances and generate samples and labels
    sigma = np.zeros((k, 3, 3))
    for i in range(k):
        sigma[i, :, :] = np.diag(np.random.randint(10, 25, size=(3, )))
    sample, target = generate_random(sigma, N)
    feature_names = ['x_label', 'y_label', 'z_label']  # feature names
    target_names = ['gaussian1', 'gaussian2', 'gaussian3', 'gaussian4']  # class names
    data = Bunch(sample=sample,
                 feature_names=feature_names,
                 target=target,
                 target_names=target_names)
    sample_t, target_t = generate_random(sigma, N)
    data_t = Bunch(sample=sample_t, target=target_t)

    # Train the model and compute accuracy
    model = NaiveBayes()
    model.fit(data.sample, data.target.flatten())
    tar_train = np.array([model.predict(x) for x in data.sample],
                         dtype=np.uint8)
    tar_test = np.array([model.predict(x) for x in data_t.sample],
                        dtype=np.uint8)
    acc_train = model.score(data.sample, data.target.flatten())
    acc_test = model.score(data_t.sample, data_t.target.flatten())
    print_list = [acc_train * 100, acc_test * 100]
    print(
        'Accuracy on training set: {0[0]:.2f}%, accuracy on testing set: {0[1]:.2f}%.'
        .format(print_list))

    # Test a single data point
    summary = model.normalized_prob(data_t.sample[100])
    print(summary)
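generate_random is not shown; the sketch below is a hypothetical version consistent with how it is used above (one 3-D Gaussian per class, N samples each, targets returned as a column that the caller flattens):

import numpy as np

def generate_random(sigma, N):
    # Hypothetical sketch, not the original: draw N points from each of the
    # k 3-D Gaussians whose covariance matrices are stacked in sigma.
    k = sigma.shape[0]
    rng = np.random.default_rng()
    samples, targets = [], []
    for i in range(k):
        mean = rng.uniform(-10, 10, size=3)  # assumed class means
        samples.append(rng.multivariate_normal(mean, sigma[i], size=N))
        targets.append(np.full((N, 1), i, dtype=np.uint8))
    return np.vstack(samples), np.vstack(targets)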
Example #30
                   max_iter=10,
                   alpha=1e-4,
                   solver='sgd',
                   verbose=10,
                   random_state=1,
                   learning_rate_init=.1)
nn.fit(x_train, y_train)
y_pred_nn = nn.predict(x_test)

end3 = time.time()
nn_time = end3 - start3

#NB Model
start4 = time.time()

nb = NaiveBayes.NaiveBayes()
nb.fit(x_train, y_train)
y_pred_nb = nb.predict(x_test)

end4 = time.time()
nb_time = end4 - start4

print("SVM Time: {:0.2f} minute".format(svm_time / 60.0))

print("KNN Time: {:0.2f} minute".format(knn_time / 60.0))

print("NN Time: {:0.2f} minute".format(nn_time / 60.0))

print("NB Time: {:0.2f} minute".format(nb_time / 60.0))

# SVM report and analysis