Example #1
def test_classification():
    from numpy import zeros, mean
    t = zeros(len(target))
    t[target == 'setosa'] = 1
    t[target == 'versicolor'] = 2
    t[target == 'virginica'] = 3

    from sklearn.naive_bayes import GaussianNB
    classifier = GaussianNB()
    classifier.fit(data, t)  # training on the iris dataset

    print(classifier.predict(data[:1]))  # predict expects a 2D array, so slice rather than index
    print(t[0])

    from sklearn.model_selection import train_test_split
    train, test, t_train, t_test = train_test_split(data, t, test_size=0.4, random_state=0)

    classifier.fit(train, t_train)  # train
    print(classifier.score(test, t_test))  # test

    from sklearn.metrics import confusion_matrix
    print(confusion_matrix(classifier.predict(test), t_test))

    from sklearn.metrics import classification_report
    print(classification_report(classifier.predict(test), t_test, target_names=['setosa', 'versicolor', 'virginica']))

    from sklearn.model_selection import cross_val_score
    # cross-validation with 6 folds
    scores = cross_val_score(classifier, data, t, cv=6)
    print(scores)

    print(mean(scores))
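
# The hand-rolled label mapping above can also be done with sklearn's LabelEncoder;
# a minimal sketch (assumes the same `target` string array; note it assigns 0..2, not 1..3):
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
t_encoded = le.fit_transform(target)  # 'setosa' -> 0, 'versicolor' -> 1, 'virginica' -> 2
print(le.classes_)                    # the original label names, in encoded order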
Example #2
def crossvalidate(X_trn, Y_trn):
    """Cross-validation with comparison to baselines that predict only good or only bad"""
    import numpy as np
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import accuracy_score
    X_train, X_test, Y_train, Y_test = train_test_split(X_trn.toarray(), Y_trn, test_size=0.4, random_state=1)
    dumb_labels1 = np.ones(len(Y_test), dtype=int)            # labels all 1s
    dumb_labels2 = np.zeros(len(Y_test), dtype=int)           # labels all 0s
    dumb_labels3 = np.random.randint(2, size=(len(Y_test),))  # random labels
    clf = GaussianNB()
    #clf = Perceptron()
    #clf = SGDClassifier()
    #clf = MultinomialNB()
    #clf = KNeighborsClassifier()
    #clf = LinearSVC()
    clf.fit(X_train, Y_train)
    accuracy = clf.score(X_test, Y_test)
    # score each baseline against the true test labels; the original scored clf
    # against the dumb labels, which measures agreement, not baseline accuracy
    dumb_clf1_score = accuracy_score(Y_test, dumb_labels1)
    dumb_clf2_score = accuracy_score(Y_test, dumb_labels2)
    dumb_clf3_score = accuracy_score(Y_test, dumb_labels3)
    print("Classifier score : ", accuracy)
    print("Dumb classifier with all 1s : ", dumb_clf1_score)
    print("Dumb classifier with all 0s : ", dumb_clf2_score)
    print("Dumb classifier with random sequence : ", dumb_clf3_score)
    return accuracy
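
# sklearn ships ready-made baselines for this comparison; a minimal sketch with
# DummyClassifier (reuses X_train/Y_train/X_test/Y_test from the split above):
from sklearn.dummy import DummyClassifier
for name, strategy, constant in [("all 1s", "constant", 1),
                                 ("all 0s", "constant", 0),
                                 ("random", "uniform", None)]:
    dummy = DummyClassifier(strategy=strategy, constant=constant, random_state=0)
    dummy.fit(X_train, Y_train)  # fitting is required even for constant strategies
    print(name, ":", dummy.score(X_test, Y_test))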
Example #3
def get_GNB(Xtrain, Xtest, Ytrain, Ytest):
    gnb = GaussianNB()
    gnb.fit(Xtrain, Ytrain)
    scores = np.empty(2)  # only two scores are stored: train and test
    scores[0] = gnb.score(Xtrain, Ytrain)
    scores[1] = gnb.score(Xtest, Ytest)
    print('GNB, train: {0:.02f}% '.format(scores[0] * 100))
    print('GNB, test: {0:.02f}% '.format(scores[1] * 100))
    return gnb
Example #4
def get_GNB(Xtrain, Ytrain, Xtest=None, Ytest=None, verbose=0):
    gnb = GaussianNB()
    gnb.fit(Xtrain, Ytrain)

    if verbose == 1:
        scores = np.empty(2)
        scores[0] = gnb.score(Xtrain, Ytrain)
        print('GNB, train: {0:.02f}% '.format(scores[0] * 100))
        if Xtest is not None:
            scores[1] = gnb.score(Xtest, Ytest)
            print('GNB, test: {0:.02f}% '.format(scores[1] * 100))
    return gnb
Example #5
def cvalidate():
    from sklearn.model_selection import train_test_split
    targetset = np.genfromtxt(open('trainLabels.csv', 'r'), dtype='f16')
    y = [x for x in targetset]

    trainset = np.genfromtxt(open('train.csv', 'r'), delimiter=',', dtype='f16')
    X = np.array([x for x in trainset])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

    gnb = GaussianNB()
    X_train, X_test = decomposition_pca(X_train, X_test)
    gnb.fit(X_train, y_train)

    print(gnb.score(X_test, y_test))
Example #6
def NBAccuracy(features_train, labels_train, features_test, labels_test):
    """ compute the accuracy of your Naive Bayes classifier """
    ### import the sklearn module for GaussianNB
    from sklearn.naive_bayes import GaussianNB

    ### create classifier
    clf = GaussianNB()

    ### fit the classifier on the training features and labels
    clf.fit(features_train, labels_train)

    ### use the trained classifier to predict labels for the test features
    pred = clf.predict(features_test)


    ### calculate and return the accuracy on the test data
    ### this is slightly different than the example, 
    ### where we just print the accuracy
    ### you might need to import an sklearn module
    
    #from sklearn.metrics import accuracy_score
    #accuracy = accuracy_score(labels_test, pred)
    
    accuracy = clf.score(features_test, labels_test)
    return accuracy
def NB(text):
    ### features_train and features_test are the features for the training
    ### and testing datasets, respectively
    ### labels_train and labels_test are the corresponding item labels
    features_train, features_test, labels_train, labels_test = Preprocess()
    Ifeatures_train, Ifeatures_test, Ilabels_train = preprocess_input([text])

    # classification goes here

    clf = GaussianNB()

    # training
    train_t0 = time()
    clf.fit(features_train, labels_train)
    train_t1 = time()

    # prediction or testing
    test_t0 = time()
    predict = clf.predict(features_test)
    test_t1 = time()

    print("accuracy: ", clf.score(features_test, labels_test))
    print("#################################")
    print("train time: ", round(train_t1 - train_t0, 3), "s")
    print("prediction time: ", round(test_t1 - test_t0, 3), "s")

    print("#################################")

    clf.fit(Ifeatures_train, Ilabels_train)
    # str(...)[1] pulls the first digit out of the predicted array's repr, e.g. "[1]" -> "1"
    print("prediction of ", str(clf.predict(Ifeatures_test))[1])

    #print("prediction of ", clf.predict(preprocess_input(text)))
    return str(clf.predict(Ifeatures_test))[1]
import traceback  # used by the error-logging except blocks below

class GaussianNBcls(object):
    """Thin wrapper around GaussianNB that logs any exception traceback"""
    def __init__(self):
        self.gnb_cls = GaussianNB()
        self.prediction = None
        self.train_x = None
        self.train_y = None
        self.test_x = None

    def train_model(self, train_x, train_y):
        try:
            self.train_x = train_x
            self.train_y = train_y
            self.gnb_cls.fit(train_x, train_y)
        except:
            print(traceback.format_exc())

    def predict(self, test_x):
        try:
            self.test_x = test_x
            self.prediction = self.gnb_cls.predict(test_x)
            return self.prediction
        except:
            print(traceback.format_exc())

    def accuracy_score(self, test_y):
        try:
            # return r2_score(test_y, self.prediction)
            return self.gnb_cls.score(self.test_x, test_y)
        except:
            print(traceback.format_exc())
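
# A usage sketch of the wrapper above on toy data (arrays are made up for illustration):
import numpy as np
X_toy = np.array([[0.0], [0.1], [1.0], [1.1]])
y_toy = np.array([0, 0, 1, 1])
model = GaussianNBcls()
model.train_model(X_toy, y_toy)
print(model.predict(X_toy))         # also stores test_x internally for scoring
print(model.accuracy_score(y_toy))  # scores against the last predicted inputs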
Example #9
def NBAccuracy(features_train, labels_train, features_test, labels_test):
    """ compute the accuracy of your Naive Bayes classifier """
    ### import the sklearn module for GaussianNB
    from sklearn.naive_bayes import GaussianNB

    ### create classifier
    clf = GaussianNB()

    t0 = time()
    ### fit the classifier on the training features and labels
    clf.fit(features_train, labels_train)
    print("training time:", round(time() - t0, 3), "s")

    ### use the trained classifier to predict labels for the test features
    t1 = time()
    pred = clf.predict(features_test)
    print("predicting time:", round(time() - t1, 3), "s")

    ### calculate and return the accuracy on the test data
    ### this is slightly different than the example,
    ### where we just print the accuracy
    ### you might need to import an sklearn module
    accuracy = clf.score(features_test, labels_test)
    return accuracy
class PriceModel(object):
    """Classification model used to predict future price movements"""
    def __init__(self, algorithm='gnb'):
        self.algorithm = algorithm

        if algorithm == 'svm':
            self.clf = SVC(kernel='rbf')
        elif algorithm == 'rf':
            self.clf = RandomForestClassifier(n_estimators=10,
                                              max_depth=None,
                                              min_samples_split=2,  # sklearn requires min_samples_split >= 2
                                              random_state=0)
        elif algorithm == 'lr':
            self.clf = LogisticRegression()
        elif algorithm == 'knn':
            self.clf = KNeighborsClassifier(n_neighbors=3)
        else:
            # Naive Bayes
            self.clf = GaussianNB()

    def train(self, X_train, y_train):
        self.clf.fit(X_train, y_train)

    def predict(self, x):
        return self.clf.predict(x)

    def score(self, X_test, y_test):
        return self.clf.score(X_test, y_test)
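
# A usage sketch of PriceModel (X_train/y_train/X_test/y_test are assumed to exist):
model = PriceModel(algorithm='gnb')  # any unrecognized name also falls through to GaussianNB
model.train(X_train, y_train)
print(model.score(X_test, y_test))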
Example #11
def trainData(username):
    """
    Trains the data based on the user's performance so far
    Returns a trained Gaussian Naive Bayes model and updates the result collection
    """
    X = getFeatures(username)
    Y = getClassList(username)

    trainX = np.array(X)
    trainY = np.array(Y)

    gnb = GaussianNB()
    gnb.fit(trainX, trainY)
    print("Score with Naive Bayes: ", gnb.score(trainX, trainY))

    testData = words.posts.find({}, {'id': 1,
                                     'points': 1,
                                     'diff': 1,
                                     '_id': 0})
    testData = map(lambda x: (x['id'], x['points'], x['diff']), testData)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        for data in testData:
            testWord = words.posts.find_one({'id': data[0]}, {'word': 1, '_id': 0})['word']
            wordClass = setWordClass(gnb.predict_proba([data])[0])  # predict_proba expects a 2D array
            classWord = result.posts.update_one({'username': username},
                                                {'$set': {testWord: wordClass}},
                                                upsert=True)  # update() was removed in pymongo 4
def gaussian_bayes_test(self):
    print('gaussian bayes test')
    g_bayes_clf = GaussianNB()
    print('cross validation score', cross_val_score(g_bayes_clf, self.x_data, self.y_data))
    start_time = time.time()
    g_bayes_clf.fit(self.x_train, self.y_train)
    print('score', g_bayes_clf.score(self.x_test, self.y_test))
    print('time cost', time.time() - start_time)
Example #13
def Accuracy(features_train,labels_train,features_test,labels_test):

    clf = GaussianNB()

    clf.fit(features_train,labels_train)

    pred = clf.predict(features_test)

    return clf.score(features_test,labels_test)    
def run_naive_bayes(self):
    print("Running......")
    clf = GaussianNB()
    clf.fit(self.features_train, self.labels_train)
    pred = clf.predict(self.features_test)
    accuracy = clf.score(self.features_test, self.labels_test)
    #Save model and performance
    self.save_model(clf, "Naive Bayes")
    self.save_performance("Naive Bayes", accuracy)
def Gaussian_NB(X, y, tst_size):
    # train_test_split now lives in sklearn.model_selection
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=tst_size, random_state=0)
    clf = GaussianNB()
    score = 0
    for i in range(100):
        clf.fit(X_train, y_train)
        score += clf.score(X_test, y_test)
    score = score / 100
    return score
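
# The loop above refits the same deterministic model on one fixed split, so every
# iteration returns the same score. To average over genuinely different splits, a
# sketch with ShuffleSplit (assumes the same X and y):
from sklearn.model_selection import ShuffleSplit, cross_val_score
from sklearn.naive_bayes import GaussianNB
cv = ShuffleSplit(n_splits=100, test_size=0.4, random_state=0)
scores = cross_val_score(GaussianNB(), X, y, cv=cv)
print(scores.mean())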
def naiveBayesClassifierTraining(compounds_all):
    print("Building naive Bayes classifier (" + str(NB_FOLDS) + "-fold cross-validation)...")
    # get the data
    keys = compounds_all.keys()
    fingerprint_data = [compounds_all[cmpnd_id]['fingerprint'] for cmpnd_id in keys]
    fingerprint_data = numpy.asarray(fingerprint_data)
    activity_data = [compounds_all[cmpnd_id]['active'] for cmpnd_id in keys]
    activity_data = numpy.asarray(activity_data)

    # perform stratified K-fold cross-validation
    classifier = GaussianNB()
    # StratifiedKFold now lives in sklearn.model_selection and splits via .split()
    kfold_xv_strat = StratifiedKFold(n_splits=NB_FOLDS)
    confusion_matrices = []
    probabilities = []
    scores = []
    models = []
    true_activities = []
    aucs = []
    for train, test in kfold_xv_strat.split(fingerprint_data, activity_data):
        fingerprint_data_train = fingerprint_data[train]
        fingerprint_data_test = fingerprint_data[test]
        activity_data_train = activity_data[train]
        activity_data_test = activity_data[test]

        # model building
        classifier.fit(fingerprint_data_train, activity_data_train)

        # testing
        activity_data_predictions = classifier.predict(fingerprint_data_test)
        models.append(classifier)

        probability_estimates = classifier.predict_proba(fingerprint_data_test)
        probabilities.append(probability_estimates)

        scores.append(classifier.score(fingerprint_data_test, activity_data_test))

        activity_confusion_matrix = confusion_matrix(activity_data_test, activity_data_predictions)
        confusion_matrices.append(activity_confusion_matrix)

        true_activities.append(activity_data_test)

        # ROC curves
        fpr, tpr, thresholds = roc_curve(activity_data_test, probability_estimates[:, 1])
        aucs.append(auc(fpr, tpr))
    classifier.fit(fingerprint_data, activity_data)
    print("Done.")
    return {
        'confusion_matrices' : confusion_matrices
        , 'probabilities' : probabilities
        , 'scores' : scores
        , 'models' : models
        , 'true_activity_data' : true_activities
        , 'AUCs' : aucs
        , 'fingerprint_data' : fingerprint_data
        , 'activity_data' : activity_data
        , 'final_model' : classifier
    }
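
# Note that `models` above holds one reference per fold to the same estimator, which
# is refit each fold (and finally on all data). To keep an independent snapshot per
# fold, a sketch using sklearn.base.clone inside the loop:
from sklearn.base import clone
fold_model = clone(classifier).fit(fingerprint_data_train, activity_data_train)
models.append(fold_model)  # each entry is now its own fitted estimator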
Example #17
def author_id(f_train, f_test, l_train, l_test):
    clf = GaussianNB()
    t0 = time()
    clf.fit(f_train, l_train)
    print("training time:", round(time() - t0, 3), "s")
    t0 = time()
    pred = clf.predict(f_test)
    print("prediction time:", round(time() - t0, 3), "s")
    return 'accuracy: %f' % clf.score(f_test, l_test)
Example #18
def run_test(trainData, trainLabels, testData, testLabels):
  start_time = time()
  classifier = GaussianNB()
  classifier.fit(trainData, trainLabels)
  score = classifier.score(testData, testLabels)
  duration = time() - start_time
  print("training set size: " + str(len(trainData)))
  print("score: " + str(score))
  print("time: " + str(duration) + "\n")
def classify(features_train, labels_train, features_test, labels_test):
  classifier = GaussianNB()
  t0 = time()
  classifier.fit(features_train, labels_train)
  print("training time: ", round(time() - t0), "s")
  t1 = time()
  classifier.predict(features_test)
  print("predicting time: ", round(time() - t1), "s")
  return classifier.score(features_test, labels_test)
Example #20
def GNB_select_cv(X, Y, num_features):
    scores = []
    skf = StratifiedKFold(n_splits=10)  # from sklearn.model_selection import StratifiedKFold
    for train, test in skf.split(X, Y):
        X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test]
        XRF_train, imp, ind, std = fitRF(X_train, y_train, est=2000)  # RF-based feature selection
        XRF_test = X_test[:, ind]  # reorder test set after RF selection
        clf = GaussianNB()
        clf.fit(XRF_train[:, 0:num_features], y_train)
        scores.append(clf.score(XRF_test[:, 0:num_features], y_test))
    score = np.mean(scores)
    return score
def NBAccuracy(features_train, labels_train, features_test, labels_test):
    """ compute the accuracy of your Naive Bayes classifier """
    # import the sklearn module for GaussianNB
    from sklearn.naive_bayes import GaussianNB
    # create classifier
    clf = GaussianNB()
    # fit the classifier on the training features and labels
    clf.fit(features_train, labels_train)

    # use the trained classifier to predict labels for the test features
    # calculate and return the accuracy on the test data
    accuracy = clf.score(features_test, labels_test)
    return accuracy
Example #22
def NBClassifier(filename, split_ratio):
    print("-" * 15, "Naive Bayes Classifier", "-" * 15)

    X, Y, X_labels, Y_labels = split_data(filename, split_ratio)

    # print(X.shape, Y.shape, X_labels.shape, Y_labels.shape)

    nb_model = GaussianNB()
    nb_model.fit(X, X_labels)

    print("\n accuracy =", nb_model.score(Y, Y_labels, sample_weight=None))

    print("-" * 50)
Example #23
def sklearn_model():
    """Fits the (parametric) Gaussian Naive Bayes classifier from sklearn on the iris
    dataset."""
    # load iris data, perform train/test split
    iris = load_iris()
    tts = train_test_split(iris.data, iris.target, train_size=TRAIN_PCT)  # from sklearn.model_selection
    train_features, test_features, train_labels, test_labels = tts

    # train (gaussian) Naive Bayes model, make predictions on test set
    gnb = GaussianNB().fit(train_features, train_labels)
    predicted_labels = gnb.predict(test_features)

    # show accuracy pct
    print("accuracy = {0} %".format(round(100 * gnb.score(test_features, test_labels))))
class NaiveBayes():
    def __init__(self):
        self.clf = GaussianNB()
        self.accuracy = 0
        self.y_out = []

    def train(self, X_train, y_train):
        self.clf.fit(X_train, y_train.ravel())

    def test(self, X_test):
        self.y_out = self.clf.predict(X_test)

    def score(self, X_test, y_test):
        self.accuracy = self.clf.score(X_test, y_test.ravel())
def bayes():
    ## Naive Bayes
    from sklearn.naive_bayes import GaussianNB
    gnb = GaussianNB()
    gnb.fit(Xtrn, Ytrn)
    print(gnb.score(Xtrn, Ytrn))
    print(gnb.score(Xval1, Yval1))
    print(gnb.score(Xval2, Yval2))
    print(gnb.score(Xval3, Yval3))
Example #26
def clsfr():
    # needs: from numpy import zeros; from sklearn.model_selection import train_test_split;
    # from sklearn.metrics import classification_report
    train1_err = []
    # test1_err = []
    train2_err = []
    test2_err = []
    t = zeros(len(target))
    t[target == 'setosa'] = 1
    t[target == 'versicolor'] = 2
    t[target == 'virginica'] = 3
    classifier = GaussianNB()
    classifier.fit(data, t)  # training on the iris dataset
    for i in range(len(t)):
        if classifier.predict(data[i:i+1])[0] != t[i]:  # predict expects a 2D array
            train1_err.append((classifier.predict(data[i:i+1]), t[i]))
    train, test, t_train, t_test = train_test_split(data, t, test_size=0.4, random_state=0)
    classifier.fit(train, t_train)  # train
    for i in range(len(t_train)):
        if classifier.predict(train[i:i+1])[0] != t_train[i]:
            train2_err.append((classifier.predict(train[i:i+1]), t_train[i]))
    for i in range(len(t_test)):
        if classifier.predict(test[i:i+1])[0] != t_test[i]:
            test2_err.append((classifier.predict(test[i:i+1]), t_test[i]))
    print('train error: ', train1_err)
    print('train count: ', len(t))
    print('train error count: ', len(train1_err))
    print('accuracy rate: ', classifier.score(data, t))
    print('*******************************************')
    print('train error: ', train2_err)
    print('test error: ', test2_err)
    print('train count: ', len(train))
    print('train error count: ', len(train2_err))
    print('test count: ', len(test))
    print('test error count: ', len(test2_err))
    print('test accuracy rate: ', classifier.score(test, t_test))  # test
    print('train accuracy rate: ', classifier.score(train, t_train))
    print('****************************************************')
    print(classification_report(classifier.predict(test), t_test, target_names=['setosa', 'versicolor', 'virginica']))
Example #27
def nb_classify(self):
    print("Naive Bayes")

    clf = GaussianNB()
    clf.fit(self.descr, self.target)
    mean = clf.score(self.test_descr, self.test_target)

    pred = clf.predict(self.test_descr)
    accuracy = np.where(pred == self.test_target, 1, 0).sum() / float(len(self.test_target))
    print("Accuracy: %3f" % accuracy)

    print("Mean : %3f" % mean)
    print("Probability ", clf.class_prior_)
    print("Mean of each feature per class ", clf.theta_)
    print("Variance of each feature per class ", clf.var_)  # sigma_ was renamed var_ in sklearn 1.0
    print("Predict Probability ", clf.predict_proba(self.descr))
Example #28
def compute_bayes_error():
    np.random.seed(0)
    mu1 = [0, 0]
    cov_mat_1 = 1 * np.eye(2)

    mu2 = [0, 0]
    cov_mat_2 = 16 * np.eye(2)

    #create unified training set from two normal distributions 
    X_vect = np.concatenate([np.random.multivariate_normal(mu1, cov_mat_1, 5000),
                        np.random.multivariate_normal(mu2, cov_mat_2, 5000)])
    y = np.zeros(10000)
    y[5000:] = 1

    # fit the naive Bayes classifier
    clf = GaussianNB()
    clf.fit(X_vect, y)
    # predict the classification probabilities on a grid
    xlim = (-5, 5)
    ylim = (-5, 5)
    xx, yy = np.meshgrid(np.linspace(xlim[0], xlim[1], 50),
                         np.linspace(ylim[0], ylim[1], 70))
    Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])
    Z = Z[:, 1].reshape(xx.shape)

    acc = clf.score(X_vect, y)
    # error rate
    error = 1 - acc

    # add decision boundary plot
    fig = plt.figure(figsize=(8, 8))
    fig.suptitle('decision boundary', fontsize=12)
    fig = plt.gcf()
    # set display window title (canvas.set_window_title moved to canvas.manager in newer matplotlib)
    fig.canvas.manager.set_window_title('Decision Boundary')
    ax = fig.add_subplot(111)
    p1 = ax.scatter(X_vect[:, 0], X_vect[:, 1], c=y, cmap=plt.get_cmap('Set3'), zorder=5)
    p2 = ax.contour(xx, yy, Z, [0.5],linewidths=3, colors='k')
   
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    ax.set_xlabel('$x1$')
    ax.set_ylabel('$x2$')
    plt.clabel(p2, inline=3, fontsize=5)
    p2.collections[0].set_label("Decision Boundary")
    ax.legend(loc='lower right')
    return error
def gaussian_data(X, y):
    """
    Naive Bayes algorithm
    :param X: feature matrix
    :param y: labels
    :return: accuracy score and confusion matrix
    """
    from sklearn import metrics
    from sklearn.naive_bayes import GaussianNB
    model = GaussianNB()
    model.fit(X, y)
    expected = y
    predicted = model.predict(X)
    score = model.score(X, y)
    # print(metrics.classification_report(expected, predicted, labels=[0, 1], target_names=['benign URL', 'malicious URL']))
    cm = metrics.confusion_matrix(expected, predicted)
    return score, cm
Example #30
def gnb(training_data, training_target, testing_data, testing_target):
    """
    Fits a Gaussian naive Bayes classifier on the training data and
    returns its mean accuracy on the testing data.
    """
    clf = GaussianNB()
    clf.fit(training_data, training_target)
    return clf.score(testing_data, testing_target)
# if features[obs][4] > 10:
#     print(features[obs][4])

#plt.plot(np.array(features[:,0]), np.array(features[:,1]))

# for k in data_dict:
#     for j in data_dict[k]:
#         print(data_dict["salary"][j])

# for k in data_dict:
#     print(data_dict[k]["bonus"])

###GAUSSIAN
from sklearn.naive_bayes import GaussianNB
clfGAU = GaussianNB().fit(features, labels)
print("Gaussian cf score is %f " % clfGAU.score(features, labels))

###SVM
from sklearn import svm
clfSVM = svm.SVC(kernel="rbf", C=0.001, gamma=0.001).fit(features, labels)
print("classic SVM score is %f " % clfSVM.score(features, labels))
# predSVM = clfSVM.predict(features)
#print("classic accuracy_score score is %f " % accuracy_score(labels, predSVM))

###Decision Tree
from sklearn import tree
clfDT = tree.DecisionTreeClassifier(min_samples_split=50).fit(features, labels)
print("decision tree score % f" % clfDT.score(features, labels))

print("features_list", features_list)
print('most important features DT', clfDT.feature_importances_)
Example #32
# GaussianNB

# Fitting Naive Bayes to the Training set
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(x_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(x_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print(cm)

score = classifier.score(x_test, y_test)
print(score)

# from sklearn import metrics
# metrics.accuracy_score(y_test, y_pred)

# Visualising the Training set results
from matplotlib.colors import ListedColormap
x_set, y_set = x_train, y_train
X1, X2 = nm.meshgrid(
    nm.arange(start=x_set[:, 0].min() - 1,
              stop=x_set[:, 0].max() + 1,
              step=0.01),
    nm.arange(start=x_set[:, 1].min() - 1,
              stop=x_set[:, 1].max() + 1,
              step=0.01))
Example #33
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
print(logreg.score(X_train, y_train))

svc = SVC()
svc.fit(X_train, y_train)
print(svc.score(X_train, y_train))

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
print(knn.score(X_train, y_train))

gaussian = GaussianNB()
gaussian.fit(X_train, y_train)
print(gaussian.score(X_train, y_train))

linear_svc = LinearSVC()
linear_svc.fit(X_train, y_train)
print(linear_svc.score(X_train, y_train))

sgd = SGDClassifier()
sgd.fit(X_train, y_train)
print(sgd.score(X_train, y_train))

decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train, y_train)
print(decision_tree.score(X_train, y_train))

random_forest = RandomForestClassifier(n_estimators=100)
random_forest.fit(X_train, y_train)
Example #34
import sys
from time import time
sys.path.append("../tools/")
from email_preprocess import preprocess

### features_train and features_test are the features for the training
### and testing datasets, respectively
### labels_train and labels_test are the corresponding item labels
features_train, features_test, labels_train, labels_test = preprocess()

#########################################################
### your code goes here ###

from sklearn.naive_bayes import GaussianNB

clf = GaussianNB()
clf.fit(features_train, labels_train)
predictions = clf.predict(features_test)

total = len(labels_test)
errors = 0

for i in range(total):
    if predictions[i] != labels_test[i]:
        errors += 1
correct = float(total - errors)
total = float(total)
accuracy = correct / total
print(clf.score(features_test, labels_test))  # can also use print(accuracy)

#########################################################
Example #35
    Chris has label 1
"""

import sys
from time import time
sys.path.append("../tools/")
from email_preprocess import preprocess
from sklearn.naive_bayes import GaussianNB

### features_train and features_test are the features for the training
### and testing datasets, respectively
### labels_train and labels_test are the corresponding item labels
features_train, features_test, labels_train, labels_test = preprocess()

#########################################################
### your code goes here ###
gnb = GaussianNB()

t0 = time()
gnb.fit(features_train, labels_train)  # fit returns the estimator itself, not predictions
print("training time:", round(time() - t0, 3), "s")

t0 = time()
y_pred = gnb.predict(features_test)
print("predicting time:", round(time() - t0, 3), "s")

accuracy = gnb.score(features_test, labels_test)
print(accuracy)

#########################################################
Example #36
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

df = pd.read_csv('./glass.csv')
y = df["Type"]
df1 = df.drop("Type", axis=1).copy()
# create training and testing
X_train, X_test, Y_train, Y_test = train_test_split(df1, y, test_size=0.15)

model = GaussianNB()
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)
acc_gnb = round(model.score(X_test, Y_test) * 100, 2)
print("Naive Bayes accuracy with test is:", acc_gnb)
"""plt.plot(Y_test,label="Y_test")
plt.plot(Y_pred,label="Y_pred")
plt.legend()
plt.show()"""
for item in survived:
    if(item==0):
        colors.append('Red')
    else:
        colors.append('Green')
# plt.scatter(ages, fares, s=50, color=colors)
# s means size, we want size to be bigger
# plt.show()

# Step 3: Build a NB Model
Features = dataframe.drop(['Survived'], axis=1).values
Targets = dataframe['Survived'].values
Features_Train, Target_Train = Features[:710], Targets[:710]
# there are total 887 data points and 80% of that will be 710
Features_Test, Targets_test = Features[710:], Targets[710:]
# print(Features_Test)

model = GaussianNB()
model.fit(Features_Train, Target_Train)

# Step 4: Print Predicted vs Actuals
predicted_values = model.predict(Features_Test)
for item in zip(Targets_test, predicted_values):
    print('Actual was:', item[0], 'Predicted was', item[1])

# Step 5: Estimate Error
print('Accuracy is:', model.score(Features_Test, Targets_test))
# we didn't pass targets_test and predicted_values because
# the score method computes predictions from features_test itself, compares them
# with target_test, and returns the accuracy
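
# That claim is easy to verify; a small sketch reusing model, Features_Test, and
# Targets_test from above:
from sklearn.metrics import accuracy_score
manual = accuracy_score(Targets_test, model.predict(Features_Test))
assert manual == model.score(Features_Test, Targets_test)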
Example #38
        batch_x = uncompress(batch_x, 86796)
        # print(batch_x.shape)

        batch_x = np.sum(batch_x, axis=1)
        # print(batch_x.shape)
        batch_x = np.squeeze(batch_x)
        # print(batch_x.shape)

        # print('y')
        # print(batch_y.shape)
        batch_y = np.repeat(batch_y, 50, axis=0)
        # print(batch_y.shape)

        # gnb.partial_fit(batch_x, batch_y, classes=[0, 1])

        x = gnb.score(batch_x, batch_y)

        print(x)

        s += x
        i += 1

        print('average : ', s / i)

    # gnb.fit(X, Y)
    #
    print(s / i)

    # cPickle was merged into pickle in Python 3
    fp = open(os.path.join('nb_logs', 'nb_object' + '.save'), 'wb')
    pickle.dump(gnb, fp, protocol=pickle.HIGHEST_PROTOCOL)
    fp.close()
print("Naive Bayes (non-scaled) cross-validation accuracy: %0.2f (+/- %0.2f)"
      % (NB_NonScaled_cross_val_scores.mean(),
         NB_NonScaled_cross_val_scores.std() * 2))

# In[24]:

if NB_NonScaled_cross_val_scores.mean() > 0.97:
    print("The Naive Bayes Model (Non Scaled) is overfitting in this case.")
else:
    NB_classifier.fit(X_train, y_train)
    NB_NonScaled_predicted = NB_classifier.predict(X_test)
    NB_NonScaled_prob_default = np.sum(NB_NonScaled_predicted) / len(
        NB_NonScaled_predicted)
    print(
        "The Default Probability based on Naive Bayes Model(Non Scaled) is :",
        '%.3f' % NB_NonScaled_prob_default)
    NB_NonScaled_accuracy = NB_classifier.score(X_test, y_test)
    print("The accuracy of Naive Bayes Model(Non Scaled) on test set is : ",
          '%.3f' % NB_NonScaled_accuracy)

# In[25]:

#output the result into the existing evaluation dataframe to compare with other models
new_evaluation = pd.DataFrame({
    'Model': ["Naive Bayes_NonScaled"],
    'Default_Probability': [NB_NonScaled_prob_default],
    'Cross_Validation_Accuracy': [NB_NonScaled_cross_val_scores.mean()],
    'Test_Accuracy': [NB_NonScaled_accuracy]
})
evaluation = pd.concat([evaluation, new_evaluation], ignore_index=True)  # DataFrame.append was removed in pandas 2.0
evaluation = evaluation[[
    'Model', 'Default_Probability', 'Cross_Validation_Accuracy',
sub.to_csv('svm.csv', index=False)
## knn

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, Y_train)
Y_pred = knn.predict(X_test)
acc_knn = round(knn.score(X_train, Y_train) * 100, 2)
acc_knn
sub = pd.DataFrame({'PassengerId': df_test["PassengerId"], 'Survived': Y_pred})
sub.to_csv('knn.csv', index=False)
# Gaussian Naive Bayes

gaussian = GaussianNB()
gaussian.fit(X_train, Y_train)
Y_pred = gaussian.predict(X_test)
acc_gaussian = round(gaussian.score(X_train, Y_train) * 100, 2)
acc_gaussian
sub = pd.DataFrame({'PassengerId': df_test["PassengerId"], 'Survived': Y_pred})
sub.to_csv('gnb.csv', index=False)

# Decision Tree

decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train, Y_train)
Y_pred = decision_tree.predict(X_test)
acc_decision_tree = round(decision_tree.score(X_train, Y_train) * 100, 2)
acc_decision_tree
sub = pd.DataFrame({'PassengerId': df_test["PassengerId"], 'Survived': Y_pred})
sub.to_csv('tree.csv', index=False)
# Random Forest
Example #41
        return vectors

    #Vectorizes Y with 0 being neg and 1 being pos
    def CreateYVector(self):
        print("......building Y matrix")
        vector = np.zeros(5331 + 5331, dtype=int)
        count = 0

        for entry in vector:
            if count > 5330:
                vector[count] = 1
            count += 1

        return vector


data = DataPrep("rt-polaritydata/rt-polaritydata/rt-polarity.neg",
                "rt-polaritydata/rt-polaritydata/rt-polarity.pos")
#print(data.X)
#print(data.Y)
print("......splitting")
X_train, X_test, y_train, y_test = train_test_split(data.X,
                                                    data.Y,
                                                    test_size=0.33)
clf = GaussianNB(var_smoothing=.0001)
print("......training")
clf.fit(X_train, y_train)
print("Accuracy:  ")
print(clf.score(X_test, y_test))
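
# GaussianNB assumes continuous, roughly Gaussian features, which raw term counts
# are not; for CountVectorizer output, MultinomialNB is the usual fit and also
# accepts the sparse matrix without .toarray(). A sketch on the same split:
from sklearn.naive_bayes import MultinomialNB
mnb = MultinomialNB()
mnb.fit(x_train, y_train)
print(mnb.score(x_test, y_test))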
from sklearn.preprocessing import StandardScaler
sc_x = StandardScaler()
x_train = sc_x.fit_transform(x_train)
x_test = sc_x.transform(x_test)

from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(x_train, y_train)

y_pred = classifier.predict(x_test)

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

result = classifier.score(x_test, y_test)

from matplotlib.colors import ListedColormap
x_set, y_set = x_train, y_train
x1, x2 = np.meshgrid(
    np.arange(x_set[:, 0].min() - 1, x_set[:, 0].max() + 1, 0.01),
    np.arange(x_set[:, 1].min() - 1, x_set[:, 1].max() + 1, 0.01))
y_d = np.array([x1.ravel(), x2.ravel()]).T
plt.contourf(x1,
             x2,
             classifier.predict(np.array([x1.ravel(),
                                          x2.ravel()]).T).reshape(x1.shape),
             alpha=0.4,
             cmap=ListedColormap(('red', 'green')))
plt.xlim(x1.min(), x1.max())
plt.ylim(x2.min(), x2.max())
svm = SVC(random_state=1)
svm.fit(x_train.T, y_train.T)

acc = svm.score(x_test.T, y_test.T) * 100
accuracies['SVM'] = acc
print("Test Accuracy of SVM Algorithm: {:.2f}%".format(acc))

# In[35]:

#Naive Bayes Algorithm

from sklearn.naive_bayes import GaussianNB
nb = GaussianNB()
nb.fit(x_train.T, y_train.T)

acc = nb.score(x_test.T, y_test.T) * 100
accuracies['Naive Bayes'] = acc
print("Accuracy of Naive Bayes: {:.2f}%".format(acc))

# In[36]:

#Decision Tree Algorithm

from sklearn.tree import DecisionTreeClassifier
dtc = DecisionTreeClassifier()
dtc.fit(x_train.T, y_train.T)

acc = dtc.score(x_test.T, y_test.T) * 100
accuracies['Decision Tree'] = acc
print("Decision Tree Test Accuracy {:.2f}%".format(acc))
Example #44
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('Predicted Class', fontsize=12)
    plt.xlabel('Actual Class', fontsize=12)


# predict data
for i in range(3):
    #Use Gaussian Naive Bayes method
    classifier = GaussianNB()
    #Fit the model
    classifier.fit(train[i], classification_train)
    #Calculate the result & accuracy
    result = classifier.predict(test[i])
    accuracy = classifier.score(test[i], classification_test)
    #Calculate the probability estimates of the positive class
    prob_data = classifier.predict_proba(test[i])
    prob_data = prob_data[:, 1]
    #Calculate fpr & ftr
    fpr, tpr, thresholds = metrics.roc_curve(classification_test, prob_data)
    fprs.append(fpr)
    tprs.append(tpr)
    #Calculate confusion matrix, precision & recall
    conf_mat = metrics.confusion_matrix(classification_test, result)
    precision = metrics.precision_score(classification_test, result)
    recall = metrics.recall_score(classification_test, result)
    roc_auc = metrics.auc(fpr, tpr)

    print('min_df = ' + str(min_df[i]))
    print('dimension reduction method: ' + str(method[i]))
    'AST', 'BLK']

#Pandas DataFrame allows you to select columns.
#We use column selection to split the data into features and class.
nba_feature = nba[feature_columns]
nba_class = nba[class_column]

print(nba_feature[0:3])
print(list(nba_class[0:3]))

train_feature, test_feature, train_class, test_class = \
    train_test_split(nba_feature, nba_class, stratify=nba_class, \
    train_size=0.75, test_size=0.25, random_state=0)

training_accuracy = []
test_accuracy = []

nb = GaussianNB().fit(train_feature, train_class)
print("Test set score: {:.3f}".format(nb.score(test_feature, test_class)))
prediction = nb.predict(test_feature)
print("Confusion matrix:")
print(
    pd.crosstab(test_class,
                prediction,
                rownames=['True'],
                colnames=['Predicted'],
                margins=True))

scores = cross_val_score(nb, nba_feature, nba_class, cv=10)
print("Cross-validation scores: {}".format(scores))
print("Average cross-validation score: {:.2f}".format(scores.mean()))
Example #46
logic_reg = LogisticRegression()
logic_reg.fit(x_train, y_train)
print("Test accuracy: {:.2f}%".format(logic_reg.score(x_test, y_test) * 100))

#for Knn model
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(x_train, y_train)
print("Test accuracy of knn is {:.2f}%".format(
    knn.score(x_test, y_test) * 100))
'''#for Svm model   (previously failing: fit_transpose doesn't exist, and y_test was passed for training)
from sklearn.svm import SVC
sps=SVC(random_state=1,kernel='rbf')
sps.fit(x_train,y_train)
print("SVM Accuracy report {:.2f}%".format(sps.score(x_test,y_test)*100))
'''
#naive bayes
from sklearn.naive_bayes import GaussianNB
nai = GaussianNB()
nai.fit(x_train, y_train)
print("Naive Bayes Accuracy report {:.2f}%".format(
    nai.score(x_test, y_test) * 100))

#Random forest
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(n_estimators=1000, random_state=1)
rf.fit(x_train, y_train)
print("Random Forest Accuracy report {:.2f}%".format(
    rf.score(x_test, y_test) * 100))
#predict method
print(" 1 means survived, 0 means not survived")
print('Classified as :', classifier.predict([data[0]]))
print('Classified as :', classifier.predict([[3, 27, 0]]))
print('Classified as :', classifier.predict([data[2], data[4]]))

#TEST DATA - training and classification
#split total data into 60 percent train and 40 percent test
from sklearn.model_selection import train_test_split
train, test, t_train, t_test = train_test_split(data, t, test_size=0.4, random_state=0)
print('Number of records used for training', train.shape)
print('Number of records used for testing', test.shape)

#train and test
classifier.fit(train, t_train)  # train with 1st part: 60 percent
print('Accuracy is =', classifier.score(test, t_test))  # test with 2nd part: 40 percent

#CONFUSION MATRIX TO SHOW ACCURACY
from sklearn.metrics import confusion_matrix
print('confusion matrix\n', confusion_matrix(classifier.predict(test), t_test))

#Function that gives us a complete report on the performance
from sklearn.metrics import classification_report
print(classification_report(classifier.predict(test), t_test, target_names=['Survived', 'Not Survived']))

#A more sophisticated evaluation model is cross-validation. The idea is simple: the data is
#split into train and test sets several consecutive times, and the averaged prediction score
#over the different splits is the evaluation of the classifier.
from sklearn.model_selection import cross_val_score
# cross-validation with 20 folds
scores = cross_val_score(classifier, data, t, cv=20)
#print(scores)
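
# A common single-number summary of those 20 fold scores (a small sketch using numpy):
import numpy as np
print("mean accuracy: %0.3f (+/- %0.3f)" % (np.mean(scores), 2 * np.std(scores)))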
Example #48
def bayes():
    # load the dataset
    dataset = pd.read_csv("Dataset_Bayes.csv")

    # print the number of users who won and lost
    print(dataset.groupby('Gano').size())

    # print bar charts of Gano vs. the other variables
    dataset.drop(['Gano'], axis=1).hist()
    plt.show()

    # drop userId and completer; they are irrelevant for applying the method
    dataset_limpio = dataset.drop(['userId', 'completer'], axis=1)
    dataset_limpio.describe()

    # clean the dataset of NaN and Inf values
    dataset_limpio = limpiar_dataset_Para_Bayes(dataset_limpio)

    # remove and extract the Gano variable in order to find the 5 variables
    # that best predict whether the user won or lost
    a = dataset_limpio.drop(['Gano'], axis=1)
    b = dataset_limpio['Gano']
    best = SelectKBest(k=5)
    a_new = best.fit_transform(a, b)
    a_new.shape
    selected = best.get_support(indices=True)
    print("Best 5 variables")
    print(a.columns[selected])

    # plot the Pearson correlation for the 5 best variables
    used_features = a.columns[selected]
    colormap = plt.cm.viridis  # plt.viridis() only sets the current colormap and returns None
    plt.figure(figsize=(12, 12))
    plt.title('Pearson correlation coefficient', y=1.05, size=15)
    sns.heatmap(dataset_limpio[used_features].astype(float).corr(),
                linewidths=0.1,
                vmax=1.0,
                square=True,
                cmap=colormap,
                linecolor='white',
                annot=True)
    plt.show()

    # split the input data into training and test sets
    a_entrenamiento, a_pruebas = train_test_split(dataset_limpio,
                                                  test_size=0.2,
                                                  random_state=6)
    b_entrenamiento = a_entrenamiento["Gano"]
    b_pruebas = a_pruebas["Gano"]

    gnb = GaussianNB()
    gnb.fit(a_entrenamiento[used_features].values, b_entrenamiento)
    y_pred = gnb.predict(a_pruebas[used_features])

    print('Accuracy on the training set: {:.2f}'.format(
        gnb.score(a_entrenamiento[used_features], b_entrenamiento)))
    print('Accuracy on the test set: {:.2f}'.format(
        gnb.score(a_pruebas[used_features], b_pruebas)))

    # five best variables:
    # 'SRL', 'Atry to lecture', 'num_events', 'grade', 'cluster'
    # take rows from the dataset where a user lost and won (0, 1) over the 5 best variables
    print(
        gnb.predict([[1.666666667, 0, 2, 5.999999866, 0],
                     [2.041666667, 150, 151, 62.00000048, 1]]))
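
# SelectKBest above is fit on the full dataset before the split, which leaks test
# information into feature selection. A leakage-free sketch wraps selection and the
# classifier in one pipeline fit only on training data (reuses a and b from above):
from sklearn.pipeline import make_pipeline
from sklearn.feature_selection import SelectKBest
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
a_tr, a_te, b_tr, b_te = train_test_split(a, b, test_size=0.2, random_state=6)
pipe = make_pipeline(SelectKBest(k=5), GaussianNB())
pipe.fit(a_tr, b_tr)  # SelectKBest now sees only the training fold
print(pipe.score(a_te, b_te))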
Example #49
    Data.extend(i)
X = [i[0] for i in Data]
Y = [i[1] for i in Data]
#print(t)
split = len(corpus)-len(corpus)//5


tf = CountVectorizer()
t = tf.fit_transform(X).toarray()
print(t.shape)
print(len(Y))
x_train = t[:split]
x_test = t[split:]
y_train = Y[:split]
y_test = Y[split:]
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(x_train, y_train)
print("Finished Training")
print(clf.score(x_test, y_test))

#from nltk.tag import tnt
#tnt_pos_tagger = tnt.TnT()
#tnt_pos_tagger.train(train)

#print(word_tokenize(word_test))

#print(tnt_pos_tagger.evaluate(test))

#print(tnt_pos_tagger.tag(word_tokenize(word_test)))
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
iris = datasets.load_iris()
x = iris.data[:, 2:][0:140]
y = iris.target[0:140]
x_test = iris.data[:, 2:][140:150]  # was 141:150, which skipped sample 140
y_test = iris.target[140:150]
'''NAIVE BAYES'''

from sklearn.naive_bayes import GaussianNB
model = GaussianNB()
model.fit(x, y)
nb = model.score(x, y)

pred = model.predict(x_test)
sum(x == 0 for x in pred - y_test) / len(pred)
'''DECISION TREES'''

from sklearn import tree
model = tree.DecisionTreeClassifier(class_weight=None,
                                    criterion='entropy',
                                    max_depth=20,
                                    max_features=x.shape[1],
                                    max_leaf_nodes=4,
                                    min_samples_leaf=1,
                                    min_samples_split=1,
                                    min_weight_fraction_leaf=0.0,
                                    presort=False,
Example #51
temp = X_test.groupby(['label'])
t = temp.packets.count()
label_predicted = [-1 if e == 0 else e for e in label_predicted]
test_labels = [-1 if e == 0 else e for e in test_labels]

pred = [a*b for a,b in zip(label_predicted,t)]
act =  [a*b for a,b in zip(test_labels,t)]
pp = 0
pn = 0
nn = 0
n_p = 0  # renamed from np to avoid shadowing the numpy alias
for a, b in zip(pred, act):
    if a > 0 and b > 0:
        pp += a
    elif a < 0 and b < 0:
        nn -= a
    elif a > 0 and b < 0:
        pn += a
    else:
        n_p += b
print(pp)
print(nn)
print(n_p)
print(pn)
###############################################################################
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(train_cluster_features, train_labels)
label_predicted = gnb.predict(test_cluster_features)
print(gnb.score(test_cluster_features, test_labels))  # print so the score is visible
###############################################################################
# Training set and targets
X = bank.drop(columns='y').values
t = bank['y'].values

#experiment 1
from sklearn.model_selection import train_test_split
X_train, X_test, t_train, t_test = train_test_split(X, t, test_size = 0.2, shuffle = True)

#experiment 2
from sklearn.naive_bayes import GaussianNB
gaussian_clf = GaussianNB()
gaussian_clf.fit(X_train, t_train)

#experiment 3
from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score
gaussian_score = gaussian_clf.score(X_test, t_test)

gaussian_pred = gaussian_clf.predict(X_test)  # was: gaussian_pred - ..., a typo
cm = confusion_matrix(t_test, gaussian_pred)

gaussian_proba = gaussian_clf.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(t_test, gaussian_proba)
auc = roc_auc_score(t_test, gaussian_proba)

print("Gaussian CLF Score: " + str(gaussian_score))
print("Confusion Matrix ")
print(cm)
print("Gaussian CLF AUC Score: " + str(auc))  # was printing the roc_auc_score function itself

plt.figure()
plt.plot(fpr, tpr)
Example #53
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import BernoulliNB

digits = load_digits()

x = digits.data    # samples
y = digits.target  # labels

# split into training and test sets, with a test fraction of 0.3
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

"""Gaussian naive Bayes classifier: GaussianNB"""
gnb = GaussianNB().fit(x_train, y_train)  # train the model on the training set
gnb_predict = gnb.predict(x_test)  # predict on the test set
for i in range(10):  # compare the first ten predictions with the actual labels
    print(f"actual:{y_test[i]},predict:{gnb_predict[i]}")
gnb_score = gnb.score(x_test, y_test)  # moved out of the loop; it was recomputed every iteration
print(f"accuracy(GaussianNB):{gnb_score}")

print("-------------------")

"""Multinomial naive Bayes classifier: MultinomialNB"""
mnb = MultinomialNB().fit(x_train, y_train)
mnb_predict = mnb.predict(x_test)
for i in range(10):
    print(f"actual:{y_test[i]},predict:{mnb_predict[i]}")
mnb_score = mnb.score(x_test, y_test)
print(f"accuracy(MultinomialNB):{mnb_score}")

print("-------------------")

"""Bernoulli naive Bayes classifier: BernoulliNB"""
bnb = BernoulliNB().fit(x_train, y_train)
bnb_predict = bnb.predict(x_test)
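
# The three variants can also be compared uniformly in one loop; a small sketch
# reusing the x_train/x_test split from above:
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
for name, est in [("GaussianNB", GaussianNB()),
                  ("MultinomialNB", MultinomialNB()),
                  ("BernoulliNB", BernoulliNB())]:
    print(name, est.fit(x_train, y_train).score(x_test, y_test))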
Example #54
label_names = data['target_names']
labels = data['target']
feature_names = data['feature_names']
features = data['data']

# Look at our data
print(label_names)
print(labels[0])
print(feature_names[0])
print(features[0])

#Split the data
train, test, train_labels, test_labels = train_test_split(features,
                                                          labels,
                                                          test_size=0.33,
                                                          random_state=42)

#Classifier
gnb = GaussianNB()

#Training
model = gnb.fit(train, train_labels)

#Make prediction
preds = gnb.predict(test)
print(preds)

#Evaluate accuracy
print(accuracy_score(test_labels, preds))
print(gnb.score(test, test_labels))
Example #55
dt_model = DecisionTreeClassifier(criterion='entropy', max_depth=3)
dt_model.fit(train_set, train_labels)

dt_model.score(test_set, test_labels)

y_predict = dt_model.predict(test_set)
y_predict[:5]

test_set.head(5)
"""### Naive Bayes"""

naive_model = GaussianNB()
naive_model.fit(train_set, train_labels)

prediction = naive_model.predict(test_set)
naive_model.score(test_set, test_labels)
"""### Random Forest classifier"""

randomforest_model = RandomForestClassifier(max_depth=2, random_state=0)
randomforest_model.fit(train_set, train_labels)

Importance = pd.DataFrame(
    {'Importance': randomforest_model.feature_importances_ * 100},
    index=train_set.columns)
Importance.sort_values('Importance', axis=0, ascending=True).plot(
    kind='barh',
    color='r',
)

predicted_random = randomforest_model.predict(test_set)
randomforest_model.score(test_set, test_labels)
plt.ylabel("Accuracy")
plt.legend()

# ### Question 2

# In[169]:

pca = PCA(n_components=2)
pca.fit(X)
X_New = pca.transform(X)
X_Test_New = pca.transform(X_Test)

# Naive Bayes classifier
clf1 = GaussianNB()
clf1 = clf1.fit(X_New, Y)
print("GaussianNB Acc: {}".format(clf1.score(X_Test_New, Y_Test) * 100))

# KNeighborsClassifier
clf2 = KNeighborsClassifier(n_jobs=-1)
clf2 = clf2.fit(X_New, Y)
print("KNeighborsClassifier Acc: {}".format(
    clf2.score(X_Test_New, Y_Test) * 100))

# DecisionTreeClassifier
clf3 = DecisionTreeClassifier(max_depth=BestDep)
clf3 = clf3.fit(X_New, Y)
print("DecisionTreeClassifier Acc: {}".format(
    clf3.score(X_Test_New, Y_Test) * 100))

for clf in [clf1, clf2, clf3]:
    print(clf.score(X_New, Y))
Example #57
knn = neighbors.KNeighborsClassifier(n_neighbors=5)
rfc = RandomForestClassifier(n_estimators=10)
lor = LogisticRegression(random_state=1)
gnb = GaussianNB()
vot = VotingClassifier(estimators=[('lr', lor), ('rf', rfc), ('gnb', gnb),
                                   ('knn', knn)],
                       voting='hard')
lr.fit(x_train, y_train)
svc.fit(x_train, y_train)
knn.fit(x_train, y_train)
rfc.fit(x_train, y_train)
lor.fit(x_train, y_train)
gnb.fit(x_train, y_train)
vot.fit(x_train, y_train)
print("LogisticRegression", lor.score(x_test, y_test))
print("GaussianNB", gnb.score(x_test, y_test))
print("RandomForestClassifier ", rfc.score(x_test, y_test))
print("KNeighborsClassifier ", knn.score(x_test, y_test))
print("SVC ", svc.score(x_test, y_test))
print("LinearRegression ", lr.score(x_test, y_test))
print('VotingClassifier', vot.score(x_test, y_test))
N = 7
x = range(N)
y = [
    lor.score(x_test, y_test),
    gnb.score(x_test, y_test),
    rfc.score(x_test, y_test),
    knn.score(x_test, y_test),
    svc.score(x_test, y_test),
    lr.score(x_test, y_test),
    vot.score(x_test, y_test)
Example #58
    plt.figure()
    plt.bar(np.arange(2) + 0.2, trainsc, width=0.4, color='c', align='center')
    plt.bar(np.arange(2) + 0.6, testsc, width=0.4, color='r', align='center')
    plt.xticks(np.arange(2) + 0.4, alg)
    plt.title('Linear Discriminant Analysis accuracy')
    plt.ylabel('Accuracy')
    plt.legend(['Train', 'Test'])
    plt.show()

#%% Naive Bayes Gaussian

if (GNB_cl == 1):
    nb = GaussianNB()
    nb.fit(Xtrain, Ytrain)
    scores = np.empty(4)
    scores[0] = nb.score(Xtrain, Ytrain)
    scores[1] = nb.score(Xtest, Ytest)
    print('Gaussian Naive Bayes, train: {0:.02f}% '.format(scores[0] * 100))
    print('Gaussian Naive Bayes, test: {0:.02f}% '.format(scores[1] * 100))

    bnb = BaggingClassifier(GaussianNB(), max_samples=0.5, n_jobs=-1)
    bnb.fit(Xtrain, Ytrain)
    scores[2] = bnb.score(Xtrain, Ytrain)
    scores[3] = bnb.score(Xtest, Ytest)
    print('Bagging Naive Bayes, train: {0:.02f}% '.format(scores[2] * 100))
    print('Bagging Naive Bayes, test: {0:.02f}% '.format(scores[3] * 100))

    alg = ['Naive Bayes', 'Bagged Naive Bayes']
    trainsc = [scores[0], scores[2]]
    testsc = [scores[1], scores[3]]
    plt.figure()
Example #59
def modeloNaiveBayesSampling():

    # load the dataset stored as csv
    dataset = pd.read_csv('dataset2.csv')

    # dimensionality reduction via feature selection, using sklearn's SelectKBest
    X = dataset.drop(['Plag'], axis=1)
    y = dataset['Plag']

    best = SelectKBest(k=50)
    X_new = best.fit_transform(X, y)
    X_new.shape
    selected = best.get_support(indices=True)
    #print(X.columns[selected])
    used_features = X.columns[selected]

    # split the dataset into training and test sets:
    X_train, X_test = train_test_split(dataset, test_size=0.3, random_state=6)
    y_train = X_train["Plag"]
    y_test = X_test["Plag"]

    # configure the resampling that combines oversampling and undersampling:
    os = make_pipeline(
        SMOTE(sampling_strategy={1: 5000}),
        NearMiss(sampling_strategy={0: 15000}))

    X_train_res, y_train_res = os.fit_resample(X_train, y_train)

    X_test_res, y_test_res = (X_test, y_test)

    # use the Gaussian classifier:
    gnb = GaussianNB()

    # with the model created, use fit() for the learning step
    gnb.fit(
        X_train_res[used_features].values,
        y_train_res
    )
    y_pred = gnb.predict(X_test_res[used_features])

    # compute the accuracy
    print('Accuracy on the training set: {:.2f}'
          .format(gnb.score(X_train_res[used_features], y_train_res)))
    print('Accuracy on the test set: {:.2f}'
          .format(gnb.score(X_test_res[used_features], y_test_res)))

    # compute the confusion matrix
    print(confusion_matrix(y_test_res, y_pred))

    print("Initial training distribution: {}".format(Counter(y_train)))
    print("Final training distribution: {}".format(Counter(y_train_res)))

    print("Initial test distribution: {}".format(Counter(y_test)))
    print("Final test distribution: {}".format(Counter(y_test_res)))
Example #60
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report

# Importing data set
train_df = pd.read_csv('glass.csv')
X = train_df.drop("Type", axis=1)
Y = train_df["Type"]

# Training and testing data
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    Y,
                                                    test_size=0.2,
                                                    random_state=0)

# using naive Bayes
gnb = GaussianNB()

# Showing the result of test data
Y_prediction = gnb.fit(X_train, y_train).predict(X_test)
acc_gnb = round(gnb.score(X_test, y_test) * 100)

# Calculating the accuracy
print("Accuracy is:", acc_gnb)
print(classification_report(y_test, Y_prediction))