Esempi in Python per GaussianNB.predict, esempi in Python per sklearn.naive_bayes.GaussianNB.predict

Esempio n. 1

0

Mostra file

File: naive_bayes.py Progetto: webmusing/thinkful

def main():
	config = dict()
	config['resource_dir'] = os.path.abspath(os.path.join(os.path.realpath(__file__), '../../')) + "/resources/"
	config['raw_file'] = config['resource_dir'] + "ideal_weight.csv"
	ideal_weight_df = None

	ideal_weight_df = pd.read_csv(config['raw_file'])
	ideal_weight_df.columns = [x.replace("\'","") for x in ideal_weight_df.columns.values.tolist()]
	
	ideal_weight_df.loc[:,'sex'] = ideal_weight_df['sex'].map(lambda x: x.replace("\'",""))
	#print ideal_weight_df
	#print config

	plt.hist(ideal_weight_df['actual'], alpha=0.5, label='actual')
	plt.hist(ideal_weight_df['ideal'], alpha=0.5, label='ideal')
	plt.show() # figure_1.png

	ideal_weight_df['diff'].hist()

	ideal_weight_df['sex_id'] = ideal_weight_df['sex'].map(lambda x: 1 if x == 'Male' else 0)

	clf = GaussianNB()
	clf.fit(ideal_weight_df[['actual','ideal','diff']],ideal_weight_df['sex'])

	print clf.predict([[145,160,-15]]) # male

	print clf.predict([[160,145,15]]) # female

Esempio n. 2

0

Mostra file

File: dataMining.py Progetto: wangwf/Codes

def test_classification():
    t = zeros(len(target))
    t[target == 'setosa'] = 1
    t[target == 'versicolor'] = 2
    t[target == 'virginica'] = 3

    from sklearn.naive_bayes import GaussianNB
    classifier = GaussianNB()
    classifier.fit(data,t) # training on the iris dataset

    print classifier.predict(data[0])
    print t[0]


    from sklearn import cross_validation
    train, test, t_train, t_test = cross_validation.train_test_split(data, t, test_size=0.4, random_state=0)

    classifier.fit(train,t_train) # train
    print classifier.score(test,t_test) # test

    from sklearn.metrics import confusion_matrix
    print confusion_matrix(classifier.predict(test),t_test)

    from sklearn.metrics import classification_report
    print classification_report(classifier.predict(test), t_test, target_names=['setosa', 'versicolor', 'virginica'])

    from sklearn.cross_validation import cross_val_score
    # cross validation with 6 iterations 
    scores = cross_val_score(classifier, data, t, cv=6)
    print scores

    from numpy import mean
    print mean(scores)

Esempio n. 3

0

Mostra file

File: location.py Progetto: wdldgithub/gbServer

 def MyNaiveBayes(object):
     pre = PreProcess()
     (training_value, test_value, test_pos_x, test_pos_y, training_pos_x, training_pos_y) = pre.split()
     # 模型初始化
     clf_x = GaussianNB()
     clf_y = GaussianNB()
     # 进行模型的训练
     clf_x.fit(training_value, training_pos_x)
     clf_y.fit(training_value, training_pos_y)
     # 计算结果
     result_pos_x = clf_x.predict(test_value)
     result_pos_y = clf_y.predict(test_value)
     '''
     print result_pos_x
     print test_pos_x
     print result_pos_y
     print test_pos_y
     '''
     # 计算误差
     x_dis = []
     y_dis = []
     d_dis = []
     for i in range(len(result_pos_x)):
         x_dis.append(abs(result_pos_x[i] - test_pos_x[i]))
         y_dis.append(abs(result_pos_y[i] - test_pos_y[i]))
         d_dis.append(math.sqrt((result_pos_x[i]-test_pos_x[i])**2+(result_pos_y[i]-test_pos_y[i])**2))
     x = (sum(x_dis))/len(result_pos_x)
     y = (sum(y_dis))/len(result_pos_y)
     d = (sum(d_dis))/len(d_dis)
     print x, y, d
     return x, y, d

Esempio n. 4

0

Mostra file

File: gaussianNB.py Progetto: USCDataScience/NN-fileTypeDetection

class GaussianNBClassifier:

	def __init__(self):
		"""
		This is the constructor responsible for initializing the classifier
		"""
		self.outputHeader = "#gnb"
		self.clf = None

	def buildModel(self):
		"""
		This builds the model of the Gaussian NB classifier
		"""
		self.clf =  GaussianNB()

	def trainGaussianNB(self,X, Y):
		"""
		Training the Gaussian NB Classifier
		"""
		self.clf.fit(X, Y)

	def validateGaussianNB(self,X, Y):
		"""
		Validate the Gaussian NB Classifier
		"""
		YPred = self.clf.predict(X)
		print accuracy_score(Y, YPred)

	def testGaussianNB(self,X, Y):
		"""
		Test the Gaussian NB Classifier
		"""
		YPred = self.clf.predict(X)
		print accuracy_score(Y, YPred)

Esempio n. 5

0

Mostra file

File: naive_bayes.py Progetto: mohamed-taha/sherlok-tools

def NB(text):
    ### features_train and features_test are the features for the training
    ### and testing datasets, respectively
    ### labels_train and labels_test are the corresponding item labels
    features_train, features_test, labels_train, labels_test = Preprocess()
    Ifeatures_train,Ifeatures_test,Ilabels_train=preprocess_input([text])

    # classification goes here

    clf = GaussianNB()

    # training
    train_t0 = time()
    clf.fit(features_train, labels_train)
    train_t1 = time()

    # prediction or testing
    test_t0 = time()
    predict = clf.predict(features_test)
    test_t1 = time()

    print "accuracy: ", clf.score(features_test, labels_test)
    print "#################################"
    print "tain time: ", round(train_t1 - train_t0, 3), "s"
    print "prediction time: ", round(test_t1 - test_t0, 3), "s"

    print "#################################"

    clf.fit(Ifeatures_train,Ilabels_train)
    print ("prediction of ",str(clf.predict(Ifeatures_test))[1])

    #print "prediction of ", clf.predict(preprocess_input(text))
    return  str(clf.predict(Ifeatures_test))[1]

Esempio n. 6

0

Mostra file

File: learners.py Progetto: 2dpodcast/cs109-project-1

class GaussianNBLearner(AbstractLearner):
    """
    Gaussian Naive Bayes Learner

    http://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html

    GaussianNB expects dense arrays, so any input exposing ``toarray()``
    (e.g. a scipy sparse matrix) is densified before it reaches the
    estimator. This applies to ``train``, ``predict`` and ``score`` alike.
    """

    def __init__(self):
        self.nb = GaussianNB()

    @staticmethod
    def _dense(X):
        """Return ``X.toarray()`` when X provides it, otherwise X unchanged."""
        return X.toarray() if hasattr(X, 'toarray') else X

    def train(self, X, Y):
        """Fit the estimator on features X and targets Y."""
        self.nb.fit(self._dense(X), Y)

    def predict(self, X):
        """Return the predicted targets for X."""
        return self.nb.predict(self._dense(X))

    def score(self, X, Y):
        """Return the mean absolute difference between predictions and Y.

        This is an error measure (lower is better), not an accuracy. It goes
        through ``self.predict`` so sparse input is densified here too;
        previously it called the estimator directly and failed on sparse X.
        """
        return np.mean(np.abs(self.predict(X) - np.array(Y)))

Esempio n. 7

0

Mostra file

File: classifier.py Progetto: bhnascar/Viral-Art

def naive_bayes(train_features, train_labels, test_features, test_labels):
    """Fit a Gaussian NB model and predict on both the test and training sets.

    ``test_labels`` is accepted but not used. Returns the tuple
    ``(test_predictions, train_predictions)``.
    """
    classifier = GaussianNB()
    classifier.fit(train_features, train_labels)

    predicted_test = classifier.predict(test_features)
    predicted_train = classifier.predict(train_features)
    return (predicted_test, predicted_train)

Esempio n. 8

0

Mostra file

File: nbmatcher.py Progetto: kvpradap/magellan_scratch_1

class NBMatcher(MLMatcher):
    """Matcher backed by a scikit-learn GaussianNB classifier."""

    def __init__(self, *args, **kwargs):
        # The same arguments are forwarded to both the base class and the
        # estimator, as in the original implementation.
        super(NBMatcher, self).__init__(*args, **kwargs)
        self.clf = GaussianNB(*args, **kwargs)

    def fit(self, X, Y):
        """Train the classifier on features X and labels Y."""
        self.clf.fit(X, Y)

    def predict(self, X):
        """Return the predicted labels for X.

        The prediction used to be computed and then dropped, so callers
        always received None.
        """
        return self.clf.predict(X)

Esempio n. 9

0

Mostra file

File: explore_enron_data.py Progetto: texpine/ud120-projects

def bayes_test():
    """Print the Gaussian NB prediction for one point, via fit and partial_fit.

    Trains two separate estimators on the same six 2-D samples, one with
    ``fit`` and one with ``partial_fit`` (which needs the full class list),
    and prints what each predicts for the point (-0.8, -1).
    """
    samples = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    targets = np.array([1, 1, 1, 2, 2, 2])
    query = [[-0.8, -1]]

    batch_model = GaussianNB()
    batch_model.fit(samples, targets)
    print(batch_model.predict(query))

    incremental_model = GaussianNB()
    incremental_model.partial_fit(samples, targets, np.unique(targets))
    print(incremental_model.predict(query))

Esempio n. 10

0

Mostra file

File: email_author_identification.py Progetto: linhbui/naive-bayes

def classify(features_train, labels_train, features_test, labels_test):
  classifier = GaussianNB()
  t0 = time()
  classifier.fit(features_train, labels_train)
  print "training time: ", round(time() - t0), "s"
  t1 = time()
  classifier.predict(features_test)
  print "predicting time: ", round(time() - t1), "s"
  return classifier.score(features_test, labels_test)

Esempio n. 11

0

Mostra file

File: bayes.py Progetto: rchibana/heartDiseaseIA

class TreeClassifier(Classifier):
    """Classifier implementation backed by a Gaussian Naive Bayes estimator.

    NOTE(review): despite the class name, the estimator is GaussianNB, not a
    decision tree.
    """

    def __init__(self):
        self.classifier = GaussianNB()

    def do_train(self, X, y):
        """Fit the estimator on features X and labels y."""
        self.classifier.fit(X, y)

    def do_classification(self, X, y):
        """Return the predicted labels for X.

        ``y`` stays in the signature for interface compatibility but is not
        forwarded: ``GaussianNB.predict`` accepts only the feature matrix, so
        passing it raised TypeError. The result is now returned as well.
        """
        return self.classifier.predict(X)

Esempio n. 12

0

Mostra file

File: inference.py Progetto: KawachiShota/position_estimation

class NaiveBayes:
    """Gaussian NB position estimator trained on RSSI measurements.

    ``learning`` must run before ``inference``, and ``inference`` before
    ``output``, because each step stores the attributes the next one reads.
    """
    __theta = 0
    __sigma = 0

    def __init__(self):
        pass

    def learning(self,x_data,y_data):
        """Load comma-separated features and labels, then fit and evaluate.

        Prints the loaded arrays, the fitted ``theta_`` and ``sigma_``
        parameters, and the accuracy on a held-out split.
        """
        self.rssi = np.loadtxt(x_data, delimiter=',')
        print(self.rssi)

        self.position = np.loadtxt(y_data, delimiter=',')
        print(self.position)

        self.gaussian_nb = GaussianNB()
        model = self.gaussian_nb

        from sklearn.cross_validation import train_test_split
        split = train_test_split(self.rssi, self.position, random_state=0)
        rssi_train, rssi_test, position_train, position_test = split

        model.fit(rssi_train,position_train)
        print("theta",model.theta_)
        print("sigma",model.sigma_)

        predicted = model.predict(rssi_test)
        print(metrics.accuracy_score(position_test, predicted))

    def inference(self,r_data):
        """Predict the class for ``r_data``, print it and return it.

        Also stores the posterior probabilities, cast to float16, for
        ``output`` to plot.
        """
        model = self.gaussian_nb
        self.predicted_class = model.predict(r_data)

        posterior = model.predict_proba(r_data)
        _log_posterior = model.predict_log_proba(r_data)  # computed, not used
        self.post_prob_float16 = posterior.astype(np.float16)

        print(self.predicted_class)
        return self.predicted_class

    def output(self):
        """Draw a bar graph of the first stored posterior distribution."""
        chart = graph.Graph()
        chart.bar_graph(self.post_prob_float16[0])

Esempio n. 13

0

Mostra file

File: predictor.py Progetto: Searil/kimyazmis

def predict_author(arr, yazar_features, yazar_classes):
    results = []

    print "\n[DEBUG] K-NN result (neighbors: 10)"
    knn = KNeighborsClassifier(n_neighbors=10)
    knn.fit(yazar_features, yazar_classes)
    print knn.predict(arr)
    results.append(knn.predict(arr)[0])

    print "\n[DEBUG] SVC result (linear) (degree=3)"
    svc = svm.SVC(kernel='linear', degree=3)
    svc.fit(yazar_features, yazar_classes)
    print svc.predict(arr)
    results.append(svc.predict(arr)[0])

    print "\n[DEBUG] Logistic Regression result ()"
    regr = linear_model.LogisticRegression()
    regr.fit(yazar_features, yazar_classes)
    print regr.predict(arr)
    results.append(regr.predict(arr)[0])

    print "\n[DEBUG] Gaussian Naive Bayes"
    gnb = GaussianNB()
    gnb.fit(yazar_features, yazar_classes)
    print gnb.predict(arr)
    results.append(gnb.predict(arr)[0])

    print "\n[DEBUG] Decision Tree Classifier"
    dtc = tree.DecisionTreeClassifier()
    dtc.fit(yazar_features, yazar_classes)
    print dtc.predict(arr)
    results.append(dtc.predict(arr)[0])

    print "\n[DEBUG] Gradient Boosting Classification"
    gbc = GradientBoostingClassifier()
    gbc.fit(yazar_features, yazar_classes)
    print gbc.predict(arr)
    results.append(gbc.predict(arr)[0])

    # output = open('features.pkl', 'wb')
    # pickle.dump(yazar_features, output)
    # output.close()

    # output = open('classes.pkl', 'wb')
    # pickle.dump(yazar_classes, output)
    # output.close()

    # test_yazar_features = []        # for test data
    # test_yazar_classes = []         # for test classes
    # # yazar_features = []             # for train data
    # # yazar_classes = []              # for train classes

    return results

Esempio n. 14

0

Mostra file

File: Classifier.py Progetto: ChetanVashisht/Sentiment-Analysis

def trainer(dataset = "Features.csv"):
    # Train the various machine learning algorithms using the features extracted.
    data, labels = extractor(dataset)
    train, test, train_labels, test_labels = train_test_split(data, labels, test_size = 0.20, random_state = 42)
    names, expected_results = zip(*test_labels)
    names1, train_labels = zip(*train_labels)
    
    print 'S' + '\t' + 'H' + '\t' + 'F' + '\t' + 'A' + '\t' + 'N'
    
    # Random Forest Classifier
    rf = RandomForestClassifier(n_estimators = 100, n_jobs = 2)
    rf.fit(train, train_labels)
    results_boosting = rf.predict(test)
    conf_matrix = confusion_matrix(expected_results, results_boosting)
    print "Forset Classifier:\n"
    print conf_matrix
    accuracy_Boosting = float(np.trace(conf_matrix))/float(np.sum(conf_matrix))
    print accuracy_Boosting

    # KNN Classifier
    neigh = KNeighborsClassifier(n_neighbors=3)
    neigh.fit(train, train_labels)
    results_KNN = neigh.predict(test)
    conf_matrix = confusion_matrix(expected_results, results_KNN)
    print "KNN Classifier:\n"
    print conf_matrix
    accuracy_KNN = float(np.trace(conf_matrix))/float(np.sum(conf_matrix))
    print accuracy_KNN

    # Baye's Classifier
    clf = GaussianNB()
    clf.fit(train, train_labels)
    results_Bayes = clf.predict(test)
    conf_matrix = confusion_matrix(expected_results, results_Bayes)
    print "\nBayes Classifier:\n"
    print conf_matrix
    accuracy_Bayes = float(np.trace(conf_matrix))/float(np.sum(conf_matrix))
    print accuracy_Bayes

    # Neural Network
    clf = BernoulliNB()
    clf.fit(train, train_labels)
    results_NN = clf.predict(test)
    conf_matrix = confusion_matrix(expected_results, results_NN)
    print "\nNeural Network:\n"
    print conf_matrix
    accuracy_NN = float(np.trace(conf_matrix))/float(np.sum(conf_matrix))
    print accuracy_NN

    documenter(names, results_boosting, results_Bayes, results_NN, results_KNN, accuracy_Boosting, accuracy_Bayes, accuracy_NN, accuracy_KNN)

Esempio n. 15

0

Mostra file

File: classify.py Progetto: Prashant47/Intro-to-Machine-Learning-Udacity

def NBAccuracy(features_train, labels_train, features_test, labels_test):
    """Return the Gaussian NB accuracy on the test set.

    The accuracy is computed by hand as the fraction of predictions that
    equal the corresponding entry of ``labels_test``.
    """
    from sklearn.naive_bayes import GaussianNB

    model = GaussianNB()
    model.fit(features_train, labels_train)
    predicted = model.predict(features_test)

    # Count the positions where the prediction equals the true label.
    hits = sum(1 for guess, truth in zip(predicted, labels_test) if guess == truth)
    return float(hits) / float(len(labels_test))

Esempio n. 16

0

Mostra file

File: color_classifier.py Progetto: uf-mil/software-common

class GaussianColorClassifier(ContourClassifier):
    '''
    Contour classifier that describes a contour by its mean color in the
    BGR, HSV and LAB colorspaces and feeds those nine values to a
    Gaussian Naive Bayes classifier.

    For more usage info, see class ContourClassifier
    '''
    FEATURES = ['B', 'G', 'R', 'H', 'S', 'V', 'L', 'A', 'B']

    def __init__(self, classes, **kwargs):
        super(GaussianColorClassifier, self).__init__(classes, **kwargs)
        self.classifier = GaussianNB()

    def get_features(self, img, mask):
        '''
        Return the flat feature vector for the masked region of img:
        mean BGR, followed by that mean converted to HSV and to LAB.
        '''
        # Pack the first three mean channels into a 1x1 uint8 image so that
        # cvtColor can convert it.
        bgr_pixel = np.array([[cv2.mean(img, mask)[:3]]], dtype=np.uint8)
        hsv_pixel = cv2.cvtColor(bgr_pixel, cv2.COLOR_BGR2HSV)
        lab_pixel = cv2.cvtColor(bgr_pixel, cv2.COLOR_BGR2LAB)
        return np.hstack((bgr_pixel.flatten(), hsv_pixel.flatten(), lab_pixel.flatten()))

    def classify_features(self, features):
        '''Return the classifier's predicted class for features.'''
        return self.classifier.predict(features)

    def feature_probabilities(self, features):
        '''Return the classifier's class probabilities for features.'''
        return self.classifier.predict_proba(features)

    def train(self, features, classes):
        '''Fit the classifier on features and their classes.'''
        self.classifier.fit(features, classes)

Esempio n. 17

0

Mostra file

File: numpyreadallalgo.py Progetto: sibrajas/data-python

def categorize(train_data,test_data,train_class,n_features):
    """Fit a Gaussian NB model on the training data and label the test data.

    :param train_data: training feature matrix
    :param test_data: feature matrix to classify
    :param train_class: training labels
    :param n_features: unused; kept so existing callers keep working
    :return: predicted labels for ``test_data``

    Earlier revisions also constructed logistic-regression, SVC, tree,
    random-forest and AdaBoost estimators here without ever fitting or
    using them. Those dead constructions are removed; only the Gaussian NB
    model influenced the result.
    """
    gnb = GaussianNB()
    gnb.fit(train_data,train_class)
    return gnb.predict(test_data)

Esempio n. 18

0

Mostra file

File: classify.py Progetto: LevinJ/ud120-projects

def NBAccuracy(features_train, labels_train, features_test, labels_test):
    """ compute the accuracy of your Naive Bayes classifier """
    ### import the sklearn modules for GaussianNB and the accuracy metric
    from sklearn.naive_bayes import GaussianNB
    from sklearn.metrics import accuracy_score

    ### create classifier
    clf = GaussianNB()

    ### fit the classifier on the training features and labels
    clf.fit(features_train, labels_train)

    ### use the trained classifier to predict labels for the test features
    pred = clf.predict(features_test)

    ### calculate and return the accuracy on the test data.
    ### The hand-computed ratio that used to precede this call was
    ### overwritten immediately, so only accuracy_score is kept.
    return accuracy_score(labels_test, pred)

Esempio n. 19

0

Mostra file

File: nb_author_id.py Progetto: dixu-ca/ud120-projects

def NBAccuracy(features_train, labels_train, features_test, labels_test):
    """ compute the accuracy of your Naive Bayes classifier """
    ### import the sklearn module for GaussianNB
    from sklearn.naive_bayes import GaussianNB

    ### create classifier
    clf = GaussianNB()

    t0 = time()
    ### fit the classifier on the training features and labels
    clf.fit(features_train, labels_train)
    print "training time:", round(time()-t0, 3), "s"

    ### use the trained classifier to predict labels for the test features
    import numpy as np
    t1 = time()
    pred = clf.predict(features_test)
    print "predicting time:", round(time()-t1, 3), "s"

    ### calculate and return the accuracy on the test data
    ### this is slightly different than the example,
    ### where we just print the accuracy
    ### you might need to import an sklearn module
    accuracy = clf.score(features_test, labels_test)
    return accuracy

Esempio n. 20

0

Mostra file

File: NB.py Progetto: lxc-xx/kaggle_tradeshift

def main(argv):
    if len(argv) != 5:
        print "./NB_train_pred.py train.csv train_lable test.csv save_folder label_idx"
        sys.exit(1);

    output_folder = argv[3]
    label_idx = int(argv[4])

    os.system("mkdir " + output_folder)

    print "Loading training data"
    train_array = np.load(argv[0])
    print "Loading training label"
    train_label_array = np.load(argv[1])
    print "Loading test data"
    test_array = np.load(argv[2])
    
    print "building NB on label " + str(label_idx)
    gnb = GaussianNB() 
    model = gnb.fit(train_array[:, 1:], train_label_array[1:, label_idx]) 

    print "predicting label " + str(label_idx)
    nb_pred = gnb.predict(test_array[:,1:])
    print "save the result"
    with open(output_folder + "/" + str(label_idx) + ".pred", 'w') as pred_file:
        pred_file.write("\n".join([ str(x) for x in nb_pred.tolist()]))
    with open(output_folder+"/"+str(label_idx) + ".npy", 'wb') as npy_file:
        np.save(npy_file, nb_pred)

Esempio n. 21

0

Mostra file

File: SSL-8.py Progetto: IamCatkin/Learning-Python

def gnbmodel(d,X_2,y_2,X_3,y_3,X_test,y_test):
    """Fit Gaussian NB on (X_2, y_2) and score the rows of X_3.

    Prints the mean 5-fold cross-validation accuracy on the training data
    and the accuracy on (X_test, y_test), each prefixed with ``d``.

    :param d: text prefix for the printed lines
    :param X_2: training features
    :param y_2: training labels
    :param X_3: DataFrame of rows to score; it is not modified
    :param y_3: unused
    :param X_test: test features
    :param y_test: test labels
    :return: Series named 'chance', indexed like ``X_3``, holding the
        second column of ``predict_proba`` for each row
    """
    # k-fold cross-validation
    scores = cross_val_score(GaussianNB(), X_2, y_2, cv=5, scoring='accuracy')
    score_mean = scores.mean()
    print(d+'5折交互检验:'+str(score_mean))

    gnb = GaussianNB().fit(X_2,y_2)

    # Accuracy on the test set
    answer_gnb = gnb.predict(X_test)
    accuracy = metrics.accuracy_score(y_test,answer_gnb)
    print(d+'预测:'+str(accuracy))

    # Assign the whole probability column at once. The former row-by-row
    # .iloc writes were slow and stored floats into an integer column.
    X_3_copy = X_3.copy(deep=True)
    X_3_copy['chance'] = gnb.predict_proba(X_3)[:,1]
    return X_3_copy['chance']

Esempio n. 22

0

Mostra file

File: nb_author_id.py Progetto: avasilescu/ud120-projects

def NBAccuracy(features_train, labels_train, features_test, labels_test):
	#Import sklearn modules for GaussianNB
	from sklearn.naive_bayes import GaussianNB
	from sklearn.metrics import accuracy_score
	
	#Create classifer
	classifer = GaussianNB();
	
	#Timing fit algorithm
	t0 = time();
	
	#Fit classier on the training features
	classifer.fit(features_train, labels_train);
	
	print "Training Time: ", round(time() - t0, 3), "s";
	
	GaussianNB();
	
	#Timing prediction algorithm
	t0=time();
	
	#Use trained classifer to predict labels for test features
	pred = classifer.predict(features_test);
	
	print "Prediction Time: ", round(time() - t0, 3), "s";
	
	#Calculate accuracy from features_test with answer in labels_test
	
	accuracy = accuracy_score(pred, labels_test);
	
	return accuracy;

Esempio n. 23

0

Mostra file

File: testreuters1.py Progetto: vidgit/Feature_Selection

def classifier(model,X,X1,y,y1):
    t0 = time.time()
    if model=='gnb':
        print 'GNB'
        gnb = GaussianNB().fit(X, y)
    elif model=='mnb':
        print 'MNB'
        gnb = MultinomialNB().fit(X,y)
    elif model=='bnb':
        print 'BNB'
        gnb = BernoulliNB().fit(X, y)
    elif model=='lin':
        print 'Linear SVM'
        gnb = svm.SVC(kernel='linear', C=0.5).fit(X, y)
    elif model=='rbf':
        print 'RBF SVM'
        gnb = svm.SVC().fit(X, y)
    elif model=='poly':
        print 'Poly SVM'
        gnb = svm.SVC(kernel='poly', degree=2).fit(X, y)
    elif model=='rfc':
        print 'Random Forest'
        gnb = RandomForestClassifier(max_depth=10, n_estimators=100, max_features=5).fit(X, y)
    elif model=='lr':
        print 'Logistic Regression'
        gnb = LogisticRegression().fit(X, y)
    elif model=='knn':
        print "K nearest neighbours"
        gnb = KNeighborsClassifier(n_neighbors=6).fit(X, y)
    y_pred = gnb.predict(X1)
    print accuracy_score(y1, y_pred), f1_score(y1, y_pred)
    print time.time() - t0

Esempio n. 24

0

Mostra file

File: nb.py Progetto: mkdmkk/infaas

class PatternBasedDiagnosis:
    """
    Pattern-based diagnosis backed by a Gaussian Naive Bayes model.

    ``train`` must be called before ``eval``; until then the ``model``
    slot is unset.
    """

    __slots__ = [
        "model"
    ]

    def __init__(self):
        pass

    def train(self, data, labels):
        """
        Print the training inputs, then fit a fresh GaussianNB on them
        :param data: training observations
        :param labels: label for each observation
        :return: None
        """
        for caption, payload in (('Training Data: %s', data),
                                 ('Training Labels: %s', labels)):
            print(caption % payload)
        self.model = GaussianNB()
        self.model = self.model.fit(data, labels)

    def eval(self, obs):
        """
        Print the model's prediction for the observations
        :param obs: observations to classify
        :return: None
        """
        prediction = self.model.predict(obs)
        print('Testing Result: %s' % prediction)

Esempio n. 25

0

Mostra file

File: trainModel.py Progetto: spacegoing/ALTA2015Contest

def getGaussianPred(featureMatrix, labels, testSet, testSet_docIndex):
    """
    Fit Gaussian NB on the training data and group positive test predictions
    by document.

    All input arguments are return of getTrainTestData()
    :param featureMatrix: training features
    :param labels: training labels
    :param testSet: test features
    :param testSet_docIndex: 2-D array; for each test row, column 0 is read
                        as the doc id and column 1 as the index
    :return docIndexPred: dict{docid: [index1, index2, ...], ...}
                        key is docid
                        value is the index of every test row whose
                        prediction is truthy
    """
    model = GaussianNB()
    model.fit(featureMatrix, labels)
    pred = model.predict(testSet)

    docIndexPred = dict()
    for row, flagged in enumerate(pred):
        if not flagged:
            continue
        docid = testSet_docIndex[row, 0]
        docIndexPred.setdefault(docid, []).append(testSet_docIndex[row, 1])

    return docIndexPred

Esempio n. 26

0

Mostra file

File: regularized.py Progetto: iskandr/data-experiments

class RegularizedGaussianNB:
  """
  Gaussian NB whose fitted per-class variances are adjusted after training.

  Three kinds of regularization can be combined:
    - pull a feature's within-class variance toward the average variance
      of all features in that class (avg_weight)
    - pull it toward the feature's variance over the whole training set
      (pooled_weight)
    - add a constant amount of variance to every feature (extra_variance)
  The original author found the last one works best in practice, and
  recommends cross-validating the regularization value.
  """
  def __init__(self, avg_weight = 0, pooled_weight = 0, extra_variance = 0.1):
    self.model = GaussianNB()
    self.extra_variance = extra_variance
    self.avg_weight = avg_weight
    self.pooled_weight = pooled_weight

  def fit(self, X,Y):
    """Fit the wrapped model, then overwrite each class's variance row."""
    self.model.fit(X,Y)
    pooled_share = self.pooled_weight
    average_share = self.avg_weight
    # Whatever weight is not given to the two targets stays on the
    # variance the model estimated itself.
    own_share = 1.0 - pooled_share - average_share
    feature_variances = np.var(X, 0)
    for class_idx in xrange(self.model.sigma_.shape[0]):
      own = self.model.sigma_[class_idx, :]
      blended = own_share*own + pooled_share * feature_variances
      blended = blended + average_share * np.mean(own)
      blended = blended + self.extra_variance
      self.model.sigma_[class_idx, :] = blended

  def predict(self, X):
    """Return the wrapped model's predictions for X."""
    return self.model.predict(X)

Esempio n. 27

0

Mostra file

File: poi_id.py Progetto: yielder/identifying-fraud-from-enron-email

def univariateFeatureSelection(f_list, my_dataset):
	"""Score every feature on its own with a Gaussian NB classifier.

	For each feature in ``f_list``, falsy and 'NaN' values are replaced
	with 0 in ``my_dataset`` (in place). The absolute feature values are
	then split 1000 times with StratifiedShuffleSplit, all training folds
	and all test folds are pooled, and one GaussianNB is fitted and scored
	with ``score_func``.

	Returns a list of ``(feature, score[0], score[1], score[2])`` tuples
	sorted by the last element, highest first.
	"""
	from sklearn.cross_validation import StratifiedShuffleSplit
	from sklearn.naive_bayes import GaussianNB

	result = []
	for feature in f_list:
		# Replace missing values ('NaN' or anything falsy) with 0
		for name in my_dataset:
			record = my_dataset[name]
			value = record[feature]
			if not value or value == 'NaN':
				record[feature] = 0

		data = featureFormat(my_dataset, ['poi',feature], sort_keys = True, remove_all_zeroes = False)
		labels, features = targetFeatureSplit(data)
		features = [abs(x) for x in features]

		cv = StratifiedShuffleSplit(labels, 1000, random_state = 42)
		features_train, labels_train = [], []
		features_test, labels_test = [], []
		# Pool the samples of every fold into one train set and one test set
		for train_idx, test_idx in cv:
			features_train.extend(features[ii] for ii in train_idx)
			labels_train.extend(labels[ii] for ii in train_idx)
			features_test.extend(features[jj] for jj in test_idx)
			labels_test.extend(labels[jj] for jj in test_idx)

		clf = GaussianNB()
		clf.fit(features_train, labels_train)
		score = score_func(labels_test, clf.predict(features_test))
		result.append((feature,score[0],score[1],score[2]))

	result.sort(key=lambda row: row[3], reverse=True)
	return result

Esempio n. 28

0

Mostra file

File: custom.py Progetto: RPI-WCL/pilots

class CruiseAlgorithm(object):
	# cruise algorithm is used to classify the cruise phase vs noncruise phase, it uses the differential change in data stream as the input matrix
	def __init__(self, testing=False):
		self.core = GaussianNB()
		self.scaler = RobustScaler()
		self.X_prev = None
		self.testing = testing
	def fit(self,X,Y): # Y should be the label of cruise or not
		X = self.prepare(X)
		self.core.fit(X,Y.ravel())
	def predict(self, X):
		if self.testing:
			X_t = self.prepare(X)
		else:
			if self.X_prev:
				X_t = X - self.X_prev
			else:
				X_t = X
			self.X_prev = X

		print repr(X_t)
		prediction_result = self.core.predict(X_t)
		return np.asmatrix(prediction_result)

	def prepare(self,X):
		a = np.zeros((X.shape[0],X.shape[1]))
		for i in xrange(X.shape[0]-1):
			a[i+1,:] = X[i+1] - X[i]
		return a

Esempio n. 29

0

Mostra file

File: util.py Progetto: AriannaYuan/GeomDeepNeuralNet

def myClassifier(X,Y,model,CV=4, scoreType='pure'):
    """Train the requested classifier on (X, Y) and report its accuracy.

    :param X: feature rows, e.g. [[0, 0], [1, 1], [1, 2]]
    :param Y: labels, e.g. [0, 1, 2]
    :param model: "SVM", "LR", "NB" or "MLP" (multilayer perceptron)
    :param CV: number of folds when ``scoreType`` is 'cv'
    :param scoreType: 'cv' for mean cross-validated accuracy, 'pure' for
        accuracy on the training data itself
    :return: ``(accuracy, fitted classifier)``
    :raises ValueError: for an unknown ``model`` or ``scoreType``; both
        used to surface later as unbound-local errors
    """
    print "Error Analysis using", scoreType
    if scoreType not in ('cv', 'pure'):
        raise ValueError("unknown scoreType %r; expected 'cv' or 'pure'" % (scoreType,))

    if model == "SVM":
        clf = svm.SVC(probability=True, random_state=0, kernel='rbf')
    elif model == "LR":
        clf = linear_model.LogisticRegression()
    elif model == "NB":
        clf = GaussianNB()
    elif model=='MLP': # multilayer perceptron
        clf = MLPClassifier( hidden_layer_sizes=[100],algorithm='l-bfgs')
    else:
        raise ValueError("unknown model %r; expected 'SVM', 'LR', 'NB' or 'MLP'" % (model,))

    # Every model is fitted here. The SVM branch used to skip this, so
    # 'pure' scoring failed on an unfitted estimator.
    clf.fit(X, Y)

    if scoreType == 'cv':
        accu = np.mean(cross_validation.cross_val_score(clf, X, Y, scoring='accuracy',cv=CV))
    else:
        predictions=clf.predict(X)
        matches = sum(int(predictions[q]==Y[q]) for q in range(len(Y)))
        # float() avoids Python 2 integer division, which truncated the
        # accuracy to 0 or 1.
        accu = float(matches)/len(Y)
    return accu, clf

Esempio n. 30

0

Mostra file

File: poi_id.py Progetto: yielder/identifying-fraud-from-enron-email

def selectKBest(previous_result, data):
	"""Score Gaussian NB on the top-k ranked features for k = 1..10.

	``previous_result`` is a ranked list whose entries start with the
	feature name; the entries at positions 4 and 5 are removed from it in
	place first. For each k, the first k remaining features are evaluated
	exactly like a single feature: 1000 stratified shuffle splits are
	pooled, then one GaussianNB is fitted and scored with ``score_func``.

	Returns a list of ``(k, score[0], score[1], score[2])`` tuples.

	NOTE(review): the ``data`` argument is never read. The dataset comes
	from the module-level ``my_dataset`` and the local ``data`` name is
	reassigned. Confirm whether the parameter was meant to be the dataset.
	"""
	from sklearn.cross_validation import StratifiedShuffleSplit
	from sklearn.naive_bayes import GaussianNB

	# remove 'restricted_stock_deferred' and 'director_fees'
	for _ in range(2):
		previous_result.pop(4)

	result = []
	_k = 10
	for k in range(0,_k):
		feature_list = ['poi'] + [previous_result[n][0] for n in range(0,k+1)]

		data = featureFormat(my_dataset, feature_list, sort_keys = True, remove_all_zeroes = False)
		labels, features = targetFeatureSplit(data)
		features = [abs(x) for x in features]

		cv = StratifiedShuffleSplit(labels, 1000, random_state = 42)
		features_train, labels_train = [], []
		features_test, labels_test = [], []
		# Pool the samples of every fold into one train set and one test set
		for train_idx, test_idx in cv:
			features_train.extend(features[ii] for ii in train_idx)
			labels_train.extend(labels[ii] for ii in train_idx)
			features_test.extend(features[jj] for jj in test_idx)
			labels_test.extend(labels[jj] for jj in test_idx)

		clf = GaussianNB()
		clf.fit(features_train, labels_train)
		score = score_func(labels_test, clf.predict(features_test))
		result.append((k+1,score[0],score[1],score[2]))
	return result

Esempio n. 31

0

Mostra file

    with open('temp.apk', 'wb') as f:
        while size > 0:
            data = client.recv(1024)
            f.write(data)
            size -= len(data)
    print('APK Saved')
    ap = apk.APK('Apps/temp.apk')
    per = ap.get_permissions()
    permissions = []

    for line in per:
        curr = ''
        for i in reversed(line):
            if i != '.':
                curr += i
            else:
                break
        curr = curr[::-1]
        permissions.append(curr)
    P = np.genfromtxt('Training/Perdiction.csv', delimiter=',')
    for i in permissions:
        if get_index.get(i) != None:
            P[get_index.get(i)] = 1.0

    result = ''
    if clf.predict([P]) == 0.0:
        result = 'Non-Malicious'
    else:
        result = 'Malicious'
    client.sendall(result)
    client.close()

Esempio n. 32

0

Mostra file

File: main.py Progetto: nyavuzcan/PyhtonDataMining

from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, confusion_matrix
# Load the semicolon-delimited divorce dataset.
dataset=pd.read_csv('divorce.csv',delimiter=";")
# De-duplicated copy of the data; only its row count is reported below.
# NOTE(review): everything after this point uses `dataset`, not `a`, so any
# duplicate rows still reach the train/test split -- confirm this is intended.
a=dataset.drop_duplicates()


print("DUPLICATE SONRASI YENİ VERİ SAYIMIZ:")
print(len(a))

# Features are the first 54 columns by position; the target is the "Class" column.
X=dataset.iloc[:,0:54]
y=dataset["Class"]
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20, random_state=1)
gaussian_bayes = GaussianNB()
gaussian_bayes.fit(X_train,y_train.values.ravel())
y_pred = gaussian_bayes.predict(X_test)
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))
# The same confusion matrix is printed a second time, this time under a heading.
print("CONFUSION MATRIX")
print(confusion_matrix(y_test, y_pred))


print("Accuracy:",metrics.accuracy_score(y_test, y_pred))


# Precision score, averaged over classes weighted by their support.
# NOTE(review): roc_auc_score is imported here but not used in this excerpt.
from sklearn.metrics import precision_score, roc_auc_score

print("Precision")
print(precision_score(y_test, y_pred, average='weighted'))

Esempio n. 33

0

Mostra file

# Read pixel values into X, read class values into y
df_X = pandas.read_csv("../../data/x_train_gr_smpl.csv")
df_y = pandas.read_csv("../../data/y_train_smpl.csv")

# Shuffle the order of the data (keeping the X and y rows in sync)
df_X, df_y = shuffle(df_X, df_y)

# Split dataset into training and testing set, 90% and 10%, respectively
X_train, X_test, y_train, y_test = train_test_split(df_X,
                                                    df_y,
                                                    test_size=0.1,
                                                    random_state=0)

naive_bayes = GaussianNB()
classifier = naive_bayes.fit(X_train, y_train)
y_predicted = naive_bayes.predict(X_test)
print("\nNaive Bayes accuracy score: ",
      round(metrics.accuracy_score(y_test, y_predicted) * 100, 2), "%\n")

# Plot non-normalized confusion matrix
labels = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]

np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
titles_options = [("Confusion matrix, without normalization", None),
                  ("Normalized confusion matrix", 'true')]

for title, normalize in titles_options:
    disp = plot_confusion_matrix(classifier,
                                 X_test,

Esempio n. 34

0

Mostra file

                                      outcome_feature,
                                      test_size=0.5,
                                      random_state=0)

###
### Define Classifier
###

clf = GaussianNB()

###
### Train Classifier on (X1,Y1) and Validate on (X2,Y2)
###

clf.fit(X_1, Y_1)
score = clf.score(X_2, Y_2)
print("accuracy: {0}".format(score.mean()))

###
### Print Confusion Matrix
###

output = clf.predict(X_2)

matrix = confusion_matrix(output, Y_2)
print(matrix)

###
### Save Classifier
###
joblib.dump(clf, 'model/nb.pkl')

Esempio n. 35

0

Mostra file

File: cardiopred.py Progetto: hh28928/coronary_heart_disease_risk_factor_prediction

def NB(train, test, pred):
    """Fit a Gaussian Naive Bayes model and predict labels for ``test``.

    Args:
        train: Training feature matrix handed to ``GaussianNB.fit``.
        test: Feature matrix to predict on.
        pred: Training targets paired with ``train`` (despite the name,
            these are the labels the model is fitted on).

    Returns:
        Whatever ``GaussianNB.predict`` returns for ``test``.
    """
    # fit() returns the fitted estimator, so the calls can be chained.
    return GaussianNB().fit(train, pred).predict(test)

Esempio n. 36

0

Mostra file

File: Semi_supervised_learning.py Progetto: acsum/task4

# Split the labeled frame into its target column and its feature columns.
y_train_labeled = train_labeled['y']
# Public DataFrame.drop replaces the private _drop_axis helper, whose
# signature is not part of the pandas API and may change between releases.
x_train_labeled = train_labeled.drop(['y'], axis=1)
x_train_unlabeled = train_unlabeled

# Switch to numpy
# Preprocessing X: stack labeled rows first, then unlabeled rows.
x_train_labeled = np.array(x_train_labeled)
x_train_unlabeled = np.array(x_train_unlabeled)
x_train = [*x_train_labeled, *x_train_unlabeled]
x_test = np.array(test)

# Preprocessing y: unlabeled rows get the placeholder label -1. The count is
# taken from the data instead of a hard-coded 21000 so X and y always line up.
y_train_labeled = np.array(y_train_labeled)
ones = -1 * np.ones(len(x_train_unlabeled))
y_train = np.concatenate((y_train_labeled, ones))

# Trying Gaussian Naive Bayes
# NOTE(review): GaussianNB is fully supervised, so -1 is learned as an
# ordinary class and can be predicted for test rows -- confirm this is wanted.
gnb = GaussianNB()
gnb.fit(x_train, y_train)
y_pred = gnb.predict(x_test)

# Output results: one row per test sample, keyed by the test frame's index.
d = {'Id': test.index, 'y': y_pred}
output = pd.DataFrame(d)
output.to_csv('output1.csv', index=False)

# from sklearn.metrics import accuracy_score
# acc = accuracy_score(y, y_pred)

Esempio n. 37

0

Mostra file

File: iris_classification.py Progetto: amarrerod/DataScience

# Entrenamiento Supervisado: Clasificacion de Iris

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load the data
iris = sns.load_dataset('iris')
y_iris = iris['species']
# Everything except the species column is a feature.
x_iris = iris.drop(columns='species')

# Split the data into two sets: training and testing
xtrain, xtest, ytrain, ytest = train_test_split(x_iris, y_iris, random_state=1)
model = GaussianNB()
print("Entrenando el Modelo GaussianNB...")
model.fit(xtrain, ytrain)
print("Evaluando nuevos datos...")
ymodel = model.predict(xtest)
# The message says "Precision" but the value reported is the accuracy score.
final_score = accuracy_score(ytest, ymodel)
print("Precision final: {}".format(final_score))

Esempio n. 38

0

Mostra file

File: gnb-program6.py Progetto: Abhishek-0673/7th-SEM-CSE-ML-LAB-PROGRAMS

conducted between 1958 and 1970 at the University 
of Chicago's Billings Hospital on the survival of 
patients who had undergone surgery for cancer

1. Age of patient at time of operation (numerical) 
2. Patient's year of operation (year - 1900, numerical) 
3. Number of positive axillary nodes detected (numerical) 
4. Survival status (class attribute) 
-- 1 = the patient survived 5 years or longer 
-- 2 = the patient died within 5 year
'''
c1, c2, c3, c4 = np.loadtxt('data.csv', unpack=True, delimiter=',')
x = np.column_stack((c1, c3))
y = c4
# Create NaiveBayes Classifier
clf = GaussianNB()
# fit the mode
clf.fit(x, y)
# make predictions
predictions = clf.predict(x)

# calculate accuracy
print(accuracy_score(y, predictions))

from matplotlib import pyplot as plt

plt.scatter(c1, c3, c=c4)
plt.colorbar(ticks=[1, 2])
plt.xlabel("Age of the patient")
plt.ylabel("No of positive axillary nodes")

Esempio n. 39

0

Mostra file

File: diabetes.py Progetto: SURAJGITA/diabetes-prediction

                                                    test_size=0.3,
                                                    random_state=109)

# In[36]:

X_train.shape, X_test.shape, y_train.shape, y_test.shape

# In[37]:

# Train the model using the training sets
model.fit(X_train, y_train)

# In[38]:

#Predict the response for test dataset
y_pred = model.predict(X_test)

# In[39]:

#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics

# Model Accuracy, how often is the classifier correct?
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

# In[42]:

## Apply Algorithm

from sklearn.ensemble import RandomForestClassifier
random_forest_model = RandomForestClassifier(random_state=10)

Esempio n. 40

0

Mostra file

File: naive_bayes_risco_credito.py Progetto: Mizzzael/Machine-Learning-Udemy-Course

# -*- coding: utf-8 -*-

import pandas as pd

# Columns 0-3 are the predictors, column 4 is the class to learn.
base = pd.read_csv('risco_credito.csv')
previsores = base.iloc[:, 0:4].values
classe = base.iloc[:, 4].values

from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder

labelEncoder = LabelEncoder()

# Replace each predictor column in place with its integer codes. The single
# encoder is re-fitted per column, so it ends up fitted on the last column.
for coluna in range(4):
    previsores[:, coluna] = labelEncoder.fit_transform(previsores[:, coluna])

classificador = GaussianNB()
classificador.fit(previsores, classe)

# Two encoded sample rows to classify:
#   good history, high debt, no collateral, income > 35
#   bad history, high debt, adequate collateral, income < 15
amostras = [[0, 0, 1, 2], [2, 0, 0, 0]]
resultado = classificador.predict(amostras)

# Show what the fitted model learned about the classes.
for atributo in (classificador.classes_,
                 classificador.class_count_,
                 classificador.class_prior_):
    print(atributo)

Esempio n. 41

0

Mostra file

File: wifi_localization_modified.py Progetto: joshua-hong-98/Visualizations

  features2[np.isnan(features2)] = -100

  #TODO 1: Compute the cosine similarity matrix of your own wifi signal strength
# def cosine_similarity(a,b):
#   numerator = np.dot(a,b)
#   x = np.sqrt(np.sum(np.square(a)))
#   y = np.sqrt(np.sum(np.square(b)))
#   denominator = x*y 
#   return numerator/denominator  

def new_matrix(num):
  """Return the square matrix of pairwise cosine similarities between rows.

  Entry ``[r, c]`` holds ``cosine_similarity(num[r, :], num[c, :])``, so the
  result has one row and one column per row of ``num``.
  """
  size = num.shape[0]
  similarities = np.zeros((size, size))
  for row in range(size):
    for col in range(size):
      similarities[row, col] = cosine_similarity(num[row, :], num[col, :])
  return similarities

similarity_matrix = new_matrix(features)
plot_consine_similarity(similarity_matrix, labels)

# Reuse the matrix computed above instead of rebuilding it just to read its size.
print(similarity_matrix.shape[0])

# TODO 2: Compute the cosine similarity matrix of two different people's wifi scans
similarity_matrix2 = new_matrix(features2)
plot_consine_similarity(similarity_matrix2, labels2)

# TODO 3: Classify the location of the other person
# The first 14 scans are the training set; scans 14..34 are classified one by one.
clf = GaussianNB()
clf.fit(np.array(features2[:14]), np.array(labels2[:14]))
for i in range(14, 35):
    # predict() requires a 2-D (n_samples, n_features) input, so the single
    # scan is wrapped in a list; the printed value is still a one-element array.
    print(clf.predict([features2[i]]))

Esempio n. 42

0

Mostra file

y = dataset.iloc[:, 8:9].values

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.25,
                                                    random_state=0)

# Fitting Naive Bayes to the Training set
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
# y is sliced as a single-column 2-D array; flatten it so fit() receives the
# 1-D target it expects instead of emitting a column-vector conversion warning.
classifier.fit(X_train, y_train.ravel())

# Save the model to disk. The context manager guarantees the file is flushed
# and closed before it is read back.
filename = 'Naive Bayes Diabetes.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)

# Load the model from disk.
# NOTE: pickle.load executes arbitrary code; only load files this script wrote.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Score the reloaded model on the test set
result = loaded_model.score(X_test, y_test)
print("Test score: {0:.2f} %".format(100 * result))

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)

Esempio n. 43

0

Mostra file

class GNB(object):
    """Behaviour classifier backed by a Gaussian Naive Bayes model.

    Observations are 4-value sequences ``(s, d, s_dot, d_dot)``. Only the last
    component (index 3) is used as a feature, both for training and prediction.
    """

    def __init__(self):
        self.possible_labels = ['left', 'keep', 'right']
        self.clf = GaussianNB()
        # Not applied to the data anywhere in this class; kept as an attribute
        # so existing code that accesses it keeps working.
        self.scaler = StandardScaler()

    def train(self, data, labels):
        """Train the classifier with N data points and labels.

        Args:
            data: Sequence of N observations, each indexable with at least
                4 values ``(s, d, s_dot, d_dot)``, e.g.
                ``[[3.5, 0.1, 5.9, -0.02], [8.0, -0.3, 3.0, 2.2], ...]``.
            labels: Sequence of N labels, each one of "left", "keep" or
                "right".
        """
        # One single-feature row per observation: the value at index 3.
        x = [[i[3]] for i in data]
        self.clf.fit(x, labels)

    def predict(self, observation):
        """Predict the behaviour for a single observation.

        Args:
            observation: Indexable with at least 4 values
                ``(s, d, s_dot, d_dot)``, e.g. ``[3.5, 0.1, 8.5, -0.2]``.

        Returns:
            The result of ``self.clf.predict`` for this one sample, i.e. a
            one-element sequence holding the predicted label.
        """
        # The classifier expects a 2-D (n_samples, n_features) input, so the
        # single feature is wrapped as one row with one column, matching the
        # row layout used in train().
        features = [[observation[3]]]
        prediction = self.clf.predict(features)
        return prediction

Esempio n. 44

0

Mostra file

File: train.py Progetto: devilJhackz/hackertest

accuracy = knn.score(xtest, ytest)
print(accuracy)

# creating a confusion matrix
knn_predictions = knn.predict(x_test)
'''
'''
from sklearn.tree import DecisionTreeClassifier
dtree_model = DecisionTreeClassifier(max_depth = 7).fit(xtrain, ytrain)
dtree_predictions = dtree_model.predict(x_test)
list=[]
'''
from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB().fit(xtrain, ytrain)
gnb_predictions = gnb.predict(x_test)

# accuracy on X_test
accuracy = gnb.score(xtest, ytest)
print(accuracy)
list = []
for i in gnb_predictions:
    '''
    list.append(i)
    temp=list[i]
    list.append(out[temp])
    '''
    list.append(out[i])
    print(out[i])
##

Esempio n. 45

0

Mostra file

# Flatten every image into one feature row per sample.
data = digits.images.reshape((n_samples, -1))  # shape: (n_samples, pixels)

classifier = GaussianNB()
# Other classifiers left commented out:
#MLPClassifier(alpha=1, hidden_layer_sizes=(25, 15), random_state=1)
#svm.SVC(gamma=1)#KNeighborsClassifier(3)#GaussianNB()
filename = "naive_bayes.bin"

# Train the model on the first two thirds of the labelled samples.
classifier.fit(data[:int(n_samples * 2 / 3)],
               digits.target[:int(n_samples * 2 / 3)])

# Save the trained model to disk and reload it.
_ = joblib.dump(classifier, filename)
classifier = joblib.load(filename)

# NOTE(review): evaluation starts at n_samples/3 while training covered the
# first 2/3 of the samples, so the middle third is both trained on and scored.
# The report below is therefore optimistic -- confirm whether the slice should
# start at n_samples * 2 / 3 (the randint bound below would need adjusting too).
predicted = classifier.predict(data[int(n_samples / 3):])
expected = digits.target[int(n_samples / 3):]

print("Classification report for classifier %s:\n%s\n" %
      (classifier, metrics.classification_report(expected, predicted)))
# Pair each evaluated image with its predicted label.
images_and_predictions = list(
    zip(digits.images[int(n_samples / 3):], predicted))
x = randint(0, int(n_samples / 3))  #to show different examples each time

# Show up to 21 consecutive examples on a 3x7 grid, titled "predicted(expected)".
for index, (image, prediction) in enumerate(images_and_predictions[x:x + 21]):
    plt.subplot(3, 7, index + 1)
    plt.axis('off')
    plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    plt.title('%i(%i)' % (prediction, expected[x + index]))

plt.show()

Esempio n. 46

0

Mostra file

previsores[:, 6] = labelencoder_previsores.fit_transform(previsores[:, 6])
previsores[:, 7] = labelencoder_previsores.fit_transform(previsores[:, 7])
previsores[:, 8] = labelencoder_previsores.fit_transform(previsores[:, 8])
previsores[:, 9] = labelencoder_previsores.fit_transform(previsores[:, 9])
previsores[:, 13] = labelencoder_previsores.fit_transform(previsores[:, 13])

onehotencoder = OneHotEncoder(categorical_features=[1, 3, 5, 6, 7, 8, 9, 13])
previsores = onehotencoder.fit_transform(previsores).toarray()

labelencoder_classe = LabelEncoder()
classe = labelencoder_classe.fit_transform(classe)

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
previsores = scaler.fit_transform(previsores)

from sklearn.model_selection import train_test_split
previsores_treinamento, previsores_teste, classe_treinamento, classe_teste = train_test_split(
    previsores, classe, test_size=0.15, random_state=0)

from sklearn.naive_bayes import GaussianNB
classificador = GaussianNB()
classificador.fit(previsores_treinamento, classe_treinamento)

# Resultado da previsão
previsoes = classificador.predict(previsores_teste)

# Verifica o percentual de acerto
from sklearn.metrics import confusion_matrix, accuracy_score
precisao = accuracy_score(classe_teste, previsoes)
matriz = confusion_matrix(classe_teste, previsoes)

Esempio n. 47

0

Mostra file

                                                                           0].values.tolist(
                                                                           )

# Test
test_x, test_y = read_test_class.iloc[:,
                                      1:].values, read_test_class.iloc[:,
                                                                       0].values.tolist(
                                                                       )

# =============================================================================
# TRADITIONAL MACHINE LEARNING ALGORITHMS
# =============================================================================
print("Training Gaussian Naive Bayes classifier:")
my_classifier = GaussianNB(priors=None)
my_classifier.fit(train_x, train_y)
pred_lbl_GNB = my_classifier.predict(test_x)  # Prediction label/class
pred_prb_GNB = my_classifier.predict_proba(test_x)
# predict probability for all target labels

print(" Training Random Forest classifier:")
my_classifier = RandomForestClassifier(max_depth=10, n_estimators=30)
my_classifier.fit(train_x, train_y)
pred_lbl_RFC = my_classifier.predict(test_x)  # Prediction label/class
pred_prb_RFC = my_classifier.predict_proba(test_x)
# predict probability for all target labels

print(" Training Nearest Neighbors classifier:")
n_neighbors = 100
# Optional (default = 5)
weights = 'uniform'  # str or callable, optional (default = 'uniform'), 'distance'
algorithm = 'kd_tree'  # {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional

Esempio n. 48

0

Mostra file

def test_gnb_prior_large_bias():
    """Check the prediction when the class priors heavily favour one class."""
    skewed_priors = np.array([0.01, 0.99])
    model = GaussianNB(priors=skewed_priors)
    model.fit(X, y)
    predicted = model.predict([[-0.1, -0.1]])
    assert predicted == np.array([2])

Esempio n. 49

0

Mostra file

File: titanic-eda-to-ml-beginner.py Progetto: ajmal017/data-journey

logreg = LogisticRegression()
logreg.fit(X_train,y_train)
pred_logreg = logreg.predict(X_test)
print(confusion_matrix(y_test, pred_logreg))
print(classification_report(y_test, pred_logreg))
print(accuracy_score(y_test, pred_logreg))
logreg.fit(X_train_all, y_train_all)
pred_all_logreg = logreg.predict(X_test_all)
sub_logreg = pd.DataFrame()
sub_logreg['PassengerId'] = df_test['PassengerId']
sub_logreg['Survived'] = pred_all_logreg
#sub_logmodel.to_csv('logmodel.csv',index=False)
from sklearn.naive_bayes import GaussianNB
gnb=GaussianNB()
gnb.fit(X_train,y_train)
pred_gnb = gnb.predict(X_test)
print(confusion_matrix(y_test, pred_gnb))
print(classification_report(y_test, pred_gnb))
print(accuracy_score(y_test, pred_gnb))

from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=20)
knn.fit(X_train_sc,y_train_sc)
pred_knn = knn.predict(X_test)
print(confusion_matrix(y_test, pred_knn))
print(classification_report(y_test, pred_knn))
print(accuracy_score(y_test, pred_knn))
knn.fit(X_train_all, y_train_all)
pred_all_knn = knn.predict(X_test_all)
sub_knn = pd.DataFrame()
sub_knn['PassengerId'] = df_test['PassengerId']

Esempio n. 50

0

Mostra file

checkpointer = ModelCheckpoint(filepath='best_weights.hdf5',
                               verbose=1,
                               save_best_only=True)
model.fit(x_train,
          y_train,
          validation_data=(x_test, y_test),
          callbacks=[monitor, checkpointer],
          epochs=1)
#print(history.history.keys())

feat_train = model.predict(x_train)
feat_test = model.predict(x_test)
gnb = GaussianNB()
gnb.fit(feat_train, np.argmax(y_train, axis=1))
print("trainning score...", gnb.score(feat_train, np.argmax(y_train, axis=1)))
print("testing score...", gnb.score(feat_test, np.argmax(y_test, axis=1)))
pred_labels = gnb.predict(feat_test)
probas = gnb.predict_proba(feat_test)
confusion_matrix = metrics.confusion_matrix(np.argmax(y_test, axis=1),
                                            pred_labels)
print("\n\nConfusion Matrix {} %".format(confusion_matrix))
classification_report = metrics.classification_report(np.argmax(y_test,
                                                                axis=1),
                                                      pred_labels,
                                                      target_names=outcome)
print("\n\nClassifiction Scores {} %".format(classification_report))
skplt.metrics.plot_precision_recall_curve(np.argmax(y_test, axis=1), probas)
plt.show()
skplt.metrics.plot_roc_curve(np.argmax(y_test, axis=1), probas)
plt.show()

Esempio n. 51

0

Mostra file

from sklearn.metrics import confusion_matrix

accuracy_score(y_cv, pred_cv)
matrix = confusion_matrix(y_cv, pred_cv)
print(matrix)

# In[72]:

from sklearn.naive_bayes import GaussianNB

nb = GaussianNB()
nb.fit(x_train, y_train)

# In[73]:

pred_cv4 = nb.predict(x_cv)

# In[74]:

print("Accuracy:", metrics.accuracy_score(y_cv, pred_cv4))

# In[75]:

pred_test = nb.predict(testdf)

# In[85]:

finaldf['Loan_Status'] = pred_test
finaldf.head()

# In[86]:

Esempio n. 52

0

Mostra file

File: iris.py Progetto: shreyams162/Practice_MachineLearning

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.25, random_state = 0)

# Logistic Regression
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

# Accuracy is symmetric in its two arguments; they are passed as
# (true, predicted) to match the confusion_matrix calls.
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred)

# Naive Bayes
from sklearn.naive_bayes import GaussianNB
classifier_NB = GaussianNB()
classifier_NB.fit(X_train, y_train)

y_pred_NB = classifier_NB.predict(X_test)

# Score the Naive Bayes predictions themselves. Using y_pred here would
# silently report the logistic-regression results under the NB names.
cm_NB = confusion_matrix(y_test, y_pred_NB)
accuracy_NB = accuracy_score(y_test, y_pred_NB)

Esempio n. 53

0

Mostra file

    def TrainModel(self):
        """Train nine classifiers and report their accuracies.

        Each classifier is fitted on ``self.X_train`` / ``self.y_train`` and
        scored on both the training and the test split. The percentages,
        rounded to two decimals, are appended to ``self.browser``; the test
        accuracies are also shown in a table view stored on
        ``self.tableView``.
        """
        self.browser.clear()
        # Set Data Set: features are taken as their underlying ``.values``.
        X_train1, X_test1 = self.X_train.values, self.X_test.values
        y_train1, y_test1 = self.y_train, self.y_test
        self.browser.append("Load Dataset")
        self.browser.append("")
        self.browser.append("")

        # (browser heading, summary-table name, estimator class, constructor
        # kwargs), listed in the order the results appear in the browser.
        candidates = [
            ("<Logistic Regression Model>", 'Logistic Regression',
             LogisticRegression, {}),
            ("<Support Vector Machine's>", 'Support Vector Machines',
             SVC, {}),
            ("<Naive Bayes>", 'Naive Bayes',
             GaussianNB, {}),
            ("<K-Nearest Neighbours>", 'KNN',
             KNeighborsClassifier, {'n_neighbors': 3}),
            ("<Decision Tree's>", 'Decision Tree',
             DecisionTreeClassifier, {}),
            ("<Stochastic Gradient Decent Classifier>",
             'Stochastic Gradient Decent',
             SGDClassifier, {'max_iter': 10000}),
            ("<Linear Support Vector Machines>", 'Linear SVC',
             LinearSVC, {}),
            ("<Perceptron>", 'Perceptron',
             Perceptron, {'max_iter': 1000}),
            ("<Random Forest>", 'Random Forest',
             RandomForestClassifier, {'n_estimators': 100}),
        ]

        test_scores = {}
        for heading, table_name, estimator_cls, params in candidates:
            # Build each estimator right before fitting it, one at a time.
            estimator = estimator_cls(**params)
            estimator.fit(X_train1, y_train1)
            acc_train = round(estimator.score(X_train1, y_train1) * 100, 2)
            acc_test = round(estimator.score(X_test1, y_test1) * 100, 2)
            self.browser.append(heading)
            self.browser.append("Train acc : " + str(acc_train) + "%")
            self.browser.append("Test acc : " + str(acc_test) + "%")
            self.browser.append("")
            test_scores[table_name] = acc_test

        # Initial row order of the summary table. No DataFrame sort is needed:
        # the view is sorted by the Score column further down.
        table_order = [
            'Support Vector Machines', 'KNN', 'Logistic Regression',
            'Random Forest', 'Naive Bayes', 'Perceptron',
            'Stochastic Gradient Decent', 'Linear SVC', 'Decision Tree'
        ]
        models = pd.DataFrame({
            'Model': table_order,
            'Score': [test_scores[name] for name in table_order]
        })
        models = PandasModelTrainData(models)
        self.tableView = QTableView()
        self.tableView.setSortingEnabled(True)
        self.tableView.setModel(models)
        self.tableView.setGeometry(850, 100, 320, 400)
        self.tableView.setColumnWidth(0, 200)
        # Column 1 is 'Score': show the best model first.
        self.tableView.sortByColumn(1, Qt.DescendingOrder)
        self.tableView.setWindowTitle("Accuracy")
        self.tableView.show()

Esempio n. 54

0

Mostra file

File: naive_bayes.py Progetto: dalalbhargav07/Machine-Learning-A-Z

                                                    test_size=0.25,
                                                    random_state=0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Fitting Naive Bayes to the Training set
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

# Visualising the Training set results
from matplotlib.colors import ListedColormap
X_set, y_set = X_train, y_train
colors = np.array(["red", "green"])
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1,
              stop=X_set[:, 0].max() + 1,
              step=0.01),
    np.arange(start=X_set[:, 1].min() - 1,
              stop=X_set[:, 1].max() + 1,

Esempio n. 55

0

Mostra file

cm = confusion_matrix(y_test, y_pred)

print(cm)

# In[6]:

from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()

start = time.time()
gnb.fit(X_train, y_train)
print('training completed in %s seconds' % (time.time() - start))

start = time.time()
y_pred = gnb.predict(X_test)
print('prediction completed in %s seconds' % (time.time() - start))

cm = confusion_matrix(y_test, y_pred)

print(cm)

# In[8]:

from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier(criterion='entropy')

start = time.time()
dt.fit(X_train, y_train)
print('training completed in %s seconds' % (time.time() - start))

Esempio n. 56

0

Mostra file

File: demo_lowrank.py Progetto: sohailkhanmarwat/Hyperspectral_Image_Classification_Package

KNN_predict_prob = KNN.predict_proba(data_all_scaled)
# Post-processing using Graph-Cut
Seg_Label, seg_accuracy = Post_Processing(KNN_predict_prob,height,width,\
                                          num_classes,y_test,test_indexes)
print('(KNN) Train_Acc=%.3f, Cla_Acc=%.3f, Seg_Acc=%.3f(Time_cost=%.3f)'\
      % (KNN.score(X_train_scaled,y_train),KNN.score(X_test_scaled,y_test),\
         seg_accuracy, (time.time()-start_time)))
# draw classification map
draw(GT_Label, KNN_Label, Seg_Label, train_map, test_map)
print('--------------------------------------------------------------------')

# Naive Bayes: GaussianNB
from sklearn.naive_bayes import GaussianNB
start_time = time.time()
GaussNB = GaussianNB().fit(X_train, y_train)
GaussNB_Label = GaussNB.predict(data_all).reshape(
    width, height).astype(int).transpose(1, 0)
GaussNB_predict_prob = GaussNB.predict_proba(data_all)
# Post-processing using Graph-Cut
Seg_Label, seg_accuracy = Post_Processing(GaussNB_predict_prob,height,width,\
                                          num_classes,y_test,test_indexes)
print('(GaussNB) Train_Acc=%.3f, Cla_Acc=%.3f, Seg_Acc=%.3f(Time_cost=%.3f)'\
      % (GaussNB.score(X_train,y_train),GaussNB.score(X_test,y_test),\
         seg_accuracy, (time.time()-start_time)))
# draw classification map
draw(GT_Label, GaussNB_Label, Seg_Label, train_map, test_map)
print('--------------------------------------------------------------------')

# discriminant_analysis - linear discriminant analysis
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# Restart the section timer, then train the LDA model on the training samples.
start_time = time.time()
LDA = LinearDiscriminantAnalysis()
LDA.fit(X_train, y_train)

Esempio n. 57

0

Mostra file

File: naive_bayes.py Progetto: TheDG/MachineLearning_A-Z


# %% codecell
# Fetch the prepared splits from the project's preprocessing helper.
# NOTE(review): sc_x is presumably the fitted feature scaler - confirm
# against preprocessed_data.preprocess_data().
prepared = preprocessed_data.preprocess_data()
x_train, x_test, y_train, y_test, sc_x = prepared


# %% codecell
# Train a Gaussian naive Bayes classifier on the training split.
classifier = GaussianNB()
classifier.fit(x_train, y_train)


# %% codecell
# Predict labels for the test split.
y_pred = classifier.predict(x_test)


# %% codecell
# Confusion matrix of the test labels against the predictions.
cm = confusion_matrix(y_test, y_pred)


# %% codecell
# Plot the classifier's predicted regions over the training features.
x_set, y_set = x_train, y_train

# Grid axes: columns 0 and 1 of x_set, padded by 1 on each side and
# sampled every 0.01.
axis_0 = np.arange(start=x_set[:, 0].min() - 1, stop=x_set[:, 0].max() + 1, step=0.01)
axis_1 = np.arange(start=x_set[:, 1].min() - 1, stop=x_set[:, 1].max() + 1, step=0.01)
X1, X2 = np.meshgrid(axis_0, axis_1)

# One row per grid node (two columns); classify every node and fold the
# flat predictions back into the grid's shape.
grid_points = np.array([X1.ravel(), X2.ravel()]).T
grid_classes = classifier.predict(grid_points).reshape(X1.shape)

plt.contourf(X1, X2, grid_classes,
             alpha=0.75, cmap=ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())

Esempio n. 58

0

Mostra file

File: app.py Progetto: manishmarahatta/smART

# Store the six *_cat values as columns of dup_df, in this order.
for column_name, column_values in (
        ('Stage_cat', Stage_cat),
        ('Duration_cat', Duration_cat),
        ('CD4start_cat', CD4start_cat),
        ('CD4number_cat', CD4number_cat),
        ('CD4last_cat', CD4last_cat),
        ('Perform_cat', Perform_cat),
):
    dup_df[column_name] = column_values

# Columns 0-5 of the frame are the model inputs; column 6 is the label.
frame_values = dup_df.values
features = frame_values[:, :6]
target = frame_values[:, 6]

# Hold out 20% of the rows for testing, with a fixed seed.
features_train, features_test, target_train, target_test = train_test_split(
    features, target, test_size=0.20, random_state=20)

# Fit Gaussian naive Bayes and predict the held-out rows.
clf = GaussianNB()
clf.fit(features_train, target_train)
target_pred = clf.predict(features_test)

# Score the predictions against the held-out labels.
acc = accuracy_score(target_test, target_pred, normalize=True)

# Nested mapping with a single entry, 'pp1', whose six fields are all 1.
# NOTE(review): looks like a sample input record for the classifier -
# confirm against where PPS is consumed.
PPS = {
    'pp1': dict(
        gender=1,
        who_stage=1,
        duration=1,
        start_cd4=1,
        no_cd4_done=1,
        recent_cd4=1,
    ),
}

Esempio n. 59

0

Mostra file

File: naive-bayes.py Progetto: rakesh230/Fake-news-Detection

    skplt.plot_confusion_matrix(yte, ypred)
    plt.show()


# Read the data: when any of the four cached .npy files is missing,
# recompute the embeddings and write all four; then always load from disk.
cache_complete = all(
    os.path.isfile(cached_path)
    for cached_path in ('./xtr.npy', './xte.npy', './ytr.npy', './yte.npy')
)
if not cache_complete:
    xtr, xte, ytr, yte = getEmbeddings("datasets/train.csv")
    # np.save appends the '.npy' suffix to these names.
    np.save('./xtr', xtr)
    np.save('./xte', xte)
    np.save('./ytr', ytr)
    np.save('./yte', yte)

xtr = np.load('./xtr.npy')
xte = np.load('./xte.npy')
ytr = np.load('./ytr.npy')
yte = np.load('./yte.npy')

# Fit the built-in Gaussian naive Bayes classifier and predict the test set.
gnb = GaussianNB()
gnb.fit(xtr, ytr)
y_pred = gnb.predict(xte)

# m = number of test labels, n = number of mismatched predictions.
m = yte.shape[0]
n = (yte != y_pred).sum()
accuracy_pct = (m - n) / m * 100
print("Accuracy = " + format(accuracy_pct, '.2f') + "%")  # author's recorded result: 72.94%

# Draw the confusion matrix
plot_cmat(yte, y_pred)

Esempio n. 60

0

Mostra file

File: power_newbayes.py Progetto: jiangzhongkai/python_1

    print("normal_error.shape",normal_error.shape)
    print("abno_error.shape",abno_error.shape)

    normal_error = np.c_[normal_error, np.zeros(len(normal_error))]
    abno_error = np.c_[abno_error, np.ones(len(abno_error))]


    dataset = np.r_[normal_error, abno_error]
    np.random.shuffle(dataset)

    train_x, test_x, train_y, test_y = train_test_split(dataset[:,:-1], dataset[:,-1], test_size=0.3, random_state=42)


    clf = GaussianNB()
    clf.fit(train_x, train_y)
    y_hat = clf.predict(train_x)
    y_score = clf.predict_proba(train_x)
    y_log_score = clf.predict_log_proba(train_x)
    y_test_hat = clf.predict(test_x)
    y_test_score = clf.predict_proba(test_x)
    print(accuracy_score(train_y, y_hat))
    print(metrics.recall_score(train_y, y_hat))
    print(metrics.classification_report(train_y, y_hat))
    print(metrics.classification_report(test_y, y_test_hat))
    print(y_score)
    print(y_test_score)
    print(y_test_hat)
    print(clf.classes_)

    # fpr, tpr, thresholds = metrics.roc_curve(train_y, y_hat)
    fpr, tpr, thresholds = metrics.roc_curve(test_y, y_test_score[:,-1])