def main():
    """Explore the ideal-weight data and classify sex with Gaussian naive Bayes.

    Reads ``ideal_weight.csv`` from the ``resources`` directory that sits in
    the parent of this file's directory, strips stray single quotes from the
    column names and from the ``sex`` column, shows histograms of the
    ``actual`` and ``ideal`` columns, fits a ``GaussianNB`` on the ``actual``,
    ``ideal`` and ``diff`` columns and prints the predicted label for two
    hand-written samples.
    """
    config = {}
    config['resource_dir'] = os.path.abspath(
        os.path.join(os.path.realpath(__file__), '../../')) + "/resources/"
    config['raw_file'] = config['resource_dir'] + "ideal_weight.csv"

    ideal_weight_df = pd.read_csv(config['raw_file'])
    # Column names and the 'sex' values carry literal single quotes; drop them.
    ideal_weight_df.columns = [
        name.replace("'", "")
        for name in ideal_weight_df.columns.values.tolist()
    ]
    ideal_weight_df.loc[:, 'sex'] = ideal_weight_df['sex'].map(
        lambda x: x.replace("'", ""))

    plt.hist(ideal_weight_df['actual'], alpha=0.5, label='actual')
    plt.hist(ideal_weight_df['ideal'], alpha=0.5, label='ideal')
    plt.show()  # figure_1.png

    ideal_weight_df['diff'].hist()

    # Numeric encoding of the label (1 for 'Male', 0 otherwise); the
    # classifier below is still trained on the string column.
    ideal_weight_df['sex_id'] = ideal_weight_df['sex'].map(
        lambda x: 1 if x == 'Male' else 0)

    clf = GaussianNB()
    clf.fit(ideal_weight_df[['actual', 'ideal', 'diff']],
            ideal_weight_df['sex'])

    # print() with a single argument behaves the same on Python 2 and 3.
    print(clf.predict([[145, 160, -15]]))  # expected: male
    print(clf.predict([[160, 145, 15]]))  # expected: female
def test_classification():
    """Train and evaluate a Gaussian naive Bayes classifier on the iris data.

    Uses the module-level ``data`` (feature matrix), ``target`` (species
    names) and ``zeros`` names.  Prints, in order: the prediction for the
    first sample, its true label, the hold-out accuracy, the confusion
    matrix, the classification report, the 6-fold cross-validation scores
    and their mean.
    """
    from numpy import mean
    from sklearn.metrics import classification_report, confusion_matrix
    # sklearn.cross_validation was removed; model_selection is its successor.
    from sklearn.model_selection import cross_val_score, train_test_split
    from sklearn.naive_bayes import GaussianNB

    # Encode the species names as the numeric classes 1, 2 and 3.
    t = zeros(len(target))
    t[target == 'setosa'] = 1
    t[target == 'versicolor'] = 2
    t[target == 'virginica'] = 3

    classifier = GaussianNB()
    classifier.fit(data, t)  # training on the iris dataset

    # predict() expects a 2-D array, so pass the first sample as a 1-row slice.
    print(classifier.predict(data[0:1]))
    print(t[0])

    train, test, t_train, t_test = train_test_split(
        data, t, test_size=0.4, random_state=0)
    classifier.fit(train, t_train)  # train
    print(classifier.score(test, t_test))  # test

    # The metrics take (y_true, y_pred) in that order; swapping them
    # transposes the matrix and exchanges precision with recall.
    predicted = classifier.predict(test)
    print(confusion_matrix(t_test, predicted))
    print(classification_report(
        t_test, predicted,
        target_names=['setosa', 'versicolor', 'virginica']))

    # cross validation with 6 iterations
    scores = cross_val_score(classifier, data, t, cv=6)
    print(scores)
    print(mean(scores))
def MyNaiveBayes(object):
    """Predict x/y positions with two GaussianNB models and report the error.

    The data comes from ``PreProcess().split()``.  One classifier is trained
    per coordinate, both are applied to the test values, and the mean
    absolute error per axis plus the mean Euclidean error are printed and
    returned.

    Args:
        object: Not used by the body; kept so existing callers keep working.

    Returns:
        tuple: ``(x, y, d)``, the mean absolute x error, the mean absolute
        y error and the mean Euclidean distance error.
    """
    pre = PreProcess()
    (training_value, test_value, test_pos_x, test_pos_y,
     training_pos_x, training_pos_y) = pre.split()

    # Model initialisation: one classifier per coordinate.
    clf_x = GaussianNB()
    clf_y = GaussianNB()

    # Model training.
    clf_x.fit(training_value, training_pos_x)
    clf_y.fit(training_value, training_pos_y)

    # Prediction.
    result_pos_x = clf_x.predict(test_value)
    result_pos_y = clf_y.predict(test_value)

    # Error computation.
    x_dis = []
    y_dis = []
    d_dis = []
    for i in range(len(result_pos_x)):
        dx = result_pos_x[i] - test_pos_x[i]
        dy = result_pos_y[i] - test_pos_y[i]
        x_dis.append(abs(dx))
        y_dis.append(abs(dy))
        d_dis.append(math.sqrt(dx ** 2 + dy ** 2))

    # float() keeps the means from being truncated by Python 2 integer
    # division when the positions are integers.
    count = float(len(result_pos_x))
    x = sum(x_dis) / count
    y = sum(y_dis) / count
    d = sum(d_dis) / count

    print("%s %s %s" % (x, y, d))
    return x, y, d
class GaussianNBClassifier:
    """Thin wrapper around a scikit-learn ``GaussianNB`` model.

    Call ``buildModel`` before training; ``clf`` is ``None`` until then.
    """

    def __init__(self):
        """Initialise the wrapper without building the underlying model."""
        self.outputHeader = "#gnb"
        self.clf = None

    def buildModel(self):
        """Create the underlying Gaussian NB classifier."""
        self.clf = GaussianNB()

    def trainGaussianNB(self, X, Y):
        """Fit the classifier on features ``X`` and labels ``Y``."""
        self.clf.fit(X, Y)

    def validateGaussianNB(self, X, Y):
        """Print the accuracy of the classifier on a validation set."""
        self._printAccuracy(X, Y)

    def testGaussianNB(self, X, Y):
        """Print the accuracy of the classifier on a test set."""
        self._printAccuracy(X, Y)

    def _printAccuracy(self, X, Y):
        """Predict labels for ``X`` and print the accuracy against ``Y``."""
        YPred = self.clf.predict(X)
        # Single-argument print() works on both Python 2 and Python 3.
        print(accuracy_score(Y, YPred))
def NB(text):
    """Benchmark GaussianNB on the prepared data set, then classify ``text``.

    The classifier is first trained and timed on the output of
    ``Preprocess()``.  It is then re-fitted on the training data returned by
    ``preprocess_input([text])`` and used to predict the features that call
    returns for ``text``.

    Args:
        text: Input passed to ``preprocess_input`` inside a one-element list.

    Returns:
        str: The second character of ``str(prediction_array)``, which for a
        one-element array of single-character labels is the label itself.
    """
    # features_train and features_test are the features for the training
    # and testing datasets; labels_train and labels_test are their labels.
    features_train, features_test, labels_train, labels_test = Preprocess()
    Ifeatures_train, Ifeatures_test, Ilabels_train = preprocess_input([text])

    clf = GaussianNB()

    # training
    train_t0 = time()
    clf.fit(features_train, labels_train)
    train_t1 = time()

    # prediction or testing (the result is only needed for the timing)
    test_t0 = time()
    clf.predict(features_test)
    test_t1 = time()

    # Each line is formatted into one string so the output is the same on
    # Python 2 and Python 3.
    print("accuracy:  %s" % clf.score(features_test, labels_test))
    print("#################################")
    print("tain time:  %s s" % round(train_t1 - train_t0, 3))
    print("prediction time:  %s s" % round(test_t1 - test_t0, 3))
    print("#################################")

    # Re-fit on the data derived from the input text and predict only once.
    clf.fit(Ifeatures_train, Ilabels_train)
    prediction = str(clf.predict(Ifeatures_test))[1]
    print("prediction of  %s" % prediction)
    return prediction
class GaussianNBLearner(AbstractLearner):
    """Gaussian Naive Bayes learner.

    http://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html

    GaussianNB expects dense arrays, so any input that offers ``toarray()``
    (e.g. a SciPy sparse matrix) is converted before it reaches the model.
    """

    def __init__(self):
        self.nb = GaussianNB()

    @staticmethod
    def _dense(X):
        """Return ``X.toarray()`` when available, otherwise ``X`` itself."""
        return X.toarray() if hasattr(X, 'toarray') else X

    def train(self, X, Y):
        """Fit the model on features ``X`` and targets ``Y``."""
        self.nb.fit(self._dense(X), Y)

    def predict(self, X):
        """Return the predicted targets for ``X``."""
        return self.nb.predict(self._dense(X))

    def score(self, X, Y):
        """Return the mean absolute difference between predictions and ``Y``.

        Goes through ``predict`` so sparse input is densified here as well.
        """
        return np.mean(np.abs(self.predict(X) - np.array(Y)))
def naive_bayes(train_features, train_labels, test_features, test_labels):
    """Fit a Gaussian naive Bayes model and predict both data splits.

    Args:
        train_features: Training feature matrix.
        train_labels: Labels for ``train_features``.
        test_features: Feature matrix to predict.
        test_labels: Not used by the body.

    Returns:
        tuple: ``(test_results, train_results)``, the predictions for the
        test features followed by the predictions for the training features.
    """
    classifier = GaussianNB()
    classifier.fit(train_features, train_labels)
    predicted_test = classifier.predict(test_features)
    predicted_train = classifier.predict(train_features)
    return predicted_test, predicted_train
class NBMatcher(MLMatcher):
    """Matcher backed by a scikit-learn ``GaussianNB`` classifier."""

    def __init__(self, *args, **kwargs):
        """Forward the same arguments to both ``MLMatcher`` and ``GaussianNB``."""
        super(NBMatcher, self).__init__(*args, **kwargs)
        self.clf = GaussianNB(*args, **kwargs)

    def fit(self, X, Y):
        """Fit the classifier on features ``X`` and labels ``Y``."""
        self.clf.fit(X, Y)

    def predict(self, X):
        """Return the predicted labels for ``X``."""
        # The prediction must be returned; without it callers only get None.
        return self.clf.predict(X)
def bayes_test():
    """Fit GaussianNB with ``fit`` and with ``partial_fit`` on a toy data set.

    Both models are trained on the same six 2-D points (classes 1 and 2) and
    each prints its prediction for the point ``[-0.8, -1]``.
    """
    samples = np.array([[-1, -1], [-2, -1], [-3, -2],
                        [1, 1], [2, 1], [3, 2]])
    targets = np.array([1, 1, 1, 2, 2, 2])

    batch_model = GaussianNB()
    batch_model.fit(samples, targets)
    print(batch_model.predict([[-0.8, -1]]))

    # partial_fit is given the full list of classes explicitly.
    incremental_model = GaussianNB()
    incremental_model.partial_fit(samples, targets, np.unique(targets))
    print(incremental_model.predict([[-0.8, -1]]))
def classify(features_train, labels_train, features_test, labels_test):
    """Train GaussianNB, report the timings and return the test accuracy.

    Args:
        features_train: Training feature matrix.
        labels_train: Labels for ``features_train``.
        features_test: Test feature matrix.
        labels_test: Labels for ``features_test``.

    Returns:
        The value of ``classifier.score(features_test, labels_test)``.
    """
    classifier = GaussianNB()

    t0 = time()
    classifier.fit(features_train, labels_train)
    # Three decimals: rounding to whole seconds hides sub-second runs.
    print("training time:  %s s" % round(time() - t0, 3))

    t1 = time()
    classifier.predict(features_test)  # executed only to time prediction
    print("predicting time:  %s s" % round(time() - t1, 3))

    return classifier.score(features_test, labels_test)
class TreeClassifier(Classifier):
    """Classifier whose underlying model is a scikit-learn ``GaussianNB``."""

    def __init__(self):
        self.classifier = GaussianNB()

    def do_train(self, X, y):
        """Fit the model on features ``X`` and labels ``y``."""
        self.classifier.fit(X, y)

    def do_classification(self, X, y):
        """Return the predicted labels for ``X``.

        ``y`` is accepted for interface compatibility but is not passed on:
        ``GaussianNB.predict`` only takes the feature matrix.
        """
        return self.classifier.predict(X)
class NaiveBayes:
    """Gaussian naive Bayes model that maps RSSI vectors to positions."""

    __theta = 0
    __sigma = 0

    def __init__(self):
        pass

    def learning(self, x_data, y_data):
        """Load the data, train the model and print its hold-out accuracy.

        Args:
            x_data: Comma-separated text source with the RSSI features,
                anything ``np.loadtxt`` accepts.
            y_data: Comma-separated text source with the position labels.
        """
        # sklearn.cross_validation was removed; model_selection replaces it.
        from sklearn.model_selection import train_test_split

        self.rssi = np.loadtxt(x_data, delimiter=',')
        print(self.rssi)
        self.position = np.loadtxt(y_data, delimiter=',')
        print(self.position)

        self.gaussian_nb = GaussianNB()
        rssi_train, rssi_test, position_train, position_test = \
            train_test_split(self.rssi, self.position, random_state=0)
        self.gaussian_nb.fit(rssi_train, position_train)
        print("theta", self.gaussian_nb.theta_)
        print("sigma", self._variances())

        predicted = self.gaussian_nb.predict(rssi_test)
        print(metrics.accuracy_score(position_test, predicted))

    def _variances(self):
        """Return the per-class variances of the fitted model.

        Newer scikit-learn exposes them as ``var_``; older releases only
        have ``sigma_``.
        """
        model = self.gaussian_nb
        return model.var_ if hasattr(model, 'var_') else model.sigma_

    def inference(self, r_data):
        """Predict the class of ``r_data`` and cache the posteriors.

        The posterior probabilities are stored as float16 in
        ``self.post_prob_float16`` for later use by ``output``.

        Returns:
            The array returned by ``GaussianNB.predict``.
        """
        self.predicted_class = self.gaussian_nb.predict(r_data)
        post_prob = self.gaussian_nb.predict_proba(r_data)
        self.post_prob_float16 = post_prob.astype(np.float16)
        print(self.predicted_class)
        return self.predicted_class

    def output(self):
        """Draw a bar graph of the posteriors of the first cached sample."""
        output = graph.Graph()
        output.bar_graph(self.post_prob_float16[0])
def predict_author(arr, yazar_features, yazar_classes):
    """Predict the author of ``arr`` with six different classifiers.

    Each classifier is fitted on ``yazar_features`` / ``yazar_classes``, its
    prediction for ``arr`` is printed under a debug banner, and the first
    predicted label is collected.

    Args:
        arr: Feature rows to classify.
        yazar_features: Training feature matrix.
        yazar_classes: Training labels.

    Returns:
        list: The first predicted label of each classifier, in the order
        K-NN, SVC, logistic regression, Gaussian NB, decision tree,
        gradient boosting.
    """
    classifiers = [
        ("\n[DEBUG] K-NN result (neighbors: 10)",
         KNeighborsClassifier(n_neighbors=10)),
        ("\n[DEBUG] SVC result (linear) (degree=3)",
         svm.SVC(kernel='linear', degree=3)),
        ("\n[DEBUG] Logistic Regression result ()",
         linear_model.LogisticRegression()),
        ("\n[DEBUG] Gaussian Naive Bayes", GaussianNB()),
        ("\n[DEBUG] Decision Tree Classifier",
         tree.DecisionTreeClassifier()),
        ("\n[DEBUG] Gradient Boosting Classification",
         GradientBoostingClassifier()),
    ]

    results = []
    for banner, estimator in classifiers:
        print(banner)
        estimator.fit(yazar_features, yazar_classes)
        # Predict once and reuse the result for printing and collecting.
        prediction = estimator.predict(arr)
        print(prediction)
        results.append(prediction[0])
    return results
def _fit_and_report(title, model, train, train_labels, test, expected_results):
    """Fit ``model``, print its confusion matrix and accuracy, return both results.

    Returns:
        tuple: ``(predictions, accuracy)`` where accuracy is the trace of the
        confusion matrix divided by its sum.
    """
    model.fit(train, train_labels)
    predictions = model.predict(test)
    conf_matrix = confusion_matrix(expected_results, predictions)
    print(title)
    print(conf_matrix)
    accuracy = float(np.trace(conf_matrix)) / float(np.sum(conf_matrix))
    print(accuracy)
    return predictions, accuracy


def trainer(dataset = "Features.csv"):
    """Train several classifiers on the extracted features and document them.

    The data from ``extractor(dataset)`` is split 80/20.  Each label entry is
    a pair whose first element is a name and whose second element is the
    class.  Four models are evaluated on the held-out part and the results
    are handed to ``documenter``.

    Args:
        dataset: Source passed to ``extractor``.
    """
    data, labels = extractor(dataset)
    train, test, train_labels, test_labels = train_test_split(
        data, labels, test_size = 0.20, random_state = 42)
    names, expected_results = zip(*test_labels)
    names1, train_labels = zip(*train_labels)
    print('S' + '\t' + 'H' + '\t' + 'F' + '\t' + 'A' + '\t' + 'N')

    # Random Forest Classifier
    results_boosting, accuracy_Boosting = _fit_and_report(
        "Forset Classifier:\n",
        RandomForestClassifier(n_estimators = 100, n_jobs = 2),
        train, train_labels, test, expected_results)

    # KNN Classifier
    results_KNN, accuracy_KNN = _fit_and_report(
        "KNN Classifier:\n",
        KNeighborsClassifier(n_neighbors=3),
        train, train_labels, test, expected_results)

    # Bayes Classifier
    results_Bayes, accuracy_Bayes = _fit_and_report(
        "\nBayes Classifier:\n",
        GaussianNB(),
        train, train_labels, test, expected_results)

    # NOTE(review): reported as "Neural Network" but the model is a
    # Bernoulli naive Bayes classifier; the label is kept so the output and
    # the documenter arguments stay unchanged.
    results_NN, accuracy_NN = _fit_and_report(
        "\nNeural Network:\n",
        BernoulliNB(),
        train, train_labels, test, expected_results)

    documenter(names, results_boosting, results_Bayes, results_NN,
               results_KNN, accuracy_Boosting, accuracy_Bayes, accuracy_NN,
               accuracy_KNN)
def NBAccuracy(features_train, labels_train, features_test, labels_test):
    """Return the accuracy of a Gaussian naive Bayes classifier.

    The classifier is fitted on the training data and the accuracy is the
    fraction of test samples whose predicted label equals the true label.
    """
    from sklearn.naive_bayes import GaussianNB

    model = GaussianNB()
    model.fit(features_train, labels_train)
    predicted = model.predict(features_test)

    # Count the positions where prediction and truth agree.
    hits = sum(1 for guess, truth in zip(predicted, labels_test)
               if guess == truth)
    return float(hits) / float(len(labels_test))
class GaussianColorClassifier(ContourClassifier):
    '''
    Contour classifier that describes a contour by its mean color in the
    BGR, HSV and LAB colorspaces and classifies those nine values with a
    Gaussian naive Bayes model.
    For more usage info, see class ContourClassifier
    '''
    FEATURES = ['B', 'G', 'R', 'H', 'S', 'V', 'L', 'A', 'B']

    def __init__(self, classes, **kwargs):
        super(GaussianColorClassifier, self).__init__(classes, **kwargs)
        self.classifier = GaussianNB()

    def get_features(self, img, mask):
        '''
        Return the mean color of img under mask as a flat array holding the
        BGR values followed by their HSV and LAB conversions.
        '''
        bgr_mean = cv2.mean(img, mask)[:3]
        # A 1x1 uint8 image lets cvtColor convert the single mean color.
        pixel = np.array([[bgr_mean]], dtype=np.uint8)
        hsv_pixel = cv2.cvtColor(pixel, cv2.COLOR_BGR2HSV)
        lab_pixel = cv2.cvtColor(pixel, cv2.COLOR_BGR2LAB)
        return np.hstack(
            (pixel.flatten(), hsv_pixel.flatten(), lab_pixel.flatten()))

    def classify_features(self, features):
        '''Return the classifier's prediction for features.'''
        return self.classifier.predict(features)

    def feature_probabilities(self, features):
        '''Return the classifier's class probabilities for features.'''
        return self.classifier.predict_proba(features)

    def train(self, features, classes):
        '''Fit the classifier on features and their classes.'''
        self.classifier.fit(features, classes)
def categorize(train_data, test_data, train_class, n_features):
    """Classify ``test_data`` with a Gaussian naive Bayes model.

    Only the GaussianNB model is built.  The other estimators that used to
    be constructed here were never fitted or used, and some of their
    constructor arguments are rejected by newer scikit-learn releases.

    Args:
        train_data: Training feature matrix.
        test_data: Feature matrix to classify.
        train_class: Labels for ``train_data``.
        n_features: Not used by the body; kept for interface compatibility.

    Returns:
        The predicted labels for ``test_data``.
    """
    gnb = GaussianNB()
    gnb.fit(train_data, train_class)
    return gnb.predict(test_data)
def NBAccuracy(features_train, labels_train, features_test, labels_test):
    """Return the accuracy of a Gaussian naive Bayes classifier.

    The classifier is fitted on the training data and scored on the test
    data with ``sklearn.metrics.accuracy_score``.
    """
    from sklearn.metrics import accuracy_score
    from sklearn.naive_bayes import GaussianNB

    clf = GaussianNB()
    clf.fit(features_train, labels_train)
    pred = clf.predict(features_test)

    # accuracy_score is the single source of the result; the hand-computed
    # accuracy that used to precede it was always overwritten.
    return accuracy_score(labels_test, pred)
def NBAccuracy(features_train, labels_train, features_test, labels_test):
    """Return the accuracy of a Gaussian naive Bayes classifier.

    Also prints how long training and prediction took, in seconds rounded
    to three decimals.
    """
    from sklearn.naive_bayes import GaussianNB

    clf = GaussianNB()

    t0 = time()
    clf.fit(features_train, labels_train)
    # Formatting into one string gives the same output on Python 2 and 3.
    print("training time: %s s" % round(time() - t0, 3))

    t1 = time()
    clf.predict(features_test)  # executed only to time prediction
    print("predicting time: %s s" % round(time() - t1, 3))

    accuracy = clf.score(features_test, labels_test)
    return accuracy
def main(argv):
    """Train GaussianNB on one label column and save the test predictions.

    Args:
        argv: Exactly five arguments, without the program name: training
            data file, training label file, test data file, output folder
            and label column index.  The three data files are read with
            ``np.load``.

    Writes ``<label_idx>.pred`` (one prediction per line) and
    ``<label_idx>.npy`` into the output folder.  Exits with status 1 when
    the argument count is wrong.
    """
    if len(argv) != 5:
        print("./NB_train_pred.py train.csv train_lable test.csv save_folder label_idx")
        sys.exit(1)

    output_folder = argv[3]
    label_idx = int(argv[4])
    # Create the folder without a shell, so unusual characters in the name
    # are safe and an existing folder is not an error.
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    print("Loading training data")
    train_array = np.load(argv[0])
    print("Loading training label")
    train_label_array = np.load(argv[1])
    print("Loading test data")
    test_array = np.load(argv[2])

    print("building NB on label " + str(label_idx))
    gnb = GaussianNB()
    # NOTE(review): features drop the first column while labels drop the
    # first row; confirm the two arrays line up.
    gnb.fit(train_array[:, 1:], train_label_array[1:, label_idx])

    print("predicting label " + str(label_idx))
    nb_pred = gnb.predict(test_array[:, 1:])

    print("save the result")
    pred_path = os.path.join(output_folder, str(label_idx) + ".pred")
    with open(pred_path, 'w') as pred_file:
        pred_file.write("\n".join(str(x) for x in nb_pred.tolist()))
    npy_path = os.path.join(output_folder, str(label_idx) + ".npy")
    with open(npy_path, 'wb') as npy_file:
        np.save(npy_file, nb_pred)
def gnbmodel(d, X_2, y_2, X_3, y_3, X_test, y_test):
    """Evaluate GaussianNB and return class probabilities for ``X_3``.

    Args:
        d: Text prefix for the two printed report lines.
        X_2, y_2: Training features and labels.
        X_3: DataFrame whose rows are scored with the fitted model.
        y_3: Not used by the body; kept for interface compatibility.
        X_test, y_test: Hold-out features and labels for the accuracy report.

    Returns:
        pandas.Series: A ``chance`` column indexed like ``X_3`` holding
        column 1 of ``predict_proba(X_3)``.
    """
    # k-fold cross-validation (5 folds).
    scores = cross_val_score(GaussianNB(), X_2, y_2, cv=5, scoring='accuracy')
    score_mean = scores.mean()
    print(d+'5折交互检验:'+str(score_mean))

    gnb = GaussianNB().fit(X_2, y_2)

    # Predict the test set.
    answer_gnb = gnb.predict(X_test)
    accuracy = metrics.accuracy_score(y_test, answer_gnb)
    print(d+'预测:'+str(accuracy))

    # Assign the probabilities as one column; this avoids a per-row loop
    # and writing floats one by one into an integer column.
    X_3_copy = X_3.copy(deep=True)
    X_3_copy['chance'] = gnb.predict_proba(X_3)[:, 1]
    return X_3_copy['chance']
def NBAccuracy(features_train, labels_train, features_test, labels_test):
    """Return the accuracy of a Gaussian naive Bayes classifier.

    Also prints how long training and prediction took, in seconds rounded
    to three decimals.
    """
    from sklearn.metrics import accuracy_score
    from sklearn.naive_bayes import GaussianNB

    classifier = GaussianNB()

    # Time the fit.
    t0 = time()
    classifier.fit(features_train, labels_train)
    # Formatting into one string gives the same output on Python 2 and 3.
    print("Training Time:  %s s" % round(time() - t0, 3))

    # Time the prediction.
    t0 = time()
    pred = classifier.predict(features_test)
    print("Prediction Time:  %s s" % round(time() - t0, 3))

    # accuracy_score takes (y_true, y_pred).
    accuracy = accuracy_score(labels_test, pred)
    return accuracy
def classifier(model, X, X1, y, y1):
    """Fit the requested model and print its accuracy, F1 score and run time.

    Args:
        model: One of ``'gnb'``, ``'mnb'``, ``'bnb'``, ``'lin'``, ``'rbf'``,
            ``'poly'``, ``'rfc'``, ``'lr'`` or ``'knn'``.
        X: Training feature matrix.
        X1: Test feature matrix.
        y: Labels for ``X``.
        y1: Labels for ``X1``.

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """
    # name -> (printed label, factory); factories defer construction so
    # only the requested estimator is built.
    factories = {
        'gnb': ('GNB', lambda: GaussianNB()),
        'mnb': ('MNB', lambda: MultinomialNB()),
        'bnb': ('BNB', lambda: BernoulliNB()),
        'lin': ('Linear SVM', lambda: svm.SVC(kernel='linear', C=0.5)),
        'rbf': ('RBF SVM', lambda: svm.SVC()),
        'poly': ('Poly SVM', lambda: svm.SVC(kernel='poly', degree=2)),
        'rfc': ('Random Forest',
                lambda: RandomForestClassifier(
                    max_depth=10, n_estimators=100, max_features=5)),
        'lr': ('Logistic Regression', lambda: LogisticRegression()),
        'knn': ('K nearest neighbours',
                lambda: KNeighborsClassifier(n_neighbors=6)),
    }
    if model not in factories:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError("unknown model %r; expected one of %s"
                         % (model, sorted(factories)))

    t0 = time.time()
    label, build = factories[model]
    print(label)
    gnb = build().fit(X, y)
    y_pred = gnb.predict(X1)
    print("%s %s" % (accuracy_score(y1, y_pred), f1_score(y1, y_pred)))
    print(time.time() - t0)
class PatternBasedDiagnosis:
    """
    Pattern based diagnosis backed by a Gaussian naive Bayes model
    """

    __slots__ = [
        "model"
    ]

    def __init__(self):
        pass

    def train(self, data, labels):
        """
        Print the training data and labels, then fit a new GaussianNB on them
        :param data: training observations
        :param labels: labels for the observations
        :return: None
        """
        print('Training Data: %s' % data)
        print('Training Labels: %s' % labels)
        estimator = GaussianNB()
        self.model = estimator
        self.model = estimator.fit(data, labels)

    def eval(self, obs):
        """
        Print the model's prediction for obs
        :param obs: observations to classify
        :return: None
        """
        prediction = self.model.predict(obs)
        print('Testing Result: %s' % prediction)
def getGaussianPred(featureMatrix, labels, testSet, testSet_docIndex):
    """
    Fit GaussianNB on the training data and group the positive test
    predictions by document.

    All input arguments are return of getTrainTestData()
    :param featureMatrix: training feature matrix
    :param labels: training labels
    :param testSet: test feature matrix
    :param testSet_docIndex: 2-D array; row i holds (docid, index) for
        test row i
    :return docIndexPred: dict{docid: [index1, index2, ...], ...}
        key is docid
        value is the index of every test row predicted truthy for it
    """
    model = GaussianNB()
    model.fit(featureMatrix, labels)
    predicted = model.predict(testSet)

    docIndexPred = dict()
    for row, flag in enumerate(predicted):
        if not flag:
            continue
        docid = testSet_docIndex[row, 0]
        index = testSet_docIndex[row, 1]
        docIndexPred.setdefault(docid, []).append(index)
    return docIndexPred
class RegularizedGaussianNB:
    """
    Gaussian naive Bayes with regularized per-class feature variances.

    Three types of regularization are possible:
        - regularize the variance of a feature within a class toward the
          average variance of all features from that class
        - regularize the variance of a feature within a class toward its
          pooled variance across all classes
        - add some constant amount of variance to each feature
    In practice, the latter seems to work the best, though the regularization
    value should be cross-validated.
    """

    def __init__(self, avg_weight = 0, pooled_weight = 0, extra_variance = 0.1):
        self.pooled_weight = pooled_weight
        self.avg_weight = avg_weight
        self.extra_variance = extra_variance
        self.model = GaussianNB()

    def _variances(self):
        """Return the fitted model's (n_classes, n_features) variance array.

        Newer scikit-learn exposes it as ``var_``; older releases only have
        ``sigma_``.
        """
        model = self.model
        return model.var_ if hasattr(model, 'var_') else model.sigma_

    def fit(self, X, Y):
        """Fit the model, then overwrite its variances with regularized ones.

        For every class the new variance of a feature is
        ``(1 - p - a) * class_var + p * pooled_var + a * mean(class_vars) + ev``.
        """
        self.model.fit(X, Y)
        p = self.pooled_weight
        a = self.avg_weight
        ev = self.extra_variance
        original_weight = 1.0 - p - a
        pooled_variances = np.var(X, 0)

        variances = self._variances()
        # Mean variance of each class, kept as a column for broadcasting.
        class_means = variances.mean(axis=1, keepdims=True)
        # In-place update so the fitted model sees the new values.
        variances[:] = (original_weight * variances
                        + p * pooled_variances
                        + a * class_means
                        + ev)

    def predict(self, X):
        """Return the model's predictions for ``X``."""
        return self.model.predict(X)
def univariateFeatureSelection(f_list, my_dataset):
    """Score every feature on its own with a Gaussian naive Bayes classifier.

    For each feature, falsy and ``'NaN'`` values in ``my_dataset`` are
    replaced by 0 in place.  The samples of 1000 stratified shuffle splits
    are pooled into one training and one test set, a GaussianNB is fitted
    on the pooled training data, and ``score_func`` is applied to the pooled
    test predictions.

    Args:
        f_list: Names of the features to evaluate.
        my_dataset: Mapping of name -> data point (a mapping of feature
            name -> value).  It is modified in place.

    Returns:
        list: ``(feature, score[0], score[1], score[2])`` tuples sorted by
        ``score[2]`` in descending order.
    """
    # sklearn.cross_validation was removed; model_selection replaces it.
    from sklearn.model_selection import StratifiedShuffleSplit
    from sklearn.naive_bayes import GaussianNB

    result = []
    for feature in f_list:
        # Replace missing values and 'NaN' with 0.
        for name in my_dataset:
            data_point = my_dataset[name]
            value = data_point[feature]
            if not value or value == 'NaN':
                data_point[feature] = 0

        data = featureFormat(my_dataset, ['poi', feature],
                             sort_keys = True, remove_all_zeroes = False)
        labels, features = targetFeatureSplit(data)
        features = [abs(x) for x in features]

        cv = StratifiedShuffleSplit(n_splits=1000, test_size=0.1,
                                    random_state=42)
        features_train = []
        features_test = []
        labels_train = []
        labels_test = []
        for train_idx, test_idx in cv.split(features, labels):
            for ii in train_idx:
                features_train.append(features[ii])
                labels_train.append(labels[ii])
            for jj in test_idx:
                features_test.append(features[jj])
                labels_test.append(labels[jj])

        # One fit on the samples pooled from all splits.
        clf = GaussianNB()
        clf.fit(features_train, labels_train)
        predictions = clf.predict(features_test)
        score = score_func(labels_test, predictions)
        result.append((feature, score[0], score[1], score[2]))

    return sorted(result, reverse=True, key=lambda x: x[3])
class CruiseAlgorithm(object):
    """Classify cruise vs. non-cruise phase with Gaussian naive Bayes.

    The classifier works on the differential change of the data stream
    (the row-to-row difference), not on the raw values.
    """

    def __init__(self, testing=False):
        self.core = GaussianNB()
        self.scaler = RobustScaler()
        self.X_prev = None  # last sample seen by predict() in streaming mode
        self.testing = testing

    def fit(self, X, Y):
        """Fit on the row differences of ``X``.

        Y should be the label of cruise or not; it is flattened with
        ``ravel()`` before fitting.
        """
        X = self.prepare(X)
        self.core.fit(X, Y.ravel())

    def predict(self, X):
        """Predict the phase for ``X`` and return the result as a matrix.

        In testing mode ``X`` is treated as a batch and differenced with
        ``prepare``.  Otherwise the previous call's input is subtracted from
        ``X`` (the first call uses ``X`` as is) and ``X`` is remembered for
        the next call.
        """
        if self.testing:
            X_t = self.prepare(X)
        else:
            # Compare against None: the truth value of an array with more
            # than one element is ambiguous and raises ValueError.
            if self.X_prev is not None:
                X_t = X - self.X_prev
            else:
                X_t = X
            self.X_prev = X
        print(repr(X_t))
        prediction_result = self.core.predict(X_t)
        return np.asmatrix(prediction_result)

    def prepare(self, X):
        """Return the row-to-row differences of ``X``; the first row is zeros."""
        a = np.zeros((X.shape[0], X.shape[1]))
        a[1:, :] = X[1:] - X[:-1]
        return a
def myClassifier(X, Y, model, CV=4, scoreType='pure'):
    """Fit the requested classifier and return its accuracy with the model.

    Args:
        X: Feature matrix, e.g. ``[[0, 0], [1, 1], [1, 2]]``.
        Y: Labels, e.g. ``[0, 1, 2]``.
        model: ``"SVM"``, ``"LR"``, ``"NB"`` or ``"MLP"``.
        CV: Number of folds used when ``scoreType`` is ``'cv'``.
        scoreType: ``'cv'`` for mean cross-validated accuracy or ``'pure'``
            for accuracy on the training data itself.

    Returns:
        tuple: ``(accuracy, fitted_classifier)``.

    Raises:
        ValueError: If ``model`` or ``scoreType`` is not supported.
    """
    # Factories defer construction so only the requested model is built.
    builders = {
        "SVM": lambda: svm.SVC(probability=True, random_state=0, kernel='rbf'),
        "LR": lambda: linear_model.LogisticRegression(),
        "NB": lambda: GaussianNB(),
        # multilayer perceptron; released scikit-learn names this option
        # solver='lbfgs'
        "MLP": lambda: MLPClassifier(hidden_layer_sizes=[100], solver='lbfgs'),
    }
    if model not in builders:
        raise ValueError("unknown model %r; expected one of %s"
                         % (model, sorted(builders)))
    if scoreType not in ('cv', 'pure'):
        raise ValueError("unknown scoreType %r; expected 'cv' or 'pure'"
                         % (scoreType,))

    print("Error Analysis using %s" % scoreType)

    clf = builders[model]()
    # Every model is fitted, the SVM included, so 'pure' scoring and the
    # returned classifier are always usable.
    clf.fit(X, Y)

    if scoreType == 'cv':
        from sklearn.model_selection import cross_val_score
        accu = np.mean(cross_val_score(clf, X, Y, scoring='accuracy', cv=CV))
    else:
        predictions = clf.predict(X)
        hits = sum(int(predictions[q] == Y[q]) for q in range(len(Y)))
        # float() avoids Python 2 integer division collapsing to 0 or 1.
        accu = hits / float(len(Y))
    return accu, clf
def selectKBest(previous_result, data):
    """Score GaussianNB on the top-k features for k = 1..10.

    Args:
        previous_result: Ranked list of tuples whose first element is a
            feature name.  The entries at positions 4 and 5 are removed in
            place before use.
        data: Not read; the name is reused for the formatted data.
            NOTE(review): the data set comes from the module-level
            ``my_dataset``; confirm this parameter was not meant instead.

    Returns:
        list: ``(k, score[0], score[1], score[2])`` tuples, one per k.
    """
    # sklearn.cross_validation was removed; model_selection replaces it.
    from sklearn.model_selection import StratifiedShuffleSplit
    from sklearn.naive_bayes import GaussianNB

    # remove 'restricted_stock_deferred' and 'director_fees'
    previous_result.pop(4)
    previous_result.pop(4)

    result = []
    _k = 10
    for k in range(0, _k):
        feature_list = ['poi']
        for n in range(0, k + 1):
            feature_list.append(previous_result[n][0])

        data = featureFormat(my_dataset, feature_list,
                             sort_keys = True, remove_all_zeroes = False)
        labels, features = targetFeatureSplit(data)
        features = [abs(x) for x in features]

        cv = StratifiedShuffleSplit(n_splits=1000, test_size=0.1,
                                    random_state=42)
        features_train = []
        features_test = []
        labels_train = []
        labels_test = []
        for train_idx, test_idx in cv.split(features, labels):
            for ii in train_idx:
                features_train.append(features[ii])
                labels_train.append(labels[ii])
            for jj in test_idx:
                features_test.append(features[jj])
                labels_test.append(labels[jj])

        # One fit on the samples pooled from all splits.
        clf = GaussianNB()
        clf.fit(features_train, labels_train)
        predictions = clf.predict(features_test)
        score = score_func(labels_test, predictions)
        result.append((k + 1, score[0], score[1], score[2]))
    return result
# Receive `size` bytes from the client and store them as the uploaded APK.
with open('temp.apk', 'wb') as f:
    while size > 0:
        data = client.recv(1024)
        if not data:
            # The peer closed the connection early; stop instead of looping
            # forever on empty reads.
            break
        f.write(data)
        size -= len(data)
print('APK Saved')

# NOTE(review): the file is written to 'temp.apk' but read back from
# 'Apps/temp.apk'; confirm both paths refer to the same file.
ap = apk.APK('Apps/temp.apk')
per = ap.get_permissions()

# Keep only the part after the last '.' of each permission name.
permissions = []
for line in per:
    curr = line.rsplit('.', 1)[-1]
    permissions.append(curr)

# Start from the template vector and set the entry of each known permission.
P = np.genfromtxt('Training/Perdiction.csv', delimiter=',')
for i in permissions:
    if get_index.get(i) is not None:
        P[get_index.get(i)] = 1.0

result = ''
if clf.predict([P])[0] == 0.0:
    result = 'Non-Malicious'
else:
    result = 'Malicious'

# sendall() needs bytes on Python 3; encode() is harmless on Python 2.
client.sendall(result.encode())
client.close()
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation import matplotlib.pyplot as plt from sklearn.metrics import roc_curve, confusion_matrix dataset=pd.read_csv('divorce.csv',delimiter=";") a=dataset.drop_duplicates() print("DUPLICATE SONRASI YENİ VERİ SAYIMIZ:") print(len(a)) X=dataset.iloc[:,0:54] y=dataset["Class"] X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20, random_state=1) gaussian_bayes = GaussianNB() gaussian_bayes.fit(X_train,y_train.values.ravel()) y_pred = gaussian_bayes.predict(X_test) from sklearn.metrics import classification_report, confusion_matrix print(confusion_matrix(y_test,y_pred)) print(classification_report(y_test,y_pred)) print("CONFUSION MATRIX") print(confusion_matrix(y_test, y_pred)) print("Accuracy:",metrics.accuracy_score(y_test, y_pred)) #PRECIOSN SCORE from sklearn.metrics import precision_score, roc_auc_score print("Precision") print(precision_score(y_test, y_pred, average='weighted'))
# Read pixel values into X, read class values into y df_X = pandas.read_csv("../../data/x_train_gr_smpl.csv") df_y = pandas.read_csv("../../data/y_train_smpl.csv") # Shuffle the order of the data (keeping the X and y rows in sync) df_X, df_y = shuffle(df_X, df_y) # Split dataset into training and testing set, 90% and 10%, respectively X_train, X_test, y_train, y_test = train_test_split(df_X, df_y, test_size=0.1, random_state=0) naive_bayes = GaussianNB() classifier = naive_bayes.fit(X_train, y_train) y_predicted = naive_bayes.predict(X_test) print("\nNaive Bayes accuracy score: ", round(metrics.accuracy_score(y_test, y_predicted) * 100, 2), "%\n") # Plot non-normalized confusion matrix labels = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] np.set_printoptions(precision=2) # Plot non-normalized confusion matrix titles_options = [("Confusion matrix, without normalization", None), ("Normalized confusion matrix", 'true')] for title, normalize in titles_options: disp = plot_confusion_matrix(classifier, X_test,
outcome_feature, test_size=0.5, random_state=0) ### ### Define Classifier ### clf = GaussianNB() ### ### Train Classifier on (X1,Y1) and Validate on (X2,Y2) ### clf.fit(X_1, Y_1) score = clf.score(X_2, Y_2) print("accuracy: {0}".format(score.mean())) ### ### Print Confusion Matrix ### output = clf.predict(X_2) matrix = confusion_matrix(output, Y_2) print(matrix) ### ### Save Classifier ### joblib.dump(clf, 'model/nb.pkl')
def NB(train, test, pred):
    """Fit Gaussian naive Bayes on ``train`` and predict ``test``.

    Args:
        train: Training feature matrix.
        test: Feature matrix to predict.
        pred: Target values for ``train``.

    Returns:
        The predictions for ``test``.
    """
    model = GaussianNB()
    model.fit(train, pred)
    predicted = model.predict(test)
    return predicted
y_train_labeled = train_labeled['y']
# Public DataFrame.drop replaces the private _drop_axis helper.
x_train_labeled = train_labeled.drop(['y'], axis=1)
x_train_unlabeled = train_unlabeled

# Switch to numpy
# Preprocessing X: the labeled rows followed by the unlabeled rows.
x_train = []
x_train_labeled = np.array(x_train_labeled)
x_train_unlabeled = np.array(x_train_unlabeled)
x_train.extend(x_train_labeled)
x_train.extend(x_train_unlabeled)
x_test = np.array(test)

# Preprocessing y: every unlabeled row gets the placeholder label -1.
# The count is taken from the data so it always matches x_train.
y_train_labeled = np.array(y_train_labeled)
ones = -1 * np.ones(len(x_train_unlabeled))
y_train = np.concatenate((y_train_labeled, ones))

# Trying Gaussian Naive Bayes
# NOTE(review): GaussianNB treats -1 as an ordinary class, so predictions
# can contain -1; confirm that is intended.
gnb = GaussianNB()
gnb.fit(x_train, y_train)
y_pred = gnb.predict(x_test)

# output results
d = {'Id': test.index, 'y': y_pred}
output = pd.DataFrame(d)
output.to_csv('output1.csv', index=False)

# from sklearn.metrics import accuracy_score
# acc = accuracy_score(y, y_pred)
# Entrenamiento Supervisado: Clasificacion de Iris import matplotlib.pyplot as plt import numpy as np import seaborn as sns from sklearn.model_selection import cross_val_score, train_test_split from sklearn.naive_bayes import GaussianNB from sklearn.metrics import accuracy_score # Cargamos los datos iris = sns.load_dataset('iris') x_iris = iris.drop( 'species', axis='columns') # Eliminamos el campo especie de las columnas y_iris = iris['species'] # Dividimos los datos en dos conjuntos: entrenamiento y testeo xtrain, xtest, ytrain, ytest = train_test_split(x_iris, y_iris, random_state=1) model = GaussianNB() print("Entrenando el Modelo GaussianNB...") model.fit(xtrain, ytrain) print("Evaluando nuevos datos...") ymodel = model.predict(xtest) print("Precision final: {}".format(accuracy_score(ytest, ymodel)))
conducted between 1958 and 1970 at the University of Chicago's Billings Hospital on the survival of patients who had undergone surgery for cancer 1. Age of patient at time of operation (numerical) 2. Patient's year of operation (year - 1900, numerical) 3. Number of positive axillary nodes detected (numerical) 4. Survival status (class attribute) -- 1 = the patient survived 5 years or longer -- 2 = the patient died within 5 year ''' c1, c2, c3, c4 = np.loadtxt('data.csv', unpack=True, delimiter=',') x = np.column_stack((c1, c3)) y = c4 # Create NaiveBayes Classifier clf = GaussianNB() # fit the mode clf.fit(x, y) # make predictions predictions = clf.predict(x) # calculate accuracy print(accuracy_score(y, predictions)) from matplotlib import pyplot as plt plt.scatter(c1, c3, c=c4) plt.colorbar(ticks=[1, 2]) plt.xlabel("Age of the patient") plt.ylabel("No of positive axillary nodes")
test_size=0.3, random_state=109) # In[36]: X_train.shape, X_test.shape, y_train.shape, y_test.shape # In[37]: # Train the model using the training sets model.fit(X_train, y_train) # In[38]: #Predict the response for test dataset y_pred = model.predict(X_test) # In[39]: #Import scikit-learn metrics module for accuracy calculation from sklearn import metrics # Model Accuracy, how often is the classifier correct? print("Accuracy:", metrics.accuracy_score(y_test, y_pred)) # In[42]: ## Apply Algorithm from sklearn.ensemble import RandomForestClassifier random_forest_model = RandomForestClassifier(random_state=10)
# -*- coding: utf-8 -*- import pandas as pd base = pd.read_csv('risco_credito.csv') previsores = base.iloc[:, 0:4].values classe = base.iloc[:, 4].values from sklearn.naive_bayes import GaussianNB from sklearn.preprocessing import LabelEncoder labelEncoder = LabelEncoder() previsores[:, 0] = labelEncoder.fit_transform(previsores[:, 0]) previsores[:, 1] = labelEncoder.fit_transform(previsores[:, 1]) previsores[:, 2] = labelEncoder.fit_transform(previsores[:, 2]) previsores[:, 3] = labelEncoder.fit_transform(previsores[:, 3]) classificador = GaussianNB() classificador.fit(previsores, classe) # história boa, dívida alta, garantias nenhuma, renda > 35 # história ruim, dívida alta, garantias adequada, renda < 15 resultado = classificador.predict([[0, 0, 1, 2], [2, 0, 0, 0]]) print(classificador.classes_) print(classificador.class_count_) print(classificador.class_prior_)
# Missing readings (NaN) are replaced by -100.
features2[np.isnan(features2)] = -100

#TODO 1: Compute the cosine similarity matrix of your own wifi signal strength
# def cosine_similarity(a,b):
#     numerator = np.dot(a,b)
#     x = np.sqrt(np.sum(np.square(a)))
#     y = np.sqrt(np.sum(np.square(b)))
#     denominator = x*y
#     return numerator/denominator


def new_matrix(num):
    """Return the pairwise similarity matrix of the rows of ``num``.

    Entry ``[x, y]`` is ``cosine_similarity(num[x, :], num[y, :])``.
    """
    matrix = np.zeros((num.shape[0], num.shape[0]))
    for x in range(num.shape[0]):
        for y in range(num.shape[0]):
            matrix[x, y] = cosine_similarity(num[x, :], num[y, :])
    return matrix


similarity_matrix = new_matrix(features)
plot_consine_similarity(similarity_matrix, labels)
# Reuse the matrix computed above instead of rebuilding it for its size.
print(similarity_matrix.shape[0])

#TODO 2: Compute the cosine similarity matrix of two different people's wifi scans
similarity_matrix2 = new_matrix(features2)
plot_consine_similarity(similarity_matrix2, labels2)

#TODO 3: Classify the location of the other person
# Train on the first 14 scans and predict scans 14..34 one at a time.
clf = GaussianNB()
clf.fit(np.array(features2[:14]), np.array(labels2[:14]))
for i in range(14, 35):
    # predict() expects a 2-D input, so wrap the single sample in a list.
    print(clf.predict([features2[i]]))
# Target: column 8 of the data set, kept as a 2-D column vector.
y = dataset.iloc[:, 8:9].values

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# Fitting Naive Bayes to the Training set
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
# fit() expects a 1-D label array; ravel() flattens the column vector.
classifier.fit(X_train, y_train.ravel())

# save the model to disk
filename = 'Naive Bayes Diabetes.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)

# load the model from disk (the file is closed as soon as it is read)
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Scoring the reloaded model on the Test set
result = loaded_model.score(X_test, y_test)
print("Test score: {0:.2f} %".format(100 * result))

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
class GNB(object):
    """Gaussian Naive Bayes behaviour classifier using a single feature.

    Only the fourth value of each observation (index 3, ``d_dot``) is fed to
    the underlying estimator, both when training and when predicting.
    """

    def __init__(self):
        self.possible_labels = ['left', 'keep', 'right']
        self.clf = GaussianNB()
        # Created but not used by train()/predict().
        self.scaler = StandardScaler()

    def train(self, data, labels):
        """
        Trains the classifier with N data points and labels.

        INPUTS
        data - array of N observations
          - Each observation is a tuple with 4 values: s, d,
            s_dot and d_dot.
          - Example : [
                [3.5, 0.1, 5.9, -0.02],
                [8.0, -0.3, 3.0, 2.2],
                ...
            ]

        labels - array of N labels
          - Each label is one of "left", "keep", or "right".
        """
        # One single-feature row per observation: [[d_dot], [d_dot], ...]
        features = [[row[3]] for row in data]
        self.clf.fit(features, labels)

    def predict(self, observation):
        """
        Once trained, this method is called and expected to return
        a predicted behavior for the given observation.

        INPUTS
        observation - a 4 tuple with s, d, s_dot, d_dot.
          - Example: [3.5, 0.1, 8.5, -0.2]

        OUTPUT
        The result of the estimator's ``predict`` for this one sample: a
        length-1 sequence whose element is one of "left", "keep" or "right".
        """
        # The estimator expects a 2-D (n_samples, n_features) input, matching
        # the single-feature rows used in train(); wrap the lone sample in an
        # outer list instead of passing a flat 1-D list.
        sample = [[observation[3]]]
        return self.clf.predict(sample)
accuracy = knn.score(xtest, ytest) print(accuracy) # creating a confusion matrix knn_predictions = knn.predict(x_test) ''' ''' from sklearn.tree import DecisionTreeClassifier dtree_model = DecisionTreeClassifier(max_depth = 7).fit(xtrain, ytrain) dtree_predictions = dtree_model.predict(x_test) list=[] ''' from sklearn.naive_bayes import GaussianNB gnb = GaussianNB().fit(xtrain, ytrain) gnb_predictions = gnb.predict(x_test) # accuracy on X_test accuracy = gnb.score(xtest, ytest) print(accuracy) list = [] for i in gnb_predictions: ''' list.append(i) temp=list[i] list.append(out[temp]) ''' list.append(out[i]) print(out[i]) ##
# Flatten every image into one feature row.
data = digits.images.reshape((n_samples, -1))

classifier = GaussianNB()
# Alternatives that were tried here:
#   MLPClassifier(alpha=1, hidden_layer_sizes=(25, 15), random_state=1)
#   svm.SVC(gamma=1), KNeighborsClassifier(3)
filename = "naive_bayes.bin"

# Train on the first two thirds of the labelled data and evaluate on the
# remaining third. The evaluation slice previously started at n_samples / 3,
# so half of the "test" samples had also been used for training.
n_train = int(n_samples * 2 / 3)
classifier.fit(data[:n_train], digits.target[:n_train])

# Save trained model to disk and reload it
_ = joblib.dump(classifier, filename)
classifier = joblib.load(filename)

predicted = classifier.predict(data[n_train:])
expected = digits.target[n_train:]

print("Classification report for classifier %s:\n%s\n" %
      (classifier, metrics.classification_report(expected, predicted)))

images_and_predictions = list(zip(digits.images[n_train:], predicted))
# Random window start so different examples are shown each time; the upper
# bound leaves room for a full 3 x 7 grid whenever enough test images exist.
x = randint(0, max(1, len(expected) - 21))
for index, (image, prediction) in enumerate(images_and_predictions[x:x + 21]):
    plt.subplot(3, 7, index + 1)
    plt.axis('off')
    plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    # Title format: predicted(expected)
    plt.title('%i(%i)' % (prediction, expected[x + index]))

plt.show()
# Label-encode the remaining categorical predictor columns in place.
for coluna in (6, 7, 8, 9, 13):
    previsores[:, coluna] = labelencoder_previsores.fit_transform(
        previsores[:, coluna])

# NOTE(review): the `categorical_features` argument is not accepted by newer
# scikit-learn releases -- confirm the pinned version before upgrading.
onehotencoder = OneHotEncoder(categorical_features=[1, 3, 5, 6, 7, 8, 9, 13])
previsores = onehotencoder.fit_transform(previsores).toarray()

labelencoder_classe = LabelEncoder()
classe = labelencoder_classe.fit_transform(classe)

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
previsores = scaler.fit_transform(previsores)

from sklearn.model_selection import train_test_split

# Hold out 15% of the rows for testing.
(previsores_treinamento, previsores_teste,
 classe_treinamento, classe_teste) = train_test_split(
    previsores, classe, test_size=0.15, random_state=0)

from sklearn.naive_bayes import GaussianNB

classificador = GaussianNB()
classificador.fit(previsores_treinamento, classe_treinamento)

# Predictions for the held-out rows
previsoes = classificador.predict(previsores_teste)

# Accuracy and confusion matrix on the held-out rows
from sklearn.metrics import confusion_matrix, accuracy_score

precisao = accuracy_score(classe_teste, previsoes)
matriz = confusion_matrix(classe_teste, previsoes)
0].values.tolist( ) # Test test_x, test_y = read_test_class.iloc[:, 1:].values, read_test_class.iloc[:, 0].values.tolist( ) # ============================================================================= # TRADITIONAL MACHINE LEARNING ALGORITHMS # ============================================================================= print("Training Gaussian Naive Bayes classifier:") my_classifier = GaussianNB(priors=None) my_classifier.fit(train_x, train_y) pred_lbl_GNB = my_classifier.predict(test_x) # Prediction label/class pred_prb_GNB = my_classifier.predict_proba(test_x) # predict probability for all target labels print(" Training Random Forest classifier:") my_classifier = RandomForestClassifier(max_depth=10, n_estimators=30) my_classifier.fit(train_x, train_y) pred_lbl_RFC = my_classifier.predict(test_x) # Prediction label/class pred_prb_RFC = my_classifier.predict_proba(test_x) # predict probability for all target labels print(" Training Nearest Neighbors classifier:") n_neighbors = 100 # Optional (default = 5) weights = 'uniform' # str or callable, optional (default = 'uniform'), 'distance' algorithm = 'kd_tree' # {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional
def test_gnb_prior_large_bias():
    """Check the prediction when the class priors heavily favour one class."""
    # 1% / 99% priors
    skewed_priors = np.array([0.01, 0.99])
    clf = GaussianNB(priors=skewed_priors)
    clf.fit(X, y)

    predicted = clf.predict([[-0.1, -0.1]])
    assert predicted == np.array([2])
def _print_metrics(y_true, y_hat):
    """Print confusion matrix, classification report and accuracy, in that order."""
    print(confusion_matrix(y_true, y_hat))
    print(classification_report(y_true, y_hat))
    print(accuracy_score(y_true, y_hat))


# --- Logistic regression: validate on the hold-out split, then refit on all
# training data to produce the submission frame.
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
pred_logreg = logreg.predict(X_test)
_print_metrics(y_test, pred_logreg)

logreg.fit(X_train_all, y_train_all)
pred_all_logreg = logreg.predict(X_test_all)

sub_logreg = pd.DataFrame()
sub_logreg['PassengerId'] = df_test['PassengerId']
sub_logreg['Survived'] = pred_all_logreg
#sub_logmodel.to_csv('logmodel.csv',index=False)

# --- Gaussian Naive Bayes
from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()
gnb.fit(X_train, y_train)
pred_gnb = gnb.predict(X_test)
_print_metrics(y_test, pred_gnb)

# --- K-nearest neighbours
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=20)
# NOTE(review): fitted on X_train_sc / y_train_sc but evaluated on X_test /
# y_test -- confirm the evaluation should not use the matching "_sc" test data.
knn.fit(X_train_sc, y_train_sc)
pred_knn = knn.predict(X_test)
_print_metrics(y_test, pred_knn)

knn.fit(X_train_all, y_train_all)
pred_all_knn = knn.predict(X_test_all)

sub_knn = pd.DataFrame()
sub_knn['PassengerId'] = df_test['PassengerId']
checkpointer = ModelCheckpoint(
    filepath='best_weights.hdf5', verbose=1, save_best_only=True)
model.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    callbacks=[monitor, checkpointer],
    epochs=1,
)
#print(history.history.keys())

# The network's outputs become the input features of a Gaussian Naive Bayes model.
feat_train = model.predict(x_train)
feat_test = model.predict(x_test)

# argmax over axis 1 turns each target row into a class index
# (presumably the targets are one-hot encoded -- confirm).
train_labels = np.argmax(y_train, axis=1)
test_labels = np.argmax(y_test, axis=1)

gnb = GaussianNB()
gnb.fit(feat_train, train_labels)

print("trainning score...", gnb.score(feat_train, train_labels))
print("testing score...", gnb.score(feat_test, test_labels))

pred_labels = gnb.predict(feat_test)
probas = gnb.predict_proba(feat_test)

confusion_matrix = metrics.confusion_matrix(test_labels, pred_labels)
print("\n\nConfusion Matrix {} %".format(confusion_matrix))

classification_report = metrics.classification_report(
    test_labels, pred_labels, target_names=outcome)
print("\n\nClassifiction Scores {} %".format(classification_report))

skplt.metrics.plot_precision_recall_curve(test_labels, probas)
plt.show()

skplt.metrics.plot_roc_curve(test_labels, probas)
plt.show()
from sklearn.metrics import confusion_matrix

# The value of this call is neither stored nor printed.
accuracy_score(y_cv, pred_cv)
matrix = confusion_matrix(y_cv, pred_cv)
print(matrix)

# In[72]:

from sklearn.naive_bayes import GaussianNB

# fit() returns the estimator itself, so construction and training can be chained.
nb = GaussianNB().fit(x_train, y_train)

# In[73]:

pred_cv4 = nb.predict(x_cv)

# In[74]:

nb_accuracy = metrics.accuracy_score(y_cv, pred_cv4)
print("Accuracy:", nb_accuracy)

# In[75]:

pred_test = nb.predict(testdf)

# In[85]:

# Store the test-set predictions in the output frame.
finaldf['Loan_Status'] = pred_test
finaldf.head()

# In[86]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.25, random_state=0)

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

# Logistic Regression
from sklearn.linear_model import LogisticRegression

classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)

cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_pred, y_test)

# Naive Bayes
from sklearn.naive_bayes import GaussianNB

classifier_NB = GaussianNB()
classifier_NB.fit(X_train, y_train)
y_pred_NB = classifier_NB.predict(X_test)

# Evaluate the Naive Bayes predictions. These two metrics were previously
# computed from `y_pred`, i.e. they repeated the logistic-regression results.
cm_NB = confusion_matrix(y_test, y_pred_NB)
accuracy_NB = accuracy_score(y_pred_NB, y_test)
def TrainModel(self):
    """Train nine classifiers, log their accuracies and show a score table.

    Reads ``self.X_train`` / ``self.X_test`` (their ``.values`` are used) and
    ``self.y_train`` / ``self.y_test``. For every classifier the train and
    test accuracy, as a percentage rounded to two decimals, is appended to
    ``self.browser``. The test accuracies are then shown in a new
    ``QTableView`` stored in ``self.tableView``.
    """
    self.browser.clear()

    # Set Data Set
    X_train1, X_test1 = self.X_train.values, self.X_test.values
    y_train1, y_test1 = self.y_train, self.y_test
    self.browser.append("Load Dataset")
    self.browser.append("")
    self.browser.append("")

    # (heading written to the browser, label used in the table, estimator),
    # listed in the order the models are trained and reported.
    candidates = [
        ("<Logistic Regression Model>", 'Logistic Regression',
         LogisticRegression()),
        ("<Support Vector Machine's>", 'Support Vector Machines',
         SVC()),
        ("<Naive Bayes>", 'Naive Bayes',
         GaussianNB()),
        ("<K-Nearest Neighbours>", 'KNN',
         KNeighborsClassifier(n_neighbors=3)),
        ("<Decision Tree's>", 'Decision Tree',
         DecisionTreeClassifier()),
        ("<Stochastic Gradient Decent Classifier>", 'Stochastic Gradient Decent',
         SGDClassifier(max_iter=10000)),
        ("<Linear Support Vector Machines>", 'Linear SVC',
         LinearSVC()),
        ("<Perceptron>", 'Perceptron',
         Perceptron(max_iter=1000)),
        ("<Random Forest>", 'Random Forest',
         RandomForestClassifier(n_estimators=100)),
    ]

    test_scores = {}
    for heading, label, estimator in candidates:
        estimator.fit(X_train1, y_train1)
        train_acc = round(estimator.score(X_train1, y_train1) * 100, 2)
        test_acc = round(estimator.score(X_test1, y_test1) * 100, 2)
        self.browser.append(heading)
        self.browser.append("Train acc : " + str(train_acc) + "%")
        self.browser.append("Test acc : " + str(test_acc) + "%")
        self.browser.append("")
        test_scores[label] = test_acc

    # Row order of the score table before sorting.
    table_order = [
        'Support Vector Machines', 'KNN', 'Logistic Regression',
        'Random Forest', 'Naive Bayes', 'Perceptron',
        'Stochastic Gradient Decent', 'Linear SVC', 'Decision Tree',
    ]
    models = pd.DataFrame({
        'Model': table_order,
        'Score': [test_scores[label] for label in table_order],
    })
    # sort_values() returns a new frame; its result used to be discarded, so
    # the call had no effect. Keep the sorted frame, best score first, which
    # matches the descending order requested from the view below.
    models = models.sort_values(by='Score', ascending=False).reset_index(drop=True)

    models = PandasModelTrainData(models)
    self.tableView = QTableView()
    self.tableView.setSortingEnabled(True)
    self.tableView.setModel(models)
    self.tableView.setGeometry(850, 100, 320, 400)
    self.tableView.setColumnWidth(0, 200)
    self.tableView.sortByColumn(1, Qt.DescendingOrder)
    self.tableView.setWindowTitle("Accuracy")
    self.tableView.show()
test_size=0.25, random_state=0) # Feature Scaling from sklearn.preprocessing import StandardScaler sc = StandardScaler() X_train = sc.fit_transform(X_train) X_test = sc.transform(X_test) # Fitting Naive Bayes to the Training set from sklearn.naive_bayes import GaussianNB classifier = GaussianNB() classifier.fit(X_train, y_train) # Predicting the Test set results y_pred = classifier.predict(X_test) # Making the Confusion Matrix from sklearn.metrics import confusion_matrix cm = confusion_matrix(y_test, y_pred) # Visualising the Training set results from matplotlib.colors import ListedColormap X_set, y_set = X_train, y_train colors = np.array(["red", "green"]) X1, X2 = np.meshgrid( np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01), np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1,
cm = confusion_matrix(y_test, y_pred)
print(cm)

# In[6]:

from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()

# Time the training step.
start = time.time()
gnb.fit(X_train, y_train)
fit_seconds = time.time() - start
print('training completed in %s seconds' % fit_seconds)

# Time the prediction step.
start = time.time()
y_pred = gnb.predict(X_test)
predict_seconds = time.time() - start
print('prediction completed in %s seconds' % predict_seconds)

cm = confusion_matrix(y_test, y_pred)
print(cm)

# In[8]:

from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier(criterion='entropy')

start = time.time()
dt.fit(X_train, y_train)
fit_seconds = time.time() - start
print('training completed in %s seconds' % fit_seconds)
KNN_predict_prob = KNN.predict_proba(data_all_scaled)

# Post-processing using Graph-Cut
Seg_Label, seg_accuracy = Post_Processing(
    KNN_predict_prob, height, width, num_classes, y_test, test_indexes)

knn_train_acc = KNN.score(X_train_scaled, y_train)
knn_test_acc = KNN.score(X_test_scaled, y_test)
knn_elapsed = time.time() - start_time
print('(KNN) Train_Acc=%.3f, Cla_Acc=%.3f, Seg_Acc=%.3f(Time_cost=%.3f)'
      % (knn_train_acc, knn_test_acc, seg_accuracy, knn_elapsed))

# draw classification map
draw(GT_Label, KNN_Label, Seg_Label, train_map, test_map)
print('--------------------------------------------------------------------')

# Naive Bayes: GaussianNB (fitted on the unscaled data)
from sklearn.naive_bayes import GaussianNB

start_time = time.time()
GaussNB = GaussianNB().fit(X_train, y_train)

# Predict every sample, lay the labels out as (width, height), then swap axes.
gauss_flat_labels = GaussNB.predict(data_all)
GaussNB_Label = gauss_flat_labels.reshape(width, height).astype(int).transpose(1, 0)
GaussNB_predict_prob = GaussNB.predict_proba(data_all)

# Post-processing using Graph-Cut
Seg_Label, seg_accuracy = Post_Processing(
    GaussNB_predict_prob, height, width, num_classes, y_test, test_indexes)

gauss_train_acc = GaussNB.score(X_train, y_train)
gauss_test_acc = GaussNB.score(X_test, y_test)
gauss_elapsed = time.time() - start_time
print('(GaussNB) Train_Acc=%.3f, Cla_Acc=%.3f, Seg_Acc=%.3f(Time_cost=%.3f)'
      % (gauss_train_acc, gauss_test_acc, seg_accuracy, gauss_elapsed))

# draw classification map
draw(GT_Label, GaussNB_Label, Seg_Label, train_map, test_map)
print('--------------------------------------------------------------------')

# discriminant_analysis - linear discriminant analysis
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

start_time = time.time()
LDA = LinearDiscriminantAnalysis().fit(X_train, y_train)
# %% codecell
# preprocess data
x_train, x_test, y_train, y_test, sc_x = preprocessed_data.preprocess_data()

# %% codecell
# Fitting Naive Bayes to the Training set
classifier = GaussianNB()
classifier.fit(x_train, y_train)

# %% codecell
# Predicting the Test set results
y_pred = classifier.predict(x_test)

# %% codecell
# Making the Confusion Matrix
cm = confusion_matrix(y_test, y_pred)

# %% codecell
# Visualising the Training set results
x_set, y_set = x_train, y_train

# Dense grid over the first two feature columns, padded by 1 on every side.
axis_0 = np.arange(start=x_set[:, 0].min() - 1,
                   stop=x_set[:, 0].max() + 1,
                   step=0.01)
axis_1 = np.arange(start=x_set[:, 1].min() - 1,
                   stop=x_set[:, 1].max() + 1,
                   step=0.01)
X1, X2 = np.meshgrid(axis_0, axis_1)

# Classify every grid point and fold the labels back into the grid's shape.
grid_points = np.array([X1.ravel(), X2.ravel()]).T
grid_labels = classifier.predict(grid_points).reshape(X1.shape)
plt.contourf(X1, X2, grid_labels,
             alpha=0.75,
             cmap=ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
# Attach the remaining encoded columns to the working frame.
for column_name, column_values in (
        ('Stage_cat', Stage_cat),
        ('Duration_cat', Duration_cat),
        ('CD4start_cat', CD4start_cat),
        ('CD4number_cat', CD4number_cat),
        ('CD4last_cat', CD4last_cat),
        ('Perform_cat', Perform_cat),
):
    dup_df[column_name] = column_values

# The first six columns are the features, the seventh is the target.
all_values = dup_df.values
features = all_values[:, :6]
target = all_values[:, 6]

# Hold out 20% of the rows for testing.
(features_train, features_test,
 target_train, target_test) = train_test_split(
    features, target, test_size=0.20, random_state=20)
# print(features_train)

clf = GaussianNB()
clf.fit(features_train, target_train)
target_pred = clf.predict(features_test)

# Fraction of correctly classified test rows.
acc = accuracy_score(target_test, target_pred, normalize=True)

PPS = {
    'pp1': {
        'gender': 1,
        'who_stage': 1,
        'duration': 1,
        'start_cd4': 1,
        'no_cd4_done': 1,
        'recent_cd4': 1,
    },
}
skplt.plot_confusion_matrix(yte, ypred) plt.show() # Read the data if not os.path.isfile('./xtr.npy') or \ not os.path.isfile('./xte.npy') or \ not os.path.isfile('./ytr.npy') or \ not os.path.isfile('./yte.npy'): xtr, xte, ytr, yte = getEmbeddings("datasets/train.csv") np.save('./xtr', xtr) np.save('./xte', xte) np.save('./ytr', ytr) np.save('./yte', yte) xtr = np.load('./xtr.npy') xte = np.load('./xte.npy') ytr = np.load('./ytr.npy') yte = np.load('./yte.npy') # Use the built-in Naive Bayes classifier gnb = GaussianNB() gnb.fit(xtr, ytr) y_pred = gnb.predict(xte) m = yte.shape[0] n = (yte != y_pred).sum() print("Accuracy = " + format((m - n) / m * 100, '.2f') + "%") # 72.94% # Draw the confusion matrix plot_cmat(yte, y_pred)
print("normal_error.shape", normal_error.shape)
print("abno_error.shape", abno_error.shape)

# Append the label as a last column: 0 for normal rows, 1 for abnormal rows.
normal_error = np.column_stack((normal_error, np.zeros(len(normal_error))))
abno_error = np.column_stack((abno_error, np.ones(len(abno_error))))

# Stack both groups row-wise and shuffle the rows in place.
dataset = np.vstack((normal_error, abno_error))
np.random.shuffle(dataset)

# Last column is the label, everything before it is the feature vector.
inputs, targets = dataset[:, :-1], dataset[:, -1]
train_x, test_x, train_y, test_y = train_test_split(
    inputs, targets, test_size=0.3, random_state=42)

clf = GaussianNB()
clf.fit(train_x, train_y)

# Predictions and class probabilities on the training split ...
y_hat = clf.predict(train_x)
y_score = clf.predict_proba(train_x)
y_log_score = clf.predict_log_proba(train_x)
# ... and on the held-out split.
y_test_hat = clf.predict(test_x)
y_test_score = clf.predict_proba(test_x)

print(accuracy_score(train_y, y_hat))
print(metrics.recall_score(train_y, y_hat))
print(metrics.classification_report(train_y, y_hat))
print(metrics.classification_report(test_y, y_test_hat))
for shown in (y_score, y_test_score, y_test_hat, clf.classes_):
    print(shown)

# fpr, tpr, thresholds = metrics.roc_curve(train_y, y_hat)
# ROC curve from the probability in the last predict_proba column.
fpr, tpr, thresholds = metrics.roc_curve(test_y, y_test_score[:, -1])