Example #1
import os

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.naive_bayes import GaussianNB


def main():
	config = dict()
	config['resource_dir'] = os.path.abspath(os.path.join(os.path.realpath(__file__), '../../')) + "/resources/"
	config['raw_file'] = config['resource_dir'] + "ideal_weight.csv"
	ideal_weight_df = pd.read_csv(config['raw_file'])
	ideal_weight_df.columns = [x.replace("\'","") for x in ideal_weight_df.columns.values.tolist()]
	
	ideal_weight_df.loc[:,'sex'] = ideal_weight_df['sex'].map(lambda x: x.replace("\'",""))
	#print(ideal_weight_df)
	#print(config)

	plt.hist(ideal_weight_df['actual'], alpha=0.5, label='actual')
	plt.hist(ideal_weight_df['ideal'], alpha=0.5, label='ideal')
	plt.show() # figure_1.png

	ideal_weight_df['diff'].hist()

	ideal_weight_df['sex_id'] = ideal_weight_df['sex'].map(lambda x: 1 if x == 'Male' else 0)

	clf = GaussianNB()
	clf.fit(ideal_weight_df[['actual','ideal','diff']],ideal_weight_df['sex'])

	print(clf.predict([[145,160,-15]])) # male

	print(clf.predict([[160,145,15]])) # female
Example #2
def test_classification():
    # `data` and `target` are assumed to hold the iris features and labels,
    # e.g. loaded via sklearn.datasets.load_iris()
    from numpy import zeros

    t = zeros(len(target))
    t[target == 'setosa'] = 1
    t[target == 'versicolor'] = 2
    t[target == 'virginica'] = 3

    from sklearn.naive_bayes import GaussianNB
    classifier = GaussianNB()
    classifier.fit(data,t) # training on the iris dataset

    print(classifier.predict([data[0]]))  # wrap in a list: predict() expects 2D input
    print(t[0])


    from sklearn.model_selection import train_test_split
    train, test, t_train, t_test = train_test_split(data, t, test_size=0.4, random_state=0)

    classifier.fit(train,t_train) # train
    print(classifier.score(test,t_test)) # test

    from sklearn.metrics import confusion_matrix
    print(confusion_matrix(classifier.predict(test),t_test))

    from sklearn.metrics import classification_report
    print(classification_report(classifier.predict(test), t_test, target_names=['setosa', 'versicolor', 'virginica']))

    from sklearn.model_selection import cross_val_score
    # cross-validation with 6 folds
    scores = cross_val_score(classifier, data, t, cv=6)
    print(scores)

    from numpy import mean
    print(mean(scores))
Example #3
def MyNaiveBayes():
    pre = PreProcess()
    (training_value, test_value, test_pos_x, test_pos_y, training_pos_x, training_pos_y) = pre.split()
    # initialize the models
    clf_x = GaussianNB()
    clf_y = GaussianNB()
    # train the models
    clf_x.fit(training_value, training_pos_x)
    clf_y.fit(training_value, training_pos_y)
    # compute predictions
    result_pos_x = clf_x.predict(test_value)
    result_pos_y = clf_y.predict(test_value)
    '''
    print(result_pos_x)
    print(test_pos_x)
    print(result_pos_y)
    print(test_pos_y)
    '''
    # compute the errors
    x_dis = []
    y_dis = []
    d_dis = []
    for i in range(len(result_pos_x)):
        x_dis.append(abs(result_pos_x[i] - test_pos_x[i]))
        y_dis.append(abs(result_pos_y[i] - test_pos_y[i]))
        d_dis.append(math.sqrt((result_pos_x[i]-test_pos_x[i])**2+(result_pos_y[i]-test_pos_y[i])**2))
    x = (sum(x_dis))/len(result_pos_x)
    y = (sum(y_dis))/len(result_pos_y)
    d = (sum(d_dis))/len(d_dis)
    print(x, y, d)
    return x, y, d
Example #4
class GaussianNBClassifier:

	def __init__(self):
		"""
		This is the constructor responsible for initializing the classifier
		"""
		self.outputHeader = "#gnb"
		self.clf = None

	def buildModel(self):
		"""
		This builds the model of the Gaussian NB classifier
		"""
		self.clf =  GaussianNB()

	def trainGaussianNB(self,X, Y):
		"""
		Training the Gaussian NB Classifier
		"""
		self.clf.fit(X, Y)

	def validateGaussianNB(self,X, Y):
		"""
		Validate the Gaussian NB Classifier
		"""
		YPred = self.clf.predict(X)
		print(accuracy_score(Y, YPred))

	def testGaussianNB(self,X, Y):
		"""
		Test the Gaussian NB Classifier
		"""
		YPred = self.clf.predict(X)
		print(accuracy_score(Y, YPred))
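A short usage sketch of the wrapper above, with made-up toy arrays. It assumes GaussianNB and accuracy_score are imported at module level (from sklearn.naive_bayes and sklearn.metrics), since the class body references both:

# Hypothetical usage; the toy data is illustrative only.
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB

gnb = GaussianNBClassifier()
gnb.buildModel()                                 # must be called before training
gnb.trainGaussianNB([[0], [1], [2], [3]], [0, 0, 1, 1])
gnb.validateGaussianNB([[0.5], [2.5]], [0, 1])   # should print 1.0 on this toy data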
Example #5
def NB(text):
    ### features_train and features_test are the features for the training
    ### and testing datasets, respectively
    ### labels_train and labels_test are the corresponding item labels
    features_train, features_test, labels_train, labels_test = Preprocess()
    Ifeatures_train,Ifeatures_test,Ilabels_train=preprocess_input([text])

    # classification goes here

    clf = GaussianNB()

    # training
    train_t0 = time()
    clf.fit(features_train, labels_train)
    train_t1 = time()

    # prediction or testing
    test_t0 = time()
    predict = clf.predict(features_test)
    test_t1 = time()

    print "accuracy: ", clf.score(features_test, labels_test)
    print "#################################"
    print "tain time: ", round(train_t1 - train_t0, 3), "s"
    print "prediction time: ", round(test_t1 - test_t0, 3), "s"

    print "#################################"

    clf.fit(Ifeatures_train,Ilabels_train)
    print ("prediction of ",str(clf.predict(Ifeatures_test))[1])

    #print "prediction of ", clf.predict(preprocess_input(text))
    return  str(clf.predict(Ifeatures_test))[1]
Example #6
class GaussianNBLearner(AbstractLearner):
    """
    Gaussian Naive Bayes Learner

    http://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html

    We need to use X.toarray() because those functions expect dense arrays.
    """

    def __init__(self):
        self.nb = GaussianNB()

    def train(self, X, Y):
        if hasattr(X, 'toarray'):
            self.nb.fit(X.toarray(), Y)
        else:
            self.nb.fit(X, Y)

    def predict(self, X):
        if (hasattr(X, "toarray")):
            return self.nb.predict(X.toarray())
        else:
            return self.nb.predict(X)

    def score(self, X, Y):
        # note: despite the name, this returns the mean absolute error, not accuracy
        return np.mean(np.abs(self.nb.predict(X) - np.array(Y)))
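A hedged sketch of the dense/sparse branching described in the docstring; the CSR matrix and labels are invented, and the AbstractLearner base class plus the module-level numpy import are assumed available as in the original project:

# Illustrative only: exercises the X.toarray() branch with a SciPy CSR matrix.
import numpy as np
from scipy.sparse import csr_matrix

learner = GaussianNBLearner()
X_sparse = csr_matrix(np.array([[0., 1.], [1., 0.], [0., 2.], [2., 0.]]))
learner.train(X_sparse, [1, 0, 1, 0])            # sparse input is densified before fit
print(learner.predict(csr_matrix([[0., 3.]])))   # expected: [1]
print(learner.score(np.array([[0., 3.]]), [1]))  # mean absolute error: 0.0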
Example #7
def naive_bayes(train_features, train_labels, test_features, test_labels):
    # Train a Gaussian Naive Bayes classifier
    model = GaussianNB()
    model.fit(train_features, train_labels)
    test_results = model.predict(test_features)
    train_results = model.predict(train_features)

    return (test_results, train_results)
Example #8
class NBMatcher(MLMatcher):
    def __init__(self, *args, **kwargs):
        super(NBMatcher, self).__init__(*args, **kwargs)
        self.clf = GaussianNB(*args, **kwargs)
    def fit(self, X, Y):
        self.clf.fit(X, Y)
    def predict(self, X):
        return self.clf.predict(X)
Example #9
def bayes_test():
    X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    Y = np.array([1, 1, 1, 2, 2, 2])
    clf = GaussianNB()
    clf.fit(X, Y)
    print(clf.predict([[-0.8, -1]]))
    clf_pf = GaussianNB()
    clf_pf.partial_fit(X, Y, np.unique(Y))
    print(clf_pf.predict([[-0.8, -1]]))
Example #10
def classify(features_train, labels_train, features_test, labels_test):
  from time import time

  classifier = GaussianNB()
  t0 = time()
  classifier.fit(features_train, labels_train)
  print("training time: ", round(time() - t0), "s")
  t1 = time()
  classifier.predict(features_test)
  print("predicting time: ", round(time() - t1), "s")
  return classifier.score(features_test, labels_test)
Example #11
class TreeClassifier(Classifier):
    # note: despite its name, this classifier wraps GaussianNB, not a decision tree

    def __init__(self):
        self.classifier = GaussianNB()

    def do_train(self, X, y):
        self.classifier.fit(X, y)

    def do_classification(self, X, y):
        return self.classifier.predict(X)
Example #12
class NaiveBayes:
    __theta = 0
    __sigma = 0

    def __init__(self):
        pass 
        #self.__new_data = 0

    def learning(self,x_data,y_data):
        self.rssi = np.loadtxt(x_data, delimiter=',')
        print(self.rssi)

        self.position = np.loadtxt(y_data, delimiter=',')
        print(self.position)

        self.gaussian_nb = GaussianNB()

        from sklearn.model_selection import train_test_split
        rssi_train, rssi_test, position_train, position_test = train_test_split(self.rssi, self.position, random_state=0)

        self.gaussian_nb.fit(rssi_train,position_train)
        print("theta",self.gaussian_nb.theta_)
        print("sigma",self.gaussian_nb.sigma_)

        predicted = self.gaussian_nb.predict(rssi_test)

        print(metrics.accuracy_score(position_test, predicted))
    '''
    def set_params(self,theta,sigma):
        __theta = theta
        __sigma = sigma
        print(__theta)
        print(__sigma)
    '''

    def inference(self,r_data):
        self.predicted_class = self.gaussian_nb.predict(r_data)

        post_prob = self.gaussian_nb.predict_proba(r_data)
        log_prob = self.gaussian_nb.predict_log_proba(r_data)
        self.post_prob_float16 = post_prob.astype(np.float16)
        #E = 1*self.post_prob_float16[0][0]+2*self.post_prob_float16[0][1]+3*self.post_prob_float16[0][2]
        #var = (1*self.post_prob_float16[0][0]+4*self.post_prob_float16[0][1]+9*self.post_prob_float16[0][2])-E**2
        #print(self.post_prob_float16)
        #print(self.post_prob_float16[0])
        #print(var)
        print(self.predicted_class)
        #print(self.gaussian_nb.class_prior_)
        #print(log_prob)

        return self.predicted_class

    def output(self):
        output = graph.Graph()
        output.bar_graph(self.post_prob_float16[0])
Example #13
def predict_author(arr, yazar_features, yazar_classes):
    results = []

    print "\n[DEBUG] K-NN result (neighbors: 10)"
    knn = KNeighborsClassifier(n_neighbors=10)
    knn.fit(yazar_features, yazar_classes)
    print knn.predict(arr)
    results.append(knn.predict(arr)[0])

    print "\n[DEBUG] SVC result (linear) (degree=3)"
    svc = svm.SVC(kernel='linear', degree=3)
    svc.fit(yazar_features, yazar_classes)
    print svc.predict(arr)
    results.append(svc.predict(arr)[0])

    print "\n[DEBUG] Logistic Regression result ()"
    regr = linear_model.LogisticRegression()
    regr.fit(yazar_features, yazar_classes)
    print regr.predict(arr)
    results.append(regr.predict(arr)[0])

    print "\n[DEBUG] Gaussian Naive Bayes"
    gnb = GaussianNB()
    gnb.fit(yazar_features, yazar_classes)
    print gnb.predict(arr)
    results.append(gnb.predict(arr)[0])

    print "\n[DEBUG] Decision Tree Classifier"
    dtc = tree.DecisionTreeClassifier()
    dtc.fit(yazar_features, yazar_classes)
    print dtc.predict(arr)
    results.append(dtc.predict(arr)[0])

    print "\n[DEBUG] Gradient Boosting Classification"
    gbc = GradientBoostingClassifier()
    gbc.fit(yazar_features, yazar_classes)
    print gbc.predict(arr)
    results.append(gbc.predict(arr)[0])

    # output = open('features.pkl', 'wb')
    # pickle.dump(yazar_features, output)
    # output.close()

    # output = open('classes.pkl', 'wb')
    # pickle.dump(yazar_classes, output)
    # output.close()

    # test_yazar_features = []        # for test data
    # test_yazar_classes = []         # for test classes
    # # yazar_features = []             # for train data
    # # yazar_classes = []              # for train classes

    return results
def trainer(dataset = "Features.csv"):
    # Train the various machine learning algorithms using the features extracted.
    data, labels = extractor(dataset)
    train, test, train_labels, test_labels = train_test_split(data, labels, test_size = 0.20, random_state = 42)
    names, expected_results = zip(*test_labels)
    names1, train_labels = zip(*train_labels)
    
    print('S' + '\t' + 'H' + '\t' + 'F' + '\t' + 'A' + '\t' + 'N')
    
    # Random Forest Classifier
    rf = RandomForestClassifier(n_estimators = 100, n_jobs = 2)
    rf.fit(train, train_labels)
    results_boosting = rf.predict(test)
    conf_matrix = confusion_matrix(expected_results, results_boosting)
    print "Forset Classifier:\n"
    print conf_matrix
    accuracy_Boosting = float(np.trace(conf_matrix))/float(np.sum(conf_matrix))
    print accuracy_Boosting

    # KNN Classifier
    neigh = KNeighborsClassifier(n_neighbors=3)
    neigh.fit(train, train_labels)
    results_KNN = neigh.predict(test)
    conf_matrix = confusion_matrix(expected_results, results_KNN)
    print "KNN Classifier:\n"
    print conf_matrix
    accuracy_KNN = float(np.trace(conf_matrix))/float(np.sum(conf_matrix))
    print accuracy_KNN

    # Bayes classifier
    clf = GaussianNB()
    clf.fit(train, train_labels)
    results_Bayes = clf.predict(test)
    conf_matrix = confusion_matrix(expected_results, results_Bayes)
    print "\nBayes Classifier:\n"
    print conf_matrix
    accuracy_Bayes = float(np.trace(conf_matrix))/float(np.sum(conf_matrix))
    print accuracy_Bayes

    # Bernoulli Naive Bayes (labelled "Neural Network" in the output below)
    clf = BernoulliNB()
    clf.fit(train, train_labels)
    results_NN = clf.predict(test)
    conf_matrix = confusion_matrix(expected_results, results_NN)
    print "\nNeural Network:\n"
    print conf_matrix
    accuracy_NN = float(np.trace(conf_matrix))/float(np.sum(conf_matrix))
    print accuracy_NN

    documenter(names, results_boosting, results_Bayes, results_NN, results_KNN, accuracy_Boosting, accuracy_Bayes, accuracy_NN, accuracy_KNN)
Example #15
def NBAccuracy(features_train, labels_train, features_test, labels_test):
    """ compute the accuracy of your Naive Bayes classifier """
    ### import the sklearn module for GaussianNB
    from sklearn.naive_bayes import GaussianNB

    ### create classifier
    clf = GaussianNB()
    

    ### fit the classifier on the training features and labels
    clf.fit(features_train, labels_train)
    

    ### use the trained classifier to predict labels for the test features
    pred = clf.predict(features_test)


    ### calculate and return the accuracy on the test data
    ### this is slightly different than the example, 
    ### where we just print the accuracy
    ### you might need to import an sklearn module
    intersect = [i for i, j in zip(pred, labels_test) if i == j]
    matched = len(intersect)
    total = len(labels_test)
    accuracy = float(matched) / float(total)
    return accuracy
Example #16
class GaussianColorClassifier(ContourClassifier):
    '''
    A contour classifier which classifies a contour
    based on its mean color in BGR, HSV, and LAB colorspaces,
    using a Gaussian classifier for these features.

    For more usage info, see class ContourClassifier
    '''
    FEATURES = ['B', 'G', 'R', 'H', 'S', 'V', 'L', 'A', 'B']

    def __init__(self, classes, **kwargs):
        super(GaussianColorClassifier, self).__init__(classes, **kwargs)
        self.classifier = GaussianNB()

    def get_features(self, img, mask):
        mean = cv2.mean(img, mask)
        mean = np.array([[mean[:3]]], dtype=np.uint8)
        mean_hsv = cv2.cvtColor(mean, cv2.COLOR_BGR2HSV)
        mean_lab = cv2.cvtColor(mean, cv2.COLOR_BGR2LAB)
        features = np.hstack((mean.flatten(), mean_hsv.flatten(), mean_lab.flatten()))
        return features

    def classify_features(self, features):
        # `features` should be 2D (one row per contour) for predict()
        return self.classifier.predict(features)

    def feature_probabilities(self, features):
        return self.classifier.predict_proba(features)

    def train(self, features, classes):
        self.classifier.fit(features, classes)
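A hedged usage sketch with synthetic images; it assumes ContourClassifier's constructor accepts the list of class names, and the class names themselves are hypothetical:

# Illustrative only: two synthetic single-colour images stand in for contours.
import cv2
import numpy as np

clf = GaussianColorClassifier(['red_buoy', 'green_buoy'])
mask = np.full((10, 10), 255, dtype=np.uint8)   # use every pixel
red = np.zeros((10, 10, 3), dtype=np.uint8)
red[:, :, 2] = 200                              # BGR: red channel
green = np.zeros((10, 10, 3), dtype=np.uint8)
green[:, :, 1] = 200                            # BGR: green channel

feats = np.vstack([clf.get_features(img, mask) for img in (red, green)])
clf.train(feats, ['red_buoy', 'green_buoy'])
print(clf.classify_features(clf.get_features(red, mask).reshape(1, -1)))  # ['red_buoy']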
Example #17
def categorize(train_data,test_data,train_class,n_features):
    #cf= ExtraTreesClassifier()
    #cf.fit(train_data,train_class)
    #print (cf.feature_importances_)
    
    #lsvmcf = sklearn.svm.LinearSVC(penalty='l2', loss='l2', dual=True, tol=0.0001, C=100.0)  
    model = LogisticRegression()
    lgr = LogisticRegression(C=100.0,penalty='l1')    
    #knn = KNeighborsClassifier(n_neighbors=5, weights='uniform', algorithm='auto', leaf_size=10, p=2, metric='minkowski', metric_params=None)
    svmlcf = sklearn.svm.SVC(C=1000.0, kernel='linear', degree=1, gamma=0.01,  probability=True)#2
    svmcf = sklearn.svm.SVC(C=1000.0, kernel='rbf', degree=1, gamma=0.01,  probability=True)#2
    cf = DecisionTreeClassifier() 
    dct = DecisionTreeClassifier(criterion='gini', splitter='best',  min_samples_split=7, min_samples_leaf=4)
    rf = RandomForestClassifier(n_estimators=10, criterion='gini',  min_samples_split=7, min_samples_leaf=4, max_features='auto')
    gnb = GaussianNB()  #1
    adbst = sklearn.ensemble.AdaBoostClassifier(base_estimator=rf, n_estimators=5, learning_rate=1.0, algorithm='SAMME.R', random_state=True)

    #ch2 = SelectKBest(chi2, k=n_features)
    #train_data = ch2.fit_transform(train_data, train_class)
    #test_data = ch2.transform(test_data)

    #rfe = RFE(svmlcf,n_features)
    #rfe = rfe.fit(train_data, train_class)
    gnb.fit(train_data,train_class)
    return gnb.predict(test_data)
Example #18
def NBAccuracy(features_train, labels_train, features_test, labels_test):
    """ compute the accuracy of your Naive Bayes classifier """
    ### import the sklearn module for GaussianNB
    from sklearn.naive_bayes import GaussianNB

    ### create classifier
    clf = GaussianNB()

    ### fit the classifier on the training features and labels
    clf.fit(features_train, labels_train)

    ### use the trained classifier to predict labels for the test features
    pred = clf.predict(features_test)
    


    ### calculate and return the accuracy on the test data
    ### this is slightly different than the example, 
    ### where we just print the accuracy
    ### you might need to import an sklearn module
    total = len(labels_test)
    correct = (pred == labels_test).sum()
    accuracy = correct/float(total)

    # equivalently, via sklearn (this overwrites the manual computation above)
    from sklearn.metrics import accuracy_score
    accuracy = accuracy_score(labels_test, pred)
    return accuracy
Example #19
def NBAccuracy(features_train, labels_train, features_test, labels_test):
    """ compute the accuracy of your Naive Bayes classifier """
    ### import the sklearn module for GaussianNB
    from sklearn.naive_bayes import GaussianNB

    ### create classifier
    clf = GaussianNB()

    from time import time

    t0 = time()
    ### fit the classifier on the training features and labels
    clf.fit(features_train, labels_train)
    print("training time:", round(time()-t0, 3), "s")

    ### use the trained classifier to predict labels for the test features
    t1 = time()
    pred = clf.predict(features_test)
    print("predicting time:", round(time()-t1, 3), "s")

    ### calculate and return the accuracy on the test data
    ### this is slightly different than the example,
    ### where we just print the accuracy
    ### you might need to import an sklearn module
    accuracy = clf.score(features_test, labels_test)
    return accuracy
Example #20
def main(argv):
    if len(argv) != 5:
        print "./NB_train_pred.py train.csv train_lable test.csv save_folder label_idx"
        sys.exit(1);

    output_folder = argv[3]
    label_idx = int(argv[4])

    os.system("mkdir " + output_folder)

    print "Loading training data"
    train_array = np.load(argv[0])
    print "Loading training label"
    train_label_array = np.load(argv[1])
    print "Loading test data"
    test_array = np.load(argv[2])
    
    print "building NB on label " + str(label_idx)
    gnb = GaussianNB() 
    model = gnb.fit(train_array[:, 1:], train_label_array[1:, label_idx])  # note: the row slices look inconsistent (n vs. n-1 samples)

    print "predicting label " + str(label_idx)
    nb_pred = gnb.predict(test_array[:,1:])
    print "save the result"
    with open(output_folder + "/" + str(label_idx) + ".pred", 'w') as pred_file:
        pred_file.write("\n".join([ str(x) for x in nb_pred.tolist()]))
    with open(output_folder+"/"+str(label_idx) + ".npy", 'wb') as npy_file:
        np.save(npy_file, nb_pred)
Example #21
def gnbmodel(d,X_2,y_2,X_3,y_3,X_test,y_test):
    X_3_copy = X_3.copy(deep=True)
    X_3_copy['chance']=0
    index = 0    
    
########## k-fold cross-validation #############
    scores = cross_val_score(GaussianNB(), X_2, y_2, cv=5, scoring='accuracy')
    score_mean = scores.mean()
    print(d + ' 5-fold CV accuracy: ' + str(score_mean))
#################################################
    
    gnb = GaussianNB().fit(X_2,y_2)

################ predict on the test set ################
    answer_gnb = gnb.predict(X_test)
    accuracy = metrics.accuracy_score(y_test,answer_gnb)
    print(d + ' test accuracy: ' + str(accuracy))
###############################################
    
    chance = gnb.predict_proba(X_3)[:,1]
    for c in chance:
        X_3_copy.iloc[index,len(X_3_copy.columns)-1]=c
        index += 1
    chance_que = X_3_copy.iloc[:,len(X_3_copy.columns)-1]
    return chance_que
Example #22
def NBAccuracy(features_train, labels_train, features_test, labels_test):
	#Import sklearn modules for GaussianNB
	from sklearn.naive_bayes import GaussianNB
	from sklearn.metrics import accuracy_score
	from time import time
	
	#Create classifier
	classifier = GaussianNB()
	
	#Time the fit
	t0 = time()
	
	#Fit classifier on the training features
	classifier.fit(features_train, labels_train)
	
	print("Training Time: ", round(time() - t0, 3), "s")
	
	#Time the prediction
	t0 = time()
	
	#Use trained classifier to predict labels for test features
	pred = classifier.predict(features_test)
	
	print("Prediction Time: ", round(time() - t0, 3), "s")
	
	#Calculate accuracy against the answers in labels_test
	accuracy = accuracy_score(pred, labels_test)
	
	return accuracy
Example #23
def classifier(model,X,X1,y,y1):
    import time

    t0 = time.time()
    if model=='gnb':
        print('GNB')
        gnb = GaussianNB().fit(X, y)
    elif model=='mnb':
        print('MNB')
        gnb = MultinomialNB().fit(X,y)
    elif model=='bnb':
        print('BNB')
        gnb = BernoulliNB().fit(X, y)
    elif model=='lin':
        print('Linear SVM')
        gnb = svm.SVC(kernel='linear', C=0.5).fit(X, y)
    elif model=='rbf':
        print('RBF SVM')
        gnb = svm.SVC().fit(X, y)
    elif model=='poly':
        print('Poly SVM')
        gnb = svm.SVC(kernel='poly', degree=2).fit(X, y)
    elif model=='rfc':
        print('Random Forest')
        gnb = RandomForestClassifier(max_depth=10, n_estimators=100, max_features=5).fit(X, y)
    elif model=='lr':
        print('Logistic Regression')
        gnb = LogisticRegression().fit(X, y)
    elif model=='knn':
        print("K nearest neighbours")
        gnb = KNeighborsClassifier(n_neighbors=6).fit(X, y)
    y_pred = gnb.predict(X1)
    print(accuracy_score(y1, y_pred), f1_score(y1, y_pred))
    print(time.time() - t0)
Example #24
File: nb.py Project: mkdmkk/infaas
class PatternBasedDiagnosis:
    """
    Pattern-based diagnosis with Gaussian Naive Bayes
    """

    __slots__ = [
        "model"
    ]

    def __init__(self):
        pass

    def train(self, data, labels):
        """
        Train the Gaussian Naive Bayes model with the training data
        :param data:
        :param labels:
        :return:
        """
        print('Training Data: %s' % (data))
        print('Training Labels: %s' % (labels))
        self.model = GaussianNB()
        self.model = self.model.fit(data, labels)

    def eval(self, obs):
        # print('Testing Result: %s; %s' % (self.model.predict(obs), self.model.predict_proba(obs)))
        print('Testing Result: %s' % self.model.predict(obs))
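A hedged usage sketch; the feature patterns and fault labels below are invented, and obs must be a 2D array for scikit-learn's predict():

# Illustrative only: hypothetical patterns and labels.
diag = PatternBasedDiagnosis()
diag.train([[1, 0], [0, 1]], ['disk_fault', 'net_fault'])
diag.eval([[1, 0]])   # prints: Testing Result: ['disk_fault']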
Example #25
def getGaussianPred(featureMatrix, labels, testSet, testSet_docIndex):
    """
    All input arguments are return of getTrainTestData()
    :param featureMatrix:
    :param labels:
    :param testSet:
    :param testSet_docIndex:
    :return docIndexPred: dict{docid: [index1, index2, ...], ...}
                        key is docid
                        value is all cognates' index
    """
    gnb = GaussianNB()
    gnb.fit(featureMatrix, labels)
    # pred = gnb.predict(featureMatrix)
    pred = gnb.predict(testSet)

    docIndexPred = dict()

    for i, p in enumerate(pred):
        if p:
            docid = testSet_docIndex[i, 0]
            index = testSet_docIndex[i, 1]
            if docid in docIndexPred:
                docIndexPred[docid].append(index)
            else:
                docIndexPred[docid] = [index]

    return docIndexPred
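A toy illustration of the documented return shape; real inputs come from getTrainTestData(), so the arrays below are stand-ins in which label 1 marks a cognate:

# Stand-in arrays; shapes mirror what getTrainTestData() is assumed to return.
import numpy as np

featureMatrix = np.array([[0.1], [0.9], [0.2], [0.8]])
labels = np.array([0, 1, 0, 1])
testSet = np.array([[0.85], [0.15]])
testSet_docIndex = np.array([[7, 3], [7, 4]])   # columns: docid, cognate index

print(getGaussianPred(featureMatrix, labels, testSet, testSet_docIndex))
# expected (up to numpy scalar repr): {7: [3]}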
Example #26
class RegularizedGaussianNB:
  """
  Three types of regularization are possible:
    - regularize the variance of a feature within a class toward the
      average variance of all features from that class
    - regularize the variance of a feature within a class toward its
      pooled variance across all classes
    - add some constant amount of variance to each feature
  In practice, the latter seems to work the best, though the regularization
  value should be cross-validated. 
  """
  def __init__(self, avg_weight = 0, pooled_weight = 0, extra_variance = 0.1):
    self.pooled_weight = pooled_weight
    self.avg_weight = avg_weight
    self.extra_variance = extra_variance
    self.model = GaussianNB()
    
  def fit(self, X,Y):
    self.model.fit(X,Y)
    p = self.pooled_weight
    a = self.avg_weight
    ev = self.extra_variance 
    original_weight = 1.0 - p - a
    pooled_variances = np.var(X, 0)
    for i in range(self.model.sigma_.shape[0]):  # sigma_ was renamed var_ in scikit-learn 1.0+
      class_variances = self.model.sigma_[i, :]
      new_variances = original_weight*class_variances + \
        p * pooled_variances + \
        a * np.mean(class_variances) + \
        ev 
      self.model.sigma_[i, :] = new_variances
        
        
  def predict(self, X):
    return self.model.predict(X)
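Since the docstring says the regularization value should be cross-validated, here is a minimal held-out-split sketch; it assumes a scikit-learn version in which GaussianNB still exposes sigma_, and the candidate grid is arbitrary:

# Sketch only: pick extra_variance on a held-out split.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X_all, Y_all = load_iris(return_X_y=True)
X_tr, X_te, Y_tr, Y_te = train_test_split(X_all, Y_all, random_state=0)

for ev in [0.01, 0.1, 1.0]:
    model = RegularizedGaussianNB(extra_variance=ev)
    model.fit(X_tr, Y_tr)
    print(ev, np.mean(model.predict(X_te) == Y_te))   # held-out accuracy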
Example #27
def univariateFeatureSelection(f_list, my_dataset):
	result = []
	for feature in f_list:
		# Replace 'NaN' with 0
		for name in my_dataset:
			data_point = my_dataset[name]
			if not data_point[feature]:
				data_point[feature] = 0
			elif data_point[feature] == 'NaN':
				data_point[feature] =0

		data = featureFormat(my_dataset, ['poi',feature], sort_keys = True, remove_all_zeroes = False)
		labels, features = targetFeatureSplit(data)
		features = [abs(x) for x in features]
		from sklearn.model_selection import StratifiedShuffleSplit
		# the old cross_validation API took (labels, n_iter); model_selection
		# takes n_splits and yields indices from cv.split()
		cv = StratifiedShuffleSplit(n_splits = 1000, random_state = 42)
		features_train = []
		features_test  = []
		labels_train   = []
		labels_test    = []
		for train_idx, test_idx in cv.split(features, labels):
			for ii in train_idx:
				features_train.append( features[ii] )
				labels_train.append( labels[ii] )
			for jj in test_idx:
				features_test.append( features[jj] )
				labels_test.append( labels[jj] )
		from sklearn.naive_bayes import GaussianNB
		clf = GaussianNB()
		clf.fit(features_train, labels_train)
		predictions = clf.predict(features_test)
		score = score_func(labels_test,predictions)
		result.append((feature,score[0],score[1],score[2]))
	result = sorted(result, reverse=True, key=lambda x: x[3])
	return result
Example #28
class CruiseAlgorithm(object):
	# cruise algorithm is used to classify the cruise phase vs noncruise phase, it uses the differential change in data stream as the input matrix
	def __init__(self, testing=False):
		self.core = GaussianNB()
		self.scaler = RobustScaler()
		self.X_prev = None
		self.testing = testing
	def fit(self,X,Y): # Y should be the label of cruise or not
		X = self.prepare(X)
		self.core.fit(X,Y.ravel())
	def predict(self, X):
		if self.testing:
			X_t = self.prepare(X)
		else:
			if self.X_prev is not None:  # array truthiness is ambiguous
				X_t = X - self.X_prev
			else:
				X_t = X
			self.X_prev = X

		print(repr(X_t))
		prediction_result = self.core.predict(X_t)
		return np.asmatrix(prediction_result)

	def prepare(self,X):
		a = np.zeros((X.shape[0],X.shape[1]))
		for i in range(X.shape[0]-1):
			a[i+1,:] = X[i+1] - X[i]
		return a
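A hedged usage sketch; the readings below are invented stand-ins for a real data stream, module-level imports of GaussianNB and RobustScaler are assumed, and testing=True is used so predict() re-applies prepare() rather than differencing against X_prev:

# Illustrative only: fabricated readings; 1 = cruise, 0 = not cruise.
import numpy as np

algo = CruiseAlgorithm(testing=True)
X = np.array([[0., 0.], [1., 0.], [2., 0.], [2., 5.], [2., 9.]])
Y = np.array([1, 1, 1, 0, 0])
algo.fit(X, Y)
print(algo.predict(X))   # a 1x5 matrix of cruise/non-cruise labels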
Example #29
def myClassifier(X,Y,model,CV=4, scoreType='pure'):
    # X = [[0, 0], [1, 1],[1, 2]]
    # y = [0, 1, 2]
    score = {}
    print "Error Analysis using", scoreType
    if model == "SVM":
        clf = svm.SVC(probability=True, random_state=0, kernel='rbf')        
        #clf = svm.SVR(cache_size=7000)        
        
    elif model == "LR":
        clf = linear_model.LogisticRegression()
        clf.fit(X, Y)        

    elif model == "NB":
         clf = GaussianNB()
         clf.fit(X, Y)
         
    elif model=='MLP': # multilayer perceptron
         clf = MLPClassifier(hidden_layer_sizes=[100], solver='lbfgs')  # `algorithm` was renamed `solver`
         clf.fit(X, Y)
    
    if scoreType == 'cv':
        from sklearn.model_selection import cross_val_score
        accu = np.mean(cross_val_score(clf, X, Y, scoring='accuracy',cv=CV))
    elif scoreType == 'pure':  
        predictions=clf.predict(X)
        accu = sum([int(predictions[q]==Y[q]) for q in range(len(Y))])/len(Y)        
    return accu, clf
Example #30
def selectKBest(previous_result, data):
	# remove 'restricted_stock_deferred' and 'director_fees'
	previous_result.pop(4)
	previous_result.pop(4)

	result = []
	_k = 10
	for k in range(0,_k):
		feature_list = ['poi']
		for n in range(0,k+1):
			feature_list.append(previous_result[n][0])

		data = featureFormat(my_dataset, feature_list, sort_keys = True, remove_all_zeroes = False)
		labels, features = targetFeatureSplit(data)
		features = [abs(x) for x in features]
		from sklearn.model_selection import StratifiedShuffleSplit
		cv = StratifiedShuffleSplit(n_splits = 1000, random_state = 42)
		features_train = []
		features_test  = []
		labels_train   = []
		labels_test    = []
		for train_idx, test_idx in cv.split(features, labels):
			for ii in train_idx:
				features_train.append( features[ii] )
				labels_train.append( labels[ii] )
			for jj in test_idx:
				features_test.append( features[jj] )
				labels_test.append( labels[jj] )
		from sklearn.naive_bayes import GaussianNB
		clf = GaussianNB()
		clf.fit(features_train, labels_train)
		predictions = clf.predict(features_test)
		score = score_func(labels_test,predictions)
		result.append((k+1,score[0],score[1],score[2]))
	return result
Example #31
    with open('temp.apk', 'wb') as f:
        while size > 0:
            data = client.recv(1024)
            f.write(data)
            size -= len(data)
    print('APK Saved')
    ap = apk.APK('Apps/temp.apk')
    per = ap.get_permissions()
    permissions = []

    for line in per:
        curr = ''
        for i in reversed(line):
            if i != '.':
                curr += i
            else:
                break
        curr = curr[::-1]
        permissions.append(curr)
    P = np.genfromtxt('Training/Perdiction.csv', delimiter=',')
    for i in permissions:
        if get_index.get(i) is not None:
            P[get_index.get(i)] = 1.0

    result = ''
    if clf.predict([P]) == 0.0:
        result = 'Non-Malicious'
    else:
        result = 'Malicious'
    client.sendall(result.encode())  # sockets need bytes in Python 3
    client.close()
Example #32
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, confusion_matrix
dataset=pd.read_csv('divorce.csv',delimiter=";")
a=dataset.drop_duplicates()  # note: only used for the row count below


print("DUPLICATE SONRASI YENİ VERİ SAYIMIZ:")
print(len(a))

X=dataset.iloc[:,0:54]
y=dataset["Class"]
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.20, random_state=1)
gaussian_bayes = GaussianNB()
gaussian_bayes.fit(X_train,y_train.values.ravel())
y_pred = gaussian_bayes.predict(X_test)
from sklearn.metrics import classification_report, confusion_matrix
print("CONFUSION MATRIX")
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))


print("Accuracy:",metrics.accuracy_score(y_test, y_pred))


# Precision score
from sklearn.metrics import precision_score, roc_auc_score

print("Precision")
print(precision_score(y_test, y_pred, average='weighted'))
Example #33
# Read pixel values into X, read class values into y
df_X = pandas.read_csv("../../data/x_train_gr_smpl.csv")
df_y = pandas.read_csv("../../data/y_train_smpl.csv")

# Shuffle the order of the data (keeping the X and y rows in sync)
df_X, df_y = shuffle(df_X, df_y)

# Split dataset into training and testing set, 90% and 10%, respectively
X_train, X_test, y_train, y_test = train_test_split(df_X,
                                                    df_y,
                                                    test_size=0.1,
                                                    random_state=0)

naive_bayes = GaussianNB()
classifier = naive_bayes.fit(X_train, y_train.values.ravel())  # ravel to avoid a column-vector warning
y_predicted = naive_bayes.predict(X_test)
print("\nNaive Bayes accuracy score: ",
      round(metrics.accuracy_score(y_test, y_predicted) * 100, 2), "%\n")

# Plot non-normalized confusion matrix
labels = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]

np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
titles_options = [("Confusion matrix, without normalization", None),
                  ("Normalized confusion matrix", 'true')]

for title, normalize in titles_options:
    disp = plot_confusion_matrix(classifier,
                                 X_test,
Example #34
                                      outcome_feature,
                                      test_size=0.5,
                                      random_state=0)

###
### Define Classifier
###

clf = GaussianNB()

###
### Train Classifier on (X1,Y1) and Validate on (X2,Y2)
###

clf.fit(X_1, Y_1)
score = clf.score(X_2, Y_2)
print("accuracy: {0}".format(score.mean()))

###
### Print Confusion Matrix
###

output = clf.predict(X_2)

matrix = confusion_matrix(output, Y_2)
print(matrix)

###
### Save Classifier
###
joblib.dump(clf, 'model/nb.pkl')
Example #35
def NB(train, test, pred):
    # note: `pred` carries the training labels
    naive = GaussianNB()
    naive.fit(train, pred)
    return naive.predict(test)
Example #36
y_train_labeled = train_labeled['y']
x_train_labeled = train_labeled.drop(['y'], axis=1)
x_train_unlabeled = train_unlabeled

#Switch to numpy
# Preprocessing X
x_train = []
x_train_labeled = np.array(x_train_labeled)
x_train_unlabeled = np.array(x_train_unlabeled)
x_train.extend(x_train_labeled)
x_train.extend(x_train_unlabeled)
x_test = np.array(test)

# Preprocessing y
y_train_labeled = np.array(y_train_labeled)
placeholder = -1 * np.ones(21000)  # unlabeled rows get the placeholder label -1
y_train = np.concatenate((y_train_labeled, placeholder))

# Trying Gaussian Naive Bayes
gnb = GaussianNB()
gnb.fit(x_train, y_train)
y_pred = gnb.predict(x_test)

# output results
d = {'Id': test.index, 'y': y_pred}
output = pd.DataFrame(d)
output.to_csv('output1.csv', index=False)

# from sklearn.metrics import accuracy_score
# acc = accuracy_score(y, y_pred)
Example #37
# Supervised learning: Iris classification

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load the data
iris = sns.load_dataset('iris')
x_iris = iris.drop(
    'species', axis='columns')  # drop the species column
y_iris = iris['species']

# Split the data into training and test sets
xtrain, xtest, ytrain, ytest = train_test_split(x_iris, y_iris, random_state=1)
model = GaussianNB()
print("Training the GaussianNB model...")
model.fit(xtrain, ytrain)
print("Scoring new data...")
ymodel = model.predict(xtest)
print("Final accuracy: {}".format(accuracy_score(ytest, ymodel)))
Example #38
conducted between 1958 and 1970 at the University
of Chicago's Billings Hospital on the survival of 
patients who had undergone surgery for cancer

1. Age of patient at time of operation (numerical) 
2. Patient's year of operation (year - 1900, numerical) 
3. Number of positive axillary nodes detected (numerical) 
4. Survival status (class attribute) 
-- 1 = the patient survived 5 years or longer 
-- 2 = the patient died within 5 year
'''
c1, c2, c3, c4 = np.loadtxt('data.csv', unpack=True, delimiter=',')
x = np.column_stack((c1, c3))
y = c4
# Create NaiveBayes Classifier
clf = GaussianNB()
# fit the model
clf.fit(x, y)
# make predictions
predictions = clf.predict(x)

# calculate accuracy
print(accuracy_score(y, predictions))

from matplotlib import pyplot as plt

plt.scatter(c1, c3, c=c4)
plt.colorbar(ticks=[1, 2])
plt.xlabel("Age of the patient")
plt.ylabel("No of positive axillary nodes")
Example #39
                                                    test_size=0.3,
                                                    random_state=109)

# In[36]:

X_train.shape, X_test.shape, y_train.shape, y_test.shape

# In[37]:

# Train the model using the training sets
model.fit(X_train, y_train)

# In[38]:

#Predict the response for test dataset
y_pred = model.predict(X_test)

# In[39]:

#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics

# Model Accuracy, how often is the classifier correct?
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

# In[42]:

## Apply Algorithm

from sklearn.ensemble import RandomForestClassifier
random_forest_model = RandomForestClassifier(random_state=10)
Example #40
# -*- coding: utf-8 -*-

import pandas as pd

base = pd.read_csv('risco_credito.csv')
previsores = base.iloc[:, 0:4].values
classe = base.iloc[:, 4].values

from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder

labelEncoder = LabelEncoder()

previsores[:, 0] = labelEncoder.fit_transform(previsores[:, 0])
previsores[:, 1] = labelEncoder.fit_transform(previsores[:, 1])
previsores[:, 2] = labelEncoder.fit_transform(previsores[:, 2])
previsores[:, 3] = labelEncoder.fit_transform(previsores[:, 3])

classificador = GaussianNB()
classificador.fit(previsores, classe)

# good history, high debt, no collateral, income > 35
# bad history, high debt, adequate collateral, income < 15

resultado = classificador.predict([[0, 0, 1, 2], [2, 0, 0, 0]])
print(classificador.classes_)
print(classificador.class_count_)
print(classificador.class_prior_)
Example #41
  features2[np.isnan(features2)] = -100

  #TODO 1: Compute the cosine similarity matrix of your own wifi signal strength
# def cosine_similarity(a,b):
#   numerator = np.dot(a,b)
#   x = np.sqrt(np.sum(np.square(a)))
#   y = np.sqrt(np.sum(np.square(b)))
#   denominator = x*y 
#   return numerator/denominator  

def new_matrix(num):
  matrix=np.zeros((num.shape[0], num.shape[0]))
  for x in range(num.shape[0]):
    for y in range(num.shape[0]):
      matrix[x,y]=cosine_similarity(num[x,:],num[y,:])
  return matrix

similarity_matrix = new_matrix(features)
plot_consine_similarity(similarity_matrix, labels)

print(new_matrix(features).shape[0])
  #TODO 2: Compute the cosine similarity matrix of two different people's wifi scans
similarity_matrix2 = new_matrix(features2)
plot_consine_similarity(similarity_matrix2, labels2)

  #TODO 3: Classify the location of the other person
clf= GaussianNB()
clf.fit(np.array(features2[:14]), np.array(labels2[:14]))
for i in range(14,35):
    print(clf.predict(features2[i:i+1]))  # keep 2D: predict() rejects 1D rows
Example #42
y = dataset.iloc[:, 8:9].values

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.25,
                                                    random_state=0)

# Fitting Naive Bayes to the Training set
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)

# save the model to disk
filename = 'Naive Bayes Diabetes.sav'
pickle.dump(classifier, open(filename, 'wb'))

# load the model from disk
loaded_model = pickle.load(open(filename, 'rb'))

# Predicting the Test set results
result = loaded_model.score(X_test, y_test)
print("Test score: {0:.2f} %".format(100 * result))

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
Example #43
class GNB(object):
    def __init__(self):
        self.possible_labels = ['left', 'keep', 'right']
        self.clf = GaussianNB()
        #self.clf = ExtraTreesClassifier(n_estimators=20, max_depth=45, min_samples_split=4, random_state=0)
        #self.clf = MLPClassifier(hidden_layer_sizes=(4000),
        #alpha=1e-8, momentum=.7, verbose=True, tol=1e-7, max_iter=400)
        self.scaler = StandardScaler()

    def train(self, data, labels):
        """
		Trains the classifier with N data points and labels.

		INPUTS
		data - array of N observations
		  - Each observation is a tuple with 4 values: s, d, 
		    s_dot and d_dot.
		  - Example : [
			  	[3.5, 0.1, 5.9, -0.02],
			  	[8.0, -0.3, 3.0, 2.2],
			  	...
		  	]

		labels - array of N labels
		  - Each label is one of "left", "keep", or "right".
		"""
        #print(data)
        #print(labels)
        #x = [[i[0], i[2], i[3], i[1]%4] for i in data]
        x = [[i[3]] for i in data]
        #print(len(x))
        #self.clf.fit(x, labels)

        #self.scaler.fit(data[0])
        #data = self.scaler.transform(data)
        self.clf.fit(x, labels)

    def predict(self, observation):
        """
		Once trained, this method is called and expected to return 
		a predicted behavior for the given observation.

		INPUTS

		observation - a 4 tuple with s, d, s_dot, d_dot.
		  - Example: [3.5, 0.1, 8.5, -0.2]

		OUTPUT

		A label representing the best guess of the classifier. Can
		be one of "left", "keep" or "right".
		"""
        # TODO - complete this

        #i = self.scaler.transform([observation])
        i = [observation[3]]
        #prediction = self.clf.predict([[i[1], i[2], i[3], i[1]%4]])
        prediction = self.clf.predict([i])  # wrap in a list: predict() expects 2D input
        #print(prediction)

        return prediction
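A toy call matching the documented observation format [s, d, s_dot, d_dot]; the numbers are invented, and module-level imports of GaussianNB and StandardScaler are assumed since the class references them:

# Illustrative only: three fabricated observations, one per label.
gnb = GNB()
data = [[3.5, 0.1, 5.9, -0.02],
        [8.0, -0.3, 3.0, 2.2],
        [5.1, 0.2, 4.4, -1.9]]
labels = ['keep', 'right', 'left']
gnb.train(data, labels)
print(gnb.predict([3.4, 0.0, 5.0, 0.1]))   # -> ['keep'] for these toy numbers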
Example #44
accuracy = knn.score(xtest, ytest)
print(accuracy)

# creating a confusion matrix
knn_predictions = knn.predict(x_test)
'''
'''
from sklearn.tree import DecisionTreeClassifier
dtree_model = DecisionTreeClassifier(max_depth = 7).fit(xtrain, ytrain)
dtree_predictions = dtree_model.predict(x_test)
list=[]
'''
from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB().fit(xtrain, ytrain)
gnb_predictions = gnb.predict(x_test)

# accuracy on X_test
accuracy = gnb.score(xtest, ytest)
print(accuracy)
results = []  # renamed from `list` to avoid shadowing the builtin
for i in gnb_predictions:
    '''
    results.append(i)
    temp=results[i]
    results.append(out[temp])
    '''
    results.append(out[i])
    print(out[i])
Example #45
data = digits.images.reshape((n_samples, -1))  # flatten each image to a 1D feature vector

classifier = GaussianNB()
#MLPClassifier(alpha=1, hidden_layer_sizes=(25, 15), random_state=1)
#svm.SVC(gamma=1)#KNeighborsClassifier(3)#GaussianNB()
filename = "naive_bayes.bin"

# Train the model on the first two thirds of the labelled data
classifier.fit(data[:int(n_samples * 2 / 3)],
               digits.target[:int(n_samples * 2 / 3)])

#Save trained model to disk and reload it
_ = joblib.dump(classifier, filename)
classifier = joblib.load(filename)

predicted = classifier.predict(data[int(n_samples / 3):])  # note: this slice overlaps the training data
expected = digits.target[int(n_samples / 3):]

print("Classification report for classifier %s:\n%s\n" %
      (classifier, metrics.classification_report(expected, predicted)))
images_and_predictions = list(
    zip(digits.images[int(n_samples / 3):], predicted))
x = randint(0, int(n_samples / 3))  #to show different examples each time

for index, (image, prediction) in enumerate(images_and_predictions[x:x + 21]):
    plt.subplot(3, 7, index + 1)
    plt.axis('off')
    plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    plt.title('%i(%i)' % (prediction, expected[x + index]))

plt.show()
Example #46
previsores[:, 6] = labelencoder_previsores.fit_transform(previsores[:, 6])
previsores[:, 7] = labelencoder_previsores.fit_transform(previsores[:, 7])
previsores[:, 8] = labelencoder_previsores.fit_transform(previsores[:, 8])
previsores[:, 9] = labelencoder_previsores.fit_transform(previsores[:, 9])
previsores[:, 13] = labelencoder_previsores.fit_transform(previsores[:, 13])

onehotencoder = OneHotEncoder(categorical_features=[1, 3, 5, 6, 7, 8, 9, 13])  # categorical_features was removed in newer scikit-learn; use ColumnTransformer there
previsores = onehotencoder.fit_transform(previsores).toarray()

labelencoder_classe = LabelEncoder()
classe = labelencoder_classe.fit_transform(classe)

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
previsores = scaler.fit_transform(previsores)

from sklearn.model_selection import train_test_split
previsores_treinamento, previsores_teste, classe_treinamento, classe_teste = train_test_split(
    previsores, classe, test_size=0.15, random_state=0)

from sklearn.naive_bayes import GaussianNB
classificador = GaussianNB()
classificador.fit(previsores_treinamento, classe_treinamento)

# Prediction results
previsoes = classificador.predict(previsores_teste)

# Check the accuracy
from sklearn.metrics import confusion_matrix, accuracy_score
precisao = accuracy_score(classe_teste, previsoes)
matriz = confusion_matrix(classe_teste, previsoes)
Example #47
                                                                           0].values.tolist()

# Test
test_x, test_y = read_test_class.iloc[:, 1:].values, read_test_class.iloc[:, 0].values.tolist()

# =============================================================================
# TRADITIONAL MACHINE LEARNING ALGORITHMS
# =============================================================================
print("Training Gaussian Naive Bayes classifier:")
my_classifier = GaussianNB(priors=None)
my_classifier.fit(train_x, train_y)
pred_lbl_GNB = my_classifier.predict(test_x)  # Prediction label/class
pred_prb_GNB = my_classifier.predict_proba(test_x)
# predict probability for all target labels

print(" Training Random Forest classifier:")
my_classifier = RandomForestClassifier(max_depth=10, n_estimators=30)
my_classifier.fit(train_x, train_y)
pred_lbl_RFC = my_classifier.predict(test_x)  # Prediction label/class
pred_prb_RFC = my_classifier.predict_proba(test_x)
# predict probability for all target labels

print(" Training Nearest Neighbors classifier:")
n_neighbors = 100
# Optional (default = 5)
weights = 'uniform'  # str or callable, optional (default = 'uniform'), 'distance'
algorithm = 'kd_tree'  # {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional
Example #48
def test_gnb_prior_large_bias():
    """Test if good prediction when class prior favor largely one class"""
    clf = GaussianNB(priors=np.array([0.01, 0.99]))
    clf.fit(X, y)
    assert clf.predict([[-0.1, -0.1]]) == np.array([2])
Example #49
logreg = LogisticRegression()
logreg.fit(X_train,y_train)
pred_logreg = logreg.predict(X_test)
print(confusion_matrix(y_test, pred_logreg))
print(classification_report(y_test, pred_logreg))
print(accuracy_score(y_test, pred_logreg))
logreg.fit(X_train_all, y_train_all)
pred_all_logreg = logreg.predict(X_test_all)
sub_logreg = pd.DataFrame()
sub_logreg['PassengerId'] = df_test['PassengerId']
sub_logreg['Survived'] = pred_all_logreg
#sub_logmodel.to_csv('logmodel.csv',index=False)
from sklearn.naive_bayes import GaussianNB
gnb=GaussianNB()
gnb.fit(X_train,y_train)
pred_gnb = gnb.predict(X_test)
print(confusion_matrix(y_test, pred_gnb))
print(classification_report(y_test, pred_gnb))
print(accuracy_score(y_test, pred_gnb))

from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=20)
knn.fit(X_train_sc,y_train_sc)
pred_knn = knn.predict(X_test)  # note: trained on scaled features but predicting on unscaled ones
print(confusion_matrix(y_test, pred_knn))
print(classification_report(y_test, pred_knn))
print(accuracy_score(y_test, pred_knn))
knn.fit(X_train_all, y_train_all)
pred_all_knn = knn.predict(X_test_all)
sub_knn = pd.DataFrame()
sub_knn['PassengerId'] = df_test['PassengerId']
Example #50
checkpointer = ModelCheckpoint(filepath='best_weights.hdf5',
                               verbose=1,
                               save_best_only=True)
model.fit(x_train,
          y_train,
          validation_data=(x_test, y_test),
          callbacks=[monitor, checkpointer],
          epochs=1)
#print(history.history.keys())

feat_train = model.predict(x_train)
feat_test = model.predict(x_test)
gnb = GaussianNB()
gnb.fit(feat_train, np.argmax(y_train, axis=1))
print("trainning score...", gnb.score(feat_train, np.argmax(y_train, axis=1)))
print("testing score...", gnb.score(feat_test, np.argmax(y_test, axis=1)))
pred_labels = gnb.predict(feat_test)
probas = gnb.predict_proba(feat_test)
confusion_matrix = metrics.confusion_matrix(np.argmax(y_test, axis=1),
                                            pred_labels)
print("\n\nConfusion Matrix {} %".format(confusion_matrix))
classification_report = metrics.classification_report(np.argmax(y_test,
                                                                axis=1),
                                                      pred_labels,
                                                      target_names=outcome)
print("\n\nClassifiction Scores {} %".format(classification_report))
skplt.metrics.plot_precision_recall_curve(np.argmax(y_test, axis=1), probas)
plt.show()
skplt.metrics.plot_roc_curve(np.argmax(y_test, axis=1), probas)
plt.show()
Example #51
from sklearn.metrics import confusion_matrix

accuracy_score(y_cv, pred_cv)
matrix = confusion_matrix(y_cv, pred_cv)
print(matrix)

# In[72]:

from sklearn.naive_bayes import GaussianNB

nb = GaussianNB()
nb.fit(x_train, y_train)

# In[73]:

pred_cv4 = nb.predict(x_cv)

# In[74]:

print("Accuracy:", metrics.accuracy_score(y_cv, pred_cv4))

# In[75]:

pred_test = nb.predict(testdf)

# In[85]:

finaldf['Loan_Status'] = pred_test
finaldf.head()

# In[86]:
Example #52
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.25, random_state = 0)

#Logistic Regression
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_pred, y_test)

#Naive bayes
from sklearn.naive_bayes import GaussianNB
classifier_NB = GaussianNB()
classifier_NB.fit(X_train, y_train)

y_pred_NB = classifier_NB.predict(X_test)

from sklearn.metrics import confusion_matrix
cm_NB = confusion_matrix(y_test, y_pred_NB)

from sklearn.metrics import accuracy_score
accuracy_NB = accuracy_score(y_test, y_pred_NB)


Example #53
    def TrainModel(self):
        self.browser.clear()
        # Set Data Set
        X_train, X_test, y_train, y_test = self.X_train, self.X_test, self.y_train, self.y_test
        X_train1, X_test1, y_train1, y_test1 = X_train.values, X_test.values, y_train, y_test
        self.browser.append("Load Dataset")
        self.browser.append("")
        self.browser.append("")

        # LogisticRegression
        logreg = LogisticRegression()
        logreg.fit(X_train1, y_train1)
        y_pred_logreg = logreg.predict(X_test1)
        acc_log_train = round(logreg.score(X_train1, y_train1) * 100, 2)
        acc_log_test = round(logreg.score(X_test1, y_test1) * 100, 2)
        self.browser.append("<Logistic Regression Model>")
        self.browser.append("Train acc : " + str(acc_log_train) + "%")
        self.browser.append("Test acc : " + str(acc_log_test) + "%")
        self.browser.append("")
        #time.sleep(3)

        # Support Vector Machine's
        svc = SVC()
        svc.fit(X_train1, y_train1)
        y_pred_svc = svc.predict(X_test1)
        acc_svc_train = round(svc.score(X_train1, y_train1) * 100, 2)
        acc_svc_test = round(svc.score(X_test1, y_test1) * 100, 2)
        self.browser.append("<Support Vector Machine's>")
        self.browser.append("Train acc : " + str(acc_svc_train) + "%")
        self.browser.append("Test acc : " + str(acc_svc_test) + "%")
        self.browser.append("")
        #time.sleep(3)

        # Naive Bayes
        gaussian = GaussianNB()
        gaussian.fit(X_train1, y_train1)
        y_pred_gau = gaussian.predict(X_test1)
        acc_gau_train = round(gaussian.score(X_train1, y_train1) * 100, 2)
        acc_gau_test = round(gaussian.score(X_test1, y_test1) * 100, 2)
        self.browser.append("<Naive Bayes>")
        self.browser.append("Train acc : " + str(acc_gau_train) + "%")
        self.browser.append("Test acc : " + str(acc_gau_test) + "%")
        self.browser.append("")

        # K-Nearest Neighbours
        knn = KNeighborsClassifier(n_neighbors=3)
        knn.fit(X_train1, y_train1)
        y_pred_knn = knn.predict(X_test1)
        acc_knn_train = round(knn.score(X_train1, y_train1) * 100, 2)
        acc_knn_test = round(knn.score(X_test1, y_test1) * 100, 2)
        self.browser.append("<K-Nearest Neighbours>")
        self.browser.append("Train acc : " + str(acc_knn_train) + "%")
        self.browser.append("Test acc : " + str(acc_knn_test) + "%")
        self.browser.append("")

        # Decision Trees
        dec = DecisionTreeClassifier()
        dec.fit(X_train1, y_train1)
        y_pred_dec = dec.predict(X_test1)
        acc_dec_train = round(dec.score(X_train1, y_train1) * 100, 2)
        acc_dec_test = round(dec.score(X_test1, y_test1) * 100, 2)
        self.browser.append("<Decision Tree's>")
        self.browser.append("Train acc : " + str(acc_dec_train) + "%")
        self.browser.append("Test acc : " + str(acc_dec_test) + "%")
        self.browser.append("")

        #sgd
        sgd = SGDClassifier(max_iter=10000)
        sgd.fit(X_train1, y_train1)
        y_pred_sgd = sgd.predict(X_test1)
        acc_sgd_train = round(sgd.score(X_train1, y_train1) * 100, 2)
        acc_sgd_test = round(sgd.score(X_test1, y_test1) * 100, 2)
        self.browser.append("<Stochastic Gradient Decent Classifier>")
        self.browser.append("Train acc : " + str(acc_sgd_train) + "%")
        self.browser.append("Test acc : " + str(acc_sgd_test) + "%")
        self.browser.append("")

        #Linear SVC
        l_svc = LinearSVC()
        l_svc.fit(X_train1, y_train1)
        y_pred_l_svc = l_svc.predict(X_test1)
        acc_l_svc_train = round(l_svc.score(X_train1, y_train1) * 100, 2)
        acc_l_svc_test = round(l_svc.score(X_test1, y_test1) * 100, 2)
        self.browser.append("<Linear Support Vector Machines>")
        self.browser.append("Train acc : " + str(acc_l_svc_train) + "%")
        self.browser.append("Test acc : " + str(acc_l_svc_test) + "%")
        self.browser.append("")

        #Perceptron
        per = Perceptron(max_iter=1000)
        per.fit(X_train1, y_train1)
        y_pred_per = per.predict(X_test1)
        acc_per_train = round(per.score(X_train1, y_train1) * 100, 2)
        acc_per_test = round(per.score(X_test1, y_test1) * 100, 2)
        self.browser.append("<Perceptron>")
        self.browser.append("Train acc : " + str(acc_per_train) + "%")
        self.browser.append("Test acc : " + str(acc_per_test) + "%")
        self.browser.append("")

        #Random Forest
        random_forest = RandomForestClassifier(n_estimators=100)
        random_forest.fit(X_train1, y_train1)
        y_pred_random_forest = random_forest.predict(X_test1)
        acc_random_forest_train = round(
            random_forest.score(X_train1, y_train1) * 100, 2)
        acc_random_forest_test = round(
            random_forest.score(X_test1, y_test1) * 100, 2)
        self.browser.append("<Random Forest>")
        self.browser.append("Train acc : " + str(acc_random_forest_train) +
                            "%")
        self.browser.append("Test acc : " + str(acc_random_forest_test) + "%")
        self.browser.append("")

        models = pd.DataFrame({
            'Model': [
                'Support Vector Machines', 'KNN', 'Logistic Regression',
                'Random Forest', 'Naive Bayes', 'Perceptron',
                'Stochastic Gradient Descent', 'Linear SVC', 'Decision Tree'
            ],
            'Score': [
                acc_svc_test, acc_knn_test, acc_log_test,
                acc_random_forest_test, acc_gau_test, acc_per_test,
                acc_sgd_test, acc_l_svc_test, acc_dec_test
            ]
        })
        models = models.sort_values(by='Score', ascending=True)  # assign: sort_values returns a copy
        models = PandasModelTrainData(models)
        self.tableView = QTableView()
        self.tableView.setSortingEnabled(True)
        self.tableView.setModel(models)
        self.tableView.setGeometry(850, 100, 320, 400)
        self.tableView.setColumnWidth(0, 200)
        self.tableView.sortByColumn(1, Qt.DescendingOrder)
        self.tableView.setWindowTitle("Accuracy")
        self.tableView.show()
Example #54
                                                    test_size=0.25,
                                                    random_state=0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Fitting Naive Bayes to the Training set
from sklearn.naive_bayes import GaussianNB
classifier = GaussianNB()
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

# Visualising the Training set results
from matplotlib.colors import ListedColormap
X_set, y_set = X_train, y_train
colors = np.array(["red", "green"])
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1,
              stop=X_set[:, 0].max() + 1,
              step=0.01),
    np.arange(start=X_set[:, 1].min() - 1,
              stop=X_set[:, 1].max() + 1,
Example #55
cm = confusion_matrix(y_test, y_pred)

print(cm)

# In[6]:

from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()

start = time.time()
gnb.fit(X_train, y_train)
print('training completed in %s seconds' % (time.time() - start))

start = time.time()
y_pred = gnb.predict(X_test)
print('prediction completed in %s seconds' % (time.time() - start))

cm = confusion_matrix(y_test, y_pred)

print(cm)

# In[8]:

from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier(criterion='entropy')

start = time.time()
dt.fit(X_train, y_train)
print('training completed in %s seconds' % (time.time() - start))
Example #56
KNN_predict_prob = KNN.predict_proba(data_all_scaled)
# Post-processing using Graph-Cut
Seg_Label, seg_accuracy = Post_Processing(KNN_predict_prob,height,width,\
                                          num_classes,y_test,test_indexes)
print('(KNN) Train_Acc=%.3f, Cla_Acc=%.3f, Seg_Acc=%.3f(Time_cost=%.3f)'\
      % (KNN.score(X_train_scaled,y_train),KNN.score(X_test_scaled,y_test),\
         seg_accuracy, (time.time()-start_time)))
# draw classification map
draw(GT_Label, KNN_Label, Seg_Label, train_map, test_map)
print('--------------------------------------------------------------------')

# Naive Bayes: GaussianNB
from sklearn.naive_bayes import GaussianNB
start_time = time.time()
GaussNB = GaussianNB().fit(X_train, y_train)
GaussNB_Label = GaussNB.predict(data_all).reshape(
    width, height).astype(int).transpose(1, 0)
GaussNB_predict_prob = GaussNB.predict_proba(data_all)
# Post-processing using Graph-Cut
Seg_Label, seg_accuracy = Post_Processing(GaussNB_predict_prob,height,width,\
                                          num_classes,y_test,test_indexes)
print('(GaussNB) Train_Acc=%.3f, Cla_Acc=%.3f, Seg_Acc=%.3f(Time_cost=%.3f)'\
      % (GaussNB.score(X_train,y_train),GaussNB.score(X_test,y_test),\
         seg_accuracy, (time.time()-start_time)))
# draw classification map
draw(GT_Label, GaussNB_Label, Seg_Label, train_map, test_map)
print('--------------------------------------------------------------------')

# discriminant_analysis - linear discriminant analysis
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
start_time = time.time()
LDA = LinearDiscriminantAnalysis().fit(X_train, y_train)
Example #57

# %% codecell
# preprocess data
x_train, x_test, y_train, y_test, sc_x = preprocessed_data.preprocess_data()


# %% codecell
# Fitting Naive Bayes to the Training set
classifier = GaussianNB()
classifier.fit(x_train, y_train)


# %% codecell
# Predicting the Test set results
y_pred = classifier.predict(x_test)


# %% codecell
# Making the Confusion Matrix
cm = confusion_matrix(y_test, y_pred)


# %% codecell
# Visualising the Training set results
x_set, y_set = x_train, y_train
X1, X2 = np.meshgrid(np.arange(start=x_set[:, 0].min() - 1, stop=x_set[:, 0].max() + 1, step=0.01),
                     np.arange(start=x_set[:, 1].min() - 1, stop=x_set[:, 1].max() + 1, step=0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
Example #58
dup_df['Stage_cat'] = Stage_cat
dup_df['Duration_cat'] = Duration_cat
dup_df['CD4start_cat'] = CD4start_cat
dup_df['CD4number_cat'] = CD4number_cat
dup_df['CD4last_cat'] = CD4last_cat
dup_df['Perform_cat'] = Perform_cat

features = dup_df.values[:, :6]
target = dup_df.values[:, 6]
features_train, features_test, target_train, target_test = train_test_split(
    features, target, test_size=0.20, random_state=20)
# print(features_train)

clf = GaussianNB()
clf.fit(features_train, target_train)
target_pred = clf.predict(features_test)

acc = accuracy_score(target_test, target_pred, normalize=True)

PPS = {
    'pp1': {
        'gender': 1,
        'who_stage': 1,
        'duration': 1,
        'start_cd4': 1,
        'no_cd4_done': 1,
        'recent_cd4': 1
    }
}

Example #59
    skplt.metrics.plot_confusion_matrix(yte, ypred)
    plt.show()


# Read the data
if not os.path.isfile('./xtr.npy') or \
    not os.path.isfile('./xte.npy') or \
    not os.path.isfile('./ytr.npy') or \
    not os.path.isfile('./yte.npy'):
    xtr, xte, ytr, yte = getEmbeddings("datasets/train.csv")
    np.save('./xtr', xtr)
    np.save('./xte', xte)
    np.save('./ytr', ytr)
    np.save('./yte', yte)

xtr = np.load('./xtr.npy')
xte = np.load('./xte.npy')
ytr = np.load('./ytr.npy')
yte = np.load('./yte.npy')

# Use the built-in Naive Bayes classifier
gnb = GaussianNB()
gnb.fit(xtr, ytr)
y_pred = gnb.predict(xte)
m = yte.shape[0]
n = (yte != y_pred).sum()
print("Accuracy = " + format((m - n) / m * 100, '.2f') + "%")  # 72.94%

# Draw the confusion matrix
plot_cmat(yte, y_pred)
Example #60
    print("normal_error.shape",normal_error.shape)
    print("abno_error.shape",abno_error.shape)

    normal_error = np.c_[normal_error, np.zeros(len(normal_error))]
    abno_error = np.c_[abno_error, np.ones(len(abno_error))]


    dataset = np.r_[normal_error, abno_error]
    np.random.shuffle(dataset)

    train_x, test_x, train_y, test_y = train_test_split(dataset[:,:-1], dataset[:,-1], test_size=0.3, random_state=42)


    clf = GaussianNB()
    clf.fit(train_x, train_y)
    y_hat = clf.predict(train_x)
    y_score = clf.predict_proba(train_x)
    y_log_score = clf.predict_log_proba(train_x)
    y_test_hat = clf.predict(test_x)
    y_test_score = clf.predict_proba(test_x)
    print(accuracy_score(train_y, y_hat))
    print(metrics.recall_score(train_y, y_hat))
    print(metrics.classification_report(train_y, y_hat))
    print(metrics.classification_report(test_y, y_test_hat))
    print(y_score)
    print(y_test_score)
    print(y_test_hat)
    print(clf.classes_)

    # fpr, tpr, thresholds = metrics.roc_curve(train_y, y_hat)
    fpr, tpr, thresholds = metrics.roc_curve(test_y, y_test_score[:,-1])