Code example #1
 def predict(self):
     ClassifierN = 0
     classifier = GaussianNB()
     #X_train,X_test,y_train,y_test = train_test_split(self.training,self.trainingLabels,test_size=0.1,random_state=33)
     classifier.fit(self.training, self.trainingLabels)
     # predict UnLabledData
     pred_labelsForTrainingUn = classifier.predict(self.test)
     print('Enhanced classifier...')
     while 1:
         p1 = pred_labelsForTrainingUn
         # fit the unlabeled data, weighted by λ, into the classifier
         classifier.partial_fit(
             self.test,
             pred_labelsForTrainingUn,
             classes=['0', '1'],
             sample_weight=np.ones(len(self.test), dtype=float) *
             self.Lambda)
         pred_labelsForTrainingUn = classifier.predict(self.test)
         p2 = pred_labelsForTrainingUn
         # check whether the classifier has stabilized
         if list(p1) == list(p2):
             ClassifierN += 1
         elif ClassifierN > 0:
             ClassifierN = 0
         if ClassifierN == 20:
             break
     pred_labels = classifier.predict(self.test)
     print('naive_bayes with EM algorithm:')
     print(classification_report(self.testLabels, pred_labels, digits=4))
     return classification_report(self.testLabels, pred_labels, digits=4)
Code example #2
File: test_naive_bayes.py Project: daidan/MLearning
def test_gnb_sample_weight():
    """Test whether sample weights are properly used in GNB. """
    # Sample weights all being 1 should not change results
    sw = np.ones(6)
    clf = GaussianNB().fit(X, y)
    clf_sw = GaussianNB().fit(X, y, sw)

    assert_array_almost_equal(clf.theta_, clf_sw.theta_)
    assert_array_almost_equal(clf.sigma_, clf_sw.sigma_)

    # Fitting twice with half sample-weights should result
    # in same result as fitting once with full weights
    sw = rng.rand(y.shape[0])
    clf1 = GaussianNB().fit(X, y, sample_weight=sw)
    clf2 = GaussianNB().partial_fit(X, y, classes=[1, 2], sample_weight=sw / 2)
    clf2.partial_fit(X, y, sample_weight=sw / 2)

    assert_array_almost_equal(clf1.theta_, clf2.theta_)
    assert_array_almost_equal(clf1.sigma_, clf2.sigma_)

    # Check that duplicate entries and correspondingly increased sample
    # weights yield the same result
    ind = rng.randint(0, X.shape[0], 20)
    sample_weight = np.bincount(ind, minlength=X.shape[0])

    clf_dupl = GaussianNB().fit(X[ind], y[ind])
    clf_sw = GaussianNB().fit(X, y, sample_weight)

    assert_array_almost_equal(clf_dupl.theta_, clf_sw.theta_)
    assert_array_almost_equal(clf_dupl.sigma_, clf_sw.sigma_)
Code example #3
def main():
    """
        GaussianNB类的主要参数仅有一个,即先验概率priors ,对应Y的各个类别的先验概率P(Y=Ck)。
        这个值默认不给出,如果不给出此时P(Y=Ck)=mk/m。其中m为训练集样本总数量,mk为输出为第k类别的训练集样本数。如果给出的话就以priors为准
    """
    nb = GaussianNB(priors=None)

    # Incremental training: call partial_fit repeatedly over batches of data;
    # the first call must pass the full list of classes, e.g.
    #   nb.partial_fit(X_batch, y_batch, classes=np.unique(y))
    # (X_batch and y_batch are placeholder names)

    """
        binarize:
            BernoulliNB一共有4个参数,其中3个参数的名字和意义和MultinomialNB完全相同。
            唯一增加的一个参数是binarize。这个参数主要是用来帮BernoulliNB处理二项分布的,可以是数值或者不输入。
            如果不输入,则BernoulliNB认为每个数据特征都已经是二元的。否则的话,小于binarize的会归为一类,大于binarize的会归为另外一类
    """
    nb = BernoulliNB(alpha=1.0, fit_prior=True, class_prior=None, binarize=.0)


    """
        alpha:
            λ为一个大于0的常数,常常取为1,即拉普拉斯平滑。也可以取其他值
            参数alpha即为上面的常数λ,如果你没有特别的需要,用默认的1即可。如果发现拟合的不好,需要调优时,可以选择稍大于1或者稍小于1的数
        fit_prior:
            布尔参数fit_prior表示是否要考虑先验概率,如果是false,则所有的样本类别输出都有相同的类别先验概率。
            否则可以自己用第三个参数class_prior输入先验概率,或者不输入第三个参数class_prior让MultinomialNB自己从训练集样本来计算先验概率,
                此时的先验概率为P(Y=Ck)=mk/m。其中m为训练集样本总数量,mk为输出为第k类别的训练集样本数
            fit_prior	class_prior	  最终先验概率
                false	  填或者不填没有意义	P(Y=Ck)=1/k
                true	   不填	    P(Y=Ck)=mk/m
                true	   填	     P(Y=Ck)=class_prior
    """
    nb = MultinomialNB(alpha=1.0, fit_prior=True, class_prior=None)
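
The following is a minimal, self-contained sketch tying the three docstrings above to concrete calls; the toy data and parameter values are invented for illustration and are not part of the original example:

import numpy as np
from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB

X = np.array([[0.2, 1.3], [0.4, 1.1], [3.1, 0.2], [2.8, 0.4]])
y = np.array([0, 0, 1, 1])

# Explicit priors override the empirical class frequencies mk/m.
gnb = GaussianNB(priors=[0.7, 0.3]).fit(X, y)

# binarize=0.5: feature values <= 0.5 are treated as 0, values above as 1.
bnb = BernoulliNB(alpha=1.0, binarize=0.5).fit(X, y)

# alpha=1.0 is Laplace smoothing; class_prior pins the priors explicitly.
mnb = MultinomialNB(alpha=1.0, fit_prior=True, class_prior=[0.5, 0.5]).fit(X, y)

print(gnb.predict([[0.3, 1.2]]), bnb.predict([[0.3, 1.2]]), mnb.predict([[0.3, 1.2]]))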
Code example #4
def main():
    for i in range(num_chunks):

        smart_data = pd.read_csv(INPUT_PATH, iterator=True, chunksize=chunk_size)
        model = GaussianNB()
        counter = 0

        test_y = pd.DataFrame()
        test_X = pd.DataFrame()

        for data in smart_data:
            # data_working = data[data['failure'] == 0]
            # data_broken = data[data['failure'] == 1]
            # data_working = data_working.sample(n=len(data_broken.index))
            # data = pd.concat((data_broken, data_working))
            # data = data.sample(frac=1)

            data_y = data['failure']
            data_X = data.drop(labels=['failure'], axis=1)

            if counter == i:
                test_y = data_y
                test_X = data_X
            else:
                model.partial_fit(data_X, data_y, classes=[0, 1])
            counter += 1

        predictions = model.predict(test_X)
        print(classification_report(test_y, predictions, output_dict=True))
        print(confusion_matrix(test_y, predictions))
Code example #5
File: SemiSAD.py Project: recq-cse/SDLib
 def predict(self):
     ClassifierN = 0
     classifier = GaussianNB()
     X_train, X_test, y_train, y_test = train_test_split(
         self.training,
         self.trainingLabels,
         test_size=0.75,
         random_state=33)
     classifier.fit(X_train, y_train)
     # predict UnLabledData
     #pred_labelsForTrainingUn = classifier.predict(X_test)
     print('Enhanced classifier...')
     while 1:
         proba_labelsForTrainingUn = classifier.predict_proba(X_test)
         X_test_labels = np.hstack((X_test, proba_labelsForTrainingUn))
         X_test_labels0_sort = sorted(X_test_labels,
                                      key=lambda x: x[5],
                                      reverse=True)
         if X_test_labels0_sort[4][5] > X_test_labels0_sort[4][6]:
             a = list(map(lambda x: x[:5], X_test_labels0_sort))
             b = a[0:5]
             classifier.partial_fit(
                 b, ['0', '0', '0', '0', '0'],
                 classes=['0', '1'],
                 sample_weight=np.ones(len(b), dtype=float) *
                 self.Lambda)
             X_test_labels = X_test_labels0_sort[5:]
             X_test = a[5:]
         X_test_labels0_sort = sorted(X_test_labels,
                                      key=lambda x: x[6],
                                      reverse=True)
         if X_test_labels0_sort[4][5] < X_test_labels0_sort[4][6]:
             a = list(map(lambda x: x[:5], X_test_labels0_sort))
             b = a[0:5]
             classifier.partial_fit(
                 b, ['1', '1', '1', '1', '1'],
                 classes=['0', '1'],
                 sample_weight=np.ones(len(b), dtype=float) * 1)
             X_test = a[5:]
         if len(X_test) < 6:
             break
     # while 1 :
     #     p1 = pred_labelsForTrainingUn
     #     # fit the unlabeled data, weighted by λ, into the classifier
     #     classifier.partial_fit(X_test, pred_labelsForTrainingUn,classes=['0','1'], sample_weight=np.ones(len(X_test),dtype=np.float)*self.Lambda)
     #     pred_labelsForTrainingUn = classifier.predict(X_test)
     #     p2 = pred_labelsForTrainingUn
     #     # check whether the classifier has stabilized
     #     if list(p1)==list(p2) :
     #         ClassifierN += 1
     #     elif ClassifierN > 0:
     #         ClassifierN = 0
     #     if ClassifierN == 20:
     #         break
     pred_labels = classifier.predict(self.test)
     print('naive_bayes with EM algorithm:')
     print(classification_report(self.testLabels, pred_labels, digits=4))
     return classification_report(self.testLabels, pred_labels, digits=4)
Code example #6
def test():
    X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    Y = np.array([1, 1, 1, 2, 2, 2])
    clf = GaussianNB()
    clf.fit(X, Y)
    print(clf.predict([[-0.8, -1]]))
    clf_pf = GaussianNB()
    clf_pf.partial_fit(X, Y, np.unique(Y))
    print(clf_pf.predict([[-0.8, -1]]))
Code example #7
def bayes_test():
    X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    Y = np.array([1, 1, 1, 2, 2, 2])
    clf = GaussianNB()
    clf.fit(X, Y)
    print(clf.predict([[-0.8, -1]]))
    clf_pf = GaussianNB()
    clf_pf.partial_fit(X, Y, np.unique(Y))
    print(clf_pf.predict([[-0.8, -1]]))
Code example #8
def main():
    #Gaussian Naive Bayes classifier
    clf = GaussianNB()
    clf.fit(X, Y)
    #vector of predictions
    print(clf.predict([[-0.8, -1]]))

    clf_pf = GaussianNB()
    clf_pf.partial_fit(X, Y, np.unique(Y))
    print(clf_pf.predict([[-0.8, -1]]))
Code example #9
def trainGaussianNB(X,y,loadweights):
	print("Training GaussianNB...")
	classifier = GaussianNB()
	if loadweights:
		with open('weights/GaussianNB.pickle', 'rb') as handle:
			classifier = pickle.load(handle)
	for _ in range(10):
		classifier.partial_fit(X,y,classes=[0,1])
	with open('weights/GaussianNB.pickle', 'wb') as handle:
		pickle.dump(classifier, handle, protocol=pickle.HIGHEST_PROTOCOL)
	print (classifier.score(X,y))
Code example #10
File: test_naive_bayes.py Project: daidan/MLearning
def test_gnb_partial_fit():
    clf = GaussianNB().fit(X, y)
    clf_pf = GaussianNB().partial_fit(X, y, np.unique(y))
    assert_array_almost_equal(clf.theta_, clf_pf.theta_)
    assert_array_almost_equal(clf.sigma_, clf_pf.sigma_)
    assert_array_almost_equal(clf.class_prior_, clf_pf.class_prior_)

    clf_pf2 = GaussianNB().partial_fit(X[0::2, :], y[0::2], np.unique(y))
    clf_pf2.partial_fit(X[1::2], y[1::2])
    assert_array_almost_equal(clf.theta_, clf_pf2.theta_)
    assert_array_almost_equal(clf.sigma_, clf_pf2.sigma_)
    assert_array_almost_equal(clf.class_prior_, clf_pf2.class_prior_)
Code example #11
class DrunkLearningNB(DrunkLearning):
    """drunk_learning class"""
    def __init__(self):
        super(DrunkLearningNB, self).__init__()
        self.clf = GaussianNB()
        self.filename = 'modelNB.pkl'

    def partial_fit(self, X, y):
        X = np.array([X])
        y = np.array(y)
        self.clf.partial_fit(X, y, [0, 1])
        joblib.dump(self.clf, self.filename, compress=9)
Code example #12
def nb_partialFit(X, Y):
    m = X.shape[0]
    clf = GaussianNB()

    chunk_size = 10000
    n_chunk = int(m / chunk_size) - 1
    for i in tqdm(range(n_chunk)):
        x_partial = X[i * chunk_size:(i + 1) * chunk_size]
        y_partial = Y[i * chunk_size:(i + 1) * chunk_size]
        clf.partial_fit(x_partial.todense(), y_partial, classes=[0, 4])

    return clf
Code example #13
def test_mixednb_all_continuous():
    """Check that MixedNB is equivalent to GaussNB for continuous variables."""
    X, y, types = _classification_task()

    mixed_nb = MixedNB(is_nominal=[False] * 5)
    mixed_nb.partial_fit(X, y, classes=['a', 'b', 'c'])
    mixed_pred = mixed_nb.predict_proba(X)

    gauss_nb = GaussianNB()
    gauss_nb.partial_fit(X, y, classes=['a', 'b', 'c'])
    gauss_pred = gauss_nb.predict_proba(X)

    assert np.allclose(mixed_pred, gauss_pred)
Code example #14
File: assignment_3.py Project: simonq80/ml-task3
def perform_naive_bayes(train_X, train_Y, test_X, test_Y):
    # Split data into 2 to avoid memory error
    partial_size = ROWS // 2  # integer division so the slices below work in Python 3
    train_X0 = train_X[partial_size:]
    train_X1 = train_X[:partial_size]
    train_Y0 = train_Y[partial_size:]
    train_Y1 = train_Y[:partial_size]

    gnb = GaussianNB()
    gnb.partial_fit(train_X0, train_Y0, classes=np.arange(0, 5))
    gnb.partial_fit(train_X1, train_Y1)
    pred_Y = gnb.predict(test_X)
    return fbeta_score(test_Y, pred_Y, 0.1,
                       average='macro'), accuracy_score(test_Y, pred_Y)
Code example #15
def gaussian():
    """
    GaussianNB (Gaussian naive Bayes)
    :return:
    """
    X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    Y = np.array([1, 1, 1, 2, 2, 2])
    clf = GaussianNB()
    clf.fit(X, Y)
    print(clf.predict([[-0.8, -1]]))
    clf_pf = GaussianNB()
    clf_pf.partial_fit(X, Y, np.unique(Y))
    print(clf_pf.predict([[-0.8, -1]]))
    return None
Code example #16
File: SemiSAD.py Project: CoderWZW/SDLib
    def predict(self):
            ClassifierN = 0
            classifier = GaussianNB()
            X_train,X_test,y_train,y_test = train_test_split(self.training,self.trainingLabels,test_size=0.75,random_state=33)
            classifier.fit(X_train, y_train)
            # predict UnLabledData
            #pred_labelsForTrainingUn = classifier.predict(X_test)
            print('Enhanced classifier...')
            while 1:
                if len(X_test)<=5: # min
                    break         #min
                proba_labelsForTrainingUn = classifier.predict_proba(X_test)
                X_test_labels = np.hstack((X_test, proba_labelsForTrainingUn))
                X_test_labels0_sort = sorted(X_test_labels,key=lambda x:x[5],reverse=True)
                if X_test_labels0_sort[4][5]>X_test_labels0_sort[4][6]:
                    a = list(map(lambda x: x[:5], X_test_labels0_sort))
                    b = a[0:5]
                    classifier.partial_fit(b, ['0','0','0','0','0'], classes=['0', '1'], sample_weight=np.ones(len(b), dtype=float) * self.Lambda)
                    X_test_labels = X_test_labels0_sort[5:]
                    X_test = a[5:]
                if len(X_test)<6: # min
                    break         #min

                X_test_labels0_sort = sorted(X_test_labels, key=lambda x: x[5], reverse=True)
                if X_test_labels0_sort[4][5]<=X_test_labels0_sort[4][6]: #min
                    a = list(map(lambda x: x[:5], X_test_labels0_sort))
                    b = a[0:5]
                    classifier.partial_fit(b, ['1', '1', '1', '1', '1'], classes=['0', '1'], sample_weight=np.ones(len(b), dtype=float) * 1)
                    X_test_labels = X_test_labels0_sort[5:]  # min
                    X_test = a[5:]
                if len(X_test)<6:
                    break
            # while 1 :
            #     p1 = pred_labelsForTrainingUn
            #     # fit the unlabeled data, weighted by λ, into the classifier
            #     classifier.partial_fit(X_test, pred_labelsForTrainingUn,classes=['0','1'], sample_weight=np.ones(len(X_test),dtype=np.float)*self.Lambda)
            #     pred_labelsForTrainingUn = classifier.predict(X_test)
            #     p2 = pred_labelsForTrainingUn
            #     # check whether the classifier has stabilized
            #     if list(p1)==list(p2) :
            #         ClassifierN += 1
            #     elif ClassifierN > 0:
            #         ClassifierN = 0
            #     if ClassifierN == 20:
            #         break
            pred_labels = classifier.predict(self.test)
            print('naive_bayes with EM algorithm:')
            return pred_labels
Code example #17
File: bugZilla_preprocess.py Project: wego1236/SREE
def train_NB():
    data = pandas.read_csv("bugSample_encoded.csv").values
    #data = pandas.read_csv("test.csv").values
    X = np.array(data[:, 0:-1])
    y = np.array(data[:, -1])
    kf = KFold(n_splits=5)
    clf = GaussianNB()
    print(clf)
    accs = []
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        clf.partial_fit(X_train, y_train, classes=[0, 1, 2, 3, 4, 5, 6])
        predict = clf.predict(X_test)
        accs.append(accuracy_score(y_test, predict))
    # average across folds instead of printing only the last fold's accuracy
    print("Naive Bayes: ", np.mean(accs))
Code example #18
    def fit(self, X, y):
        train_size = len(X)
        x_test = X[int(train_size - train_size / 6):train_size]
        X = X[0:train_size - int(train_size / 6)]
        y_test = y[int(train_size - train_size / 6):train_size]
        y = y[0:train_size - int(train_size / 6)]
        m = 1
        self.createExpert()
        num_classes = len(np.unique(y))
        predictions = np.zeros((num_classes, ))
        max_weight = 0
        acc = []
        nb_acc = []
        sizes = []
        nb = GaussianNB()
        for i, sample in enumerate(X):
            for j, exp in enumerate(self.experts):
                y_hat = self.getExpertPrediction(exp, sample)
                y_hat = int(y_hat)
                if (y_hat != y[i]) and (i % self.period == 0):
                    self.weights[j] *= self.beta
                    print(
                        str(self.experts[j]) + "th Expert Weight: " +
                        str(self.weights[j]))

                predictions[y_hat] += self.weights[j]
                max_weight = max(max_weight, self.weights[j])
            y_hat = np.array([np.argmax(predictions)])

            if i % self.period == 0:

                self.normalizeWeights(max_weight)
                self.removeExpert()
                if y_hat != y[i]:
                    m = m + 1
                    self.createExpert()

            nb.partial_fit([sample], [y[i]], np.unique(y))
            nb_pred = nb.predict(x_test)
            nb_acc.append(accuracy_score(y_test, nb_pred))
            for exp in self.experts:
                exp.partial_fit([sample], [y[i]], np.unique(y))
            sizes.append(len(self.experts))
            acc.append(accuracy_score(self.predict(x_test), y_test))
        return sizes, acc, nb_acc
Code example #19
def trainClassifier(partition_data, cols):
    #### Naive Bayes
    gnb = GaussianNB()
    #### TODO change and put in parallel

    data = np.array(list(partition_data[1]))
    # the first call to partial_fit must include a list of all the classes
    gnb = gnb.partial_fit(data[:, 0:cols.value], data[:, cols.value], [0, 1])
    return gnb
Code example #20
File: train.py Project: sandialabs/packet2vec
def naiveBayesClassifier(data, output_dir):
    """
    Trains a Naive Bayes classifier on the data 
    generated in the 'features' step.

    Parameters
    ----------
    data : str
        Path to the working data directory containing the features
    output_dir : str
        Path to the output directory where the models are stored
    """
    # Grab list of files
    features = os.path.join(data, 'features')
    feature_files = [os.path.join(features, f) for f in os.listdir(features)]

    clf = GaussianNB()
    starting_index = scan_for_start(feature_files)

    with h5py.File(feature_files[starting_index], 'r') as hf:
        X = hf['vectors'][:]
        y = hf['labels'][:]

        clf.fit(X, y)
        # Delete from list so we dont train on it again
        del feature_files[starting_index]

    for i, f in enumerate(feature_files):
        if (i + 1) % 10 == 0:
            print("Training GNB on file {} of {}".format(
                i + 1,
                len(feature_files) + 1))

        with h5py.File(f, 'r') as hf:
            X = hf['vectors'][:]
            y = hf['labels'][:]

            clf.partial_fit(X, y)

    output_path = os.path.join(output_dir, 'classifiers')
    if not os.path.isdir(output_path):
        os.makedirs(output_path)

    joblib.dump(clf, os.path.join(output_path, 'gnb.joblib'))
Code example #21
class GaussianBatchNB(TransformerMixin):
    def __init__(self, batch_size, classes, *args, **kwargs):
        self._batch_size = batch_size
        self._classes = classes
        self._args = args
        self._kwargs = kwargs
        self._model = GaussianNB(*args, **kwargs)
        
    def fit(self, x, y, **fit_params):
        batch_size = self._batch_size
        self._model = GaussianNB(*self._args, **self._kwargs)
        
        for index in tqdm(range(batch_size, x.shape[0]+batch_size, batch_size)):
            self._model.partial_fit(
                x[index-batch_size:index, :].toarray(),
                y[index-batch_size:index], 
                classes=self._classes
            )                  
        return self

    @staticmethod
    def transform(x, y=None, **fit_params):
        return x
    
    def predict(self, x):
        batch_size = self._batch_size
        predictions = []
        for index in tqdm(range(batch_size, x.shape[0]+batch_size, batch_size)):
            predictions.extend(
                self._model.predict(
                    x[index-batch_size:index, :].toarray()
                ).tolist()
            )
        return np.array(predictions).ravel()
    
    def score(self, x, y):
        y_pred = self.predict(x)
        return accuracy_score(y, y_pred)

    def __str__(self):
        return "GaussianBatchNB()"

    def __repr__(self):
        return self.__str__()
Code example #22
class TfidfGaussianNB:
    def __init__(self, nfeats=300, vocab=None):
        self.clf = GaussianNB()
        self.vectorizer = TfidfVectorizer(max_features=nfeats,
                                          dtype=np.float32,
                                          vocabulary=vocab)

    def train(self,
              train_data,
              train_labels,
              classes,
              feature_selection=False,
              percentile=100,
              batch_size=1000):
        if feature_selection:
            selector = SelectPercentile(chi2, percentile=percentile)
            X = selector.fit_transform(
                self.vectorizer.fit_transform(train_data), train_labels)
            # use the fitted vectorizer's feature names (the `vocabulary`
            # constructor argument may be None)
            new_vocab = list(
                np.array(self.vectorizer.get_feature_names_out())[selector.get_support()])
            self.vectorizer = TfidfVectorizer(dtype=np.float32,
                                              vocabulary=new_vocab)
        print(len(self.vectorizer.vocabulary or []))
        for i in range(0, train_data.size, batch_size):
            print(i)
            data = train_data[i:i + batch_size]
            X = self.vectorizer.fit_transform(data).toarray()
            # self.clf.partial_fit(X, train_labels[i:i+batch_size], classes=classes)
            self.clf.partial_fit(X,
                                 train_labels[i:i + batch_size],
                                 classes=classes)

    def predict(self, data):
        return self.clf.predict(self.vectorizer.fit_transform(data).toarray())

    def load_model(self, filename):
        with open(filename + ".p", 'rb') as fp:
            self.clf = pickle.load(fp)

    def save_model(self, filename):
        pickle.dump(self.clf, open(filename + '.p', 'wb'))
Code example #23
def biased_naive_bayes(df, X_test, Y_test):
    fail_df = df.copy(deep=True).loc[df["G3"] == 0]
    pass_df = df.copy(deep=True).loc[df["G3"] == 1]

    # Target values are G3
    Y = df.pop("G3")
    Y_fail = fail_df.pop("G3")
    Y_pass = pass_df.pop("G3")

    # Feature set is remaining features
    X = df
    X_fail = fail_df
    X_pass = pass_df

    gnb = GaussianNB()
    for i in (0, 3):
        gnb.partial_fit(X_fail, Y_fail, [0, 1])
    gnb.partial_fit(X_pass, Y_pass, [0, 1])
    for i in (0, 3):
        gnb.partial_fit(X_fail, Y_fail, [0, 1])

    print("\n\nGuassian Naive Bayes (Boosted) Accuracy: ", gnb.score(X_test, Y_test))
    confuse(Y, gnb.predict(X))

    return gnb
Code example #24
def demoOne():
    X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    y = np.array([1, 1, 1, 2, 2, 2])

    clf = GaussianNB(priors=None)
    clf.fit(X, y)
    print(clf.predict([[-0.8, -1]]))
    print('predict_prob: ', clf.predict_proba([[-0.8, -1]]))
    print('predict_log_prob: ', clf.predict_log_proba([[-0.8, -1]]))
    print(clf.score([[-0.8, -1]], clf.predict([[-0.8, -1]])))
    print(clf.partial_fit(X, y, classes=np.unique(y)))
    print(clf.set_params())
    return X, y
Code example #25
File: grading.py Project: Pavan-Red-E/aryan1
def getgrade():

    model = train_model(x_train, y_train, x_test, y_test, GaussianNB)
    #from sklearn.svm import SVC
    #model = train_model(x_train, y_train, x_test, y_test, SVC, C=0.05, kernel='linear')

    gnb = GaussianNB()
    gnb.partial_fit(x, y, np.unique(y))

    return gnb.predict([[499,30,30,65,43,16,47,134,3,16,59,102,65,0]])
Code example #26
def get_class_name(dataHandledPath):
    filePath = getFilePath(dataHandledPath)
    #print(filePath)
    for tmp_class in filePath:
        file_class.append(all_class_name[tmp_class.split('/')[-2]])
    print(file_class)

    allText = []
    for tfp in filePath:
        #print(tfp)
        #with open(tfp,'r') as fo:
        with codecs.open(tfp, 'rb') as co:
            text = co.read()
            encodeInfo = chardet.detect(text)
            text = text.decode(encodeInfo["encoding"])
            #text = ast.literal_eval(text)
            # read the processed data file that was saved earlier
            tmpText = " "
            tmpText = tmpText.join(ast.literal_eval(text))
            #print(tmpText)
            # collect all the contents
            allText.append(tmpText)
    print(len(allText))
    # build the tf-idf model
    tfidf = TfidfVectorizer()
    tfidfModel = tfidf.fit(allText)
    # dense matrix representation of the tf-idf result
    tfidfResult = tfidfModel.transform(allText)
    print(tfidfResult)
    matrixResult = tfidfResult.todense()

    model = GaussianNB()
    # train incrementally, one document at a time; partial_fit needs the full
    # class list, and dict values must be wrapped in list() for sklearn
    classes = list(all_class_name.values())
    for row, label in zip(matrixResult, file_class):
        model.partial_fit(row, [label], classes)

    predicted = model.predict(matrixResult[0])
    print(predicted)
Code example #27
File: practica_05_NB.py Project: nikasha89/AIA
def otrosEjemplos(ejemplo):
    switcherExample = {
        0: "Example 1: Gaussian Naive Bayes classifier --> simplest case, with manual data",
        1: "Example 2: Gaussian Naive Bayes classifier --> the Iris dataset",
        2: "Example 3: Multinomial Naive Bayes classifier --> simplest case, with random data",
    }
    if ejemplo in switcherExample:
        if ejemplo == 0:
            # Define the example data:
            X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
            Y = np.array([1, 1, 1, 2, 2, 2])
            # Define the Naive Bayes classifier:
            clf = GaussianNB()
            clf.fit(X, Y)
            print(clf.predict([[-0.8, -1]]))
            clf_pf = GaussianNB()
            clf_pf.partial_fit(X, Y, np.unique(Y))
            print(clf_pf.predict([[-0.8, -1]]))
        elif ejemplo == 1:
            iris = datasets.load_iris()
            gnb = GaussianNB()
            y_pred = gnb.fit(iris.data, iris.target).predict(iris.data)
            print("Number of mislabeled points out of a total %d points : %d" %
                  (iris.data.shape[0], (iris.target != y_pred).sum()))
        elif ejemplo == 2:
            X = np.random.randint(5, size=(6, 100))
            y = np.array([1, 2, 3, 4, 5, 6])
            clf = MultinomialNB()
            clf.fit(X, y)
            print(clf.predict(X[2:3]))
Code example #28
def train3(trainData):
    gnb = GaussianNB()
    X = []
    y = []
    batch_size = 100
    count = 0
    for i in trainData:
        X.append(i["features"])
        y.append(i["label"])
        count += 1
        if count == batch_size:
            # NOTE: the first call to partial_fit also needs classes=[...]
            model = gnb.partial_fit(X, y)
            # reset the batch only after fitting so a full batch can accumulate
            count = 0
            X.clear()
            y.clear()

    joblib.dump(model, '/tmp/sk_example.pkl')
Code example #29
def genCharsChrom(genNum):
	""" Return chromosome (list) of length l, each of which is made up of the characters from chars. 
		
		pre:
			isinstance(l, int)
			hasattr(chars, '__getitem__')
			hasattr(chars, '__len__')
			len(chars) > 0
		
		post[l, chars]:
			__old__.l == l
			__old__.chars == chars
			len(__return__) == l
			forall(__return__, lambda a: a in chars)
	"""
	
	l = np.random.randint(0, 50, 20)  # random_integers is deprecated; randint's upper bound is exclusive


	for i in range(0,50):
		if Y[i] == 1:
			#print("!!!!!!")
			l = np.append(l,[i])
			break


	for i in range(0,50):
		if Y[i] == -1:
			#print("######")
			l = np.append(l,[i])
			break

	x = X[l]
	y = Y[l]


	clf =  GaussianNB()

	clf = clf.partial_fit(x,y,[1,-1])

	return Individual([clf,genNum])
Code example #30
def genCharsChrom():
    """ Return chromosome (list) of length l, each of which is made up of the characters from chars. 
		
		pre:
			isinstance(l, int)
			hasattr(chars, '__getitem__')
			hasattr(chars, '__len__')
			len(chars) > 0
		
		post[l, chars]:
			__old__.l == l
			__old__.chars == chars
			len(__return__) == l
			forall(__return__, lambda a: a in chars)
	"""

    l = np.random.randint(0, 100, 20)  # random_integers is deprecated; randint's upper bound is exclusive

    for i in range(0, 100):
        if Y[i] == 1:
            # print("!!!!!!")
            l = np.append(l, [i])
            break

    for i in range(0, 100):
        if Y[i] == -1:
            # print("######")
            l = np.append(l, [i])
            break

    x = X[l]
    y = Y[l]

    clf = GaussianNB()

    clf = clf.partial_fit(x, y, [1, -1])

    return clf
Code example #31
class MixedNB(BaseEstimator, ClassifierMixin):
    """Naive Bayes for mix of continuous and nominal features. For continuous variables
    the likelihood of the features is assumed to be Gaussian and for nominal features the
    likelihood is multinomial."""

    def __init__(self, is_nominal=None, alpha=1., class_prior=None):
        """
        :param is_nominal: boolean array indicating which columns are nominal.
            For technical reasons, continuous variables must go first in X matrix!
        :param alpha: see MultinomialNB
        :param class_prior: see MultinomialNB
        """
        self.is_nominal = is_nominal
        self.multi_nb = MultinomialNB(alpha=alpha, class_prior=class_prior, fit_prior=class_prior is None)
        self.gauss_nb = GaussianNB(priors=class_prior)

    def _is_nominal(self, X):
        is_nominal = np.array(self.is_nominal)
        assert X.shape[1] == len(is_nominal), 'length of is_nominal array is not the same as number of features'
        assert all(sorted(is_nominal) == is_nominal), 'Continuous variables must go first in X matrix'
        return is_nominal

    def fit(self, X, y, **kwargs):
        is_nominal = self._is_nominal(X)
        if any(is_nominal):
            self.multi_nb.fit(X[:, is_nominal], y, **kwargs)
        if any(~is_nominal):
            self.gauss_nb.fit(X[:, ~is_nominal], y, **kwargs)
        return self

    def partial_fit(self, X, y, **kwargs):
        is_nominal = self._is_nominal(X)
        if any(is_nominal):
            self.multi_nb.partial_fit(X[:, is_nominal], y, **kwargs)
        if any(~is_nominal):
            self.gauss_nb.partial_fit(X[:, ~is_nominal], y, **kwargs)
        return self

    def _multi_joint_log_likelihood(self, X):
        is_nominal = self._is_nominal(X)
        if all(~is_nominal):
            return 0

        return X[:, is_nominal].dot(self.multi_nb.feature_log_prob_.T)

    def _gauss_joint_log_likelihood(self, X):
        is_nominal = self._is_nominal(X)
        if all(is_nominal):
            return 0

        X = X[:, ~is_nominal]
        joint_log_likelihood = []
        for i in range(np.size(self.gauss_nb.classes_)):
            n_ij = - 0.5 * np.sum(np.log(2. * np.pi * self.gauss_nb.sigma_[i, :]))
            n_ij -= 0.5 * np.sum(((X - self.gauss_nb.theta_[i, :]) ** 2) /
                                 (self.gauss_nb.sigma_[i, :]), 1)
            joint_log_likelihood.append(n_ij)

        joint_log_likelihood = np.array(joint_log_likelihood).T
        return joint_log_likelihood

    @property
    def classes_(self):
        if hasattr(self.multi_nb, 'classes_'):
            return self.multi_nb.classes_
        else:
            return self.gauss_nb.classes_

    @property
    def class_log_prior_(self):
        if hasattr(self.multi_nb, 'class_log_prior_'):
            return np.maximum(self.multi_nb.class_log_prior_, -1e10)
        else:
            return np.maximum(np.log(self.gauss_nb.class_prior_), -1e10)

    def predict_proba(self, X):
        loglike = self._multi_joint_log_likelihood(X) + self._gauss_joint_log_likelihood(X) + self.class_log_prior_
        norm = logsumexp(loglike, axis=1)
        probs = np.exp(loglike - norm[:, np.newaxis])
        return probs

    def predict(self, X):
        probs = self.predict_proba(X)
        return self.classes_[np.argmax(probs, 1)]

    def score(self, X, y, sample_weight=None):
        return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
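
The class above is self-contained apart from its imports. Below is a hedged usage sketch: the imports are inferred from the names the class uses, the toy data is invented, and it assumes an sklearn version that still exposes GaussianNB.sigma_ (renamed var_ in newer releases):

import numpy as np
from scipy.special import logsumexp
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB, MultinomialNB

# Continuous columns must come first, as the docstring requires;
# the last two columns are nominal counts.
X = np.array([[0.5, 1, 0],
              [0.7, 0, 2],
              [2.1, 3, 1],
              [2.4, 2, 2]], dtype=float)
y = np.array(['a', 'a', 'b', 'b'])

clf = MixedNB(is_nominal=[False, True, True], alpha=1.0)
clf.fit(X, y)
print(clf.predict(X))        # predicted class labels
print(clf.predict_proba(X))  # normalized posterior probabilities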
Code example #32
import numpy as np
from sklearn.naive_bayes import GaussianNB

x = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])

clf = GaussianNB()
clf.fit(x, y)  # fit before predicting; an unfitted model raises NotFittedError
print(clf.predict([[-0.8, -1]]))
clf_pf = GaussianNB()
clf_pf.partial_fit(x, y, np.unique(y))
print(clf_pf.predict([[-0.8, -1]]))
Code example #33
File: bayes_save.py Project: mangodream/Bayes
class Bayes(object):
    def __init__(self):
        self.clf = GaussianNB()

    def histo_plot(self, data):
        (mu, sigma) = norm.fit(data)
        # the histogram of the data
        n, bins, patches = plt.hist(data, 30, density=True, facecolor='green', alpha=0.75)  # 'normed' was removed from matplotlib

        # add a 'best fit' line
        y = norm.pdf(bins, mu, sigma)  # mlab.normpdf was removed from matplotlib; scipy's norm is already used above
        l = plt.plot(bins, y, 'r--', linewidth=2)

    def training(self):
        # Training data: a and b
        # b is twice, three times, four times of a
        a = np.random.normal(1, 0.1, 3000)
        b = np.append(np.random.normal(2, 0.2, 1000), np.random.normal(3, 0.3, 1000))
        b = np.append(b, np.random.normal(4, 0.4, 1000))

        # X: b/a, Y: type, 2/3/4
        X = b/a
        Y = np.append(np.ones(1000) * 2, np.ones(1000) * 3)
        Y = np.append(Y, np.ones(1000) * 4)

        # Plot the histogram of b/a
        self.histo_plot(X[:1000])
        self.histo_plot(X[1000:2000])
        self.histo_plot(X[2000:3000])
        plt.xlabel('b/a')
        plt.ylabel('Probability')
        plt.title('Histogram of the training set b/a')
        plt.grid(True)
        plt.show()

        # Plot b/a
        plt.plot(X)
        plt.plot(Y, marker='o', markersize=5, label='Type')
        plt.legend()
        plt.title("Training Set b/a, and types")
        plt.ylabel("b/a")
        plt.xlabel("time (s)")
        plt.show()

        # Bayes Classifier
        X = X.reshape(-1, 1)
        self.clf.partial_fit(X, Y, np.unique(Y))
        print(self.clf.class_prior_)
        print(self.clf.theta_)
        print(self.clf.sigma_)

    def testing(self):
        # Testing data, c and d
        c = np.random.normal(2, 0.2, 300)
        d = np.append(np.random.normal(4, 0.4, 100), np.random.normal(6, 1, 100))
        d = np.append(d, np.random.normal(8, 1, 100))

        # X1: testing set, Y1: predicted result
        X1 = d/c
        Y1 = []
        for item in X1:
            Y1.append(self.clf.predict([[item]]))

        # Plot the histogram of d/c
        self.histo_plot(X1[:100])
        self.histo_plot(X1[100:200])
        self.histo_plot(X1[200:300])
        plt.xlabel('d/c')
        plt.ylabel('Probability')
        plt.title('Histogram of the testing set d/c')
        plt.grid(True)
        plt.show()

        # Plot d/c
        plt.plot(X1)
        plt.plot(Y1, marker='o', markersize=5, label='Type')
        plt.legend()
        plt.title("Testing Set d/c, and types")
        plt.ylabel("d/c")
        plt.xlabel("time (s)")
        plt.show()
Code example #34
indx = [k for k in range(30)]
random.shuffle(indx)
# 1. Shuffle the file indices so training happens in random order
rslt1 = []
rslt2 = []
rslt3 = []


from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
X = np.loadtxt(tif_name[0])
y = np.loadtxt(lab_name[0])  #.reshape(-1, 1)
Xt = np.loadtxt(tif_name[29])
yt = np.loadtxt(lab_name[29])  #.reshape(-1, 1)
clf.partial_fit(X, y, classes=np.arange(6))  # classes must be a flat 1-D array, not shape (6, 1)
scr = clf.score(Xt, yt)
rslt3.append(scr)
for n in tqdm(indx[1:-15]):

    X = np.loadtxt(tif_name[n])
    y = np.loadtxt(lab_name[n])  #.reshape(-1, 1)

    Xt = np.loadtxt(tif_name[29 - n])
    yt = np.loadtxt(lab_name[29 - n])  #.reshape(-1, 1)

    clf.partial_fit(X, y)
    scr = clf.score(Xt, yt)
    rslt3.append(scr)
print(rslt3)
Code example #35
plt.rcParams['ytick.labelsize'] = 20
plt.rcParams['legend.fontsize'] = 20
plt.rcParams['figure.titlesize'] = 15

X_train, y_train, X_test, y_test = _Dataset()
col_test = X_test.columns
col_train = X_train.columns
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
X_train = pd.DataFrame(X_train, columns=col_train)
X_test = pd.DataFrame(X_test, columns=col_test)

prior = [1 / 7] * 7
clf = GaussianNB(priors=prior)
clf.fit(X_train, y_train["label"])

print(clf.score(X_test, y_test["label"]))
y_predict = clf.predict(X_test)

confusion_matrix_Cisco(y_test["label"], y_predict)
Report_Matrix(y_test["label"], y_predict)

clf_pf = GaussianNB(priors=prior)
clf_pf.partial_fit(X_train, y_train["label"], np.unique(y_train["label"]))
print(clf_pf.score(X_test, y_test["label"]))
y_predict_pf = clf_pf.predict(X_test)

confusion_matrix_Cisco(y_test["label"], y_predict_pf)
Report_Matrix(y_test["label"], y_predict_pf)
Code example #36
File: test.py Project: huangshenno1/practice
trainLabel1 = []
clf1 = GaussianNB()
iter = 1

for sentence in sentences:
    text = sentence.string
    x = vec.vector(text)
    trainData.append(x)
    if sentence.get('opinionated') == 'N': emotion = 'none'
    else: emotion = sentence.get('emotion-1-type')
    trainLabel1.append(label[emotion])
    if len(trainData) >= 1000:
        trainData = array(trainData)
        trainLabel1 = array(trainLabel1)
        print('Training emotion 1...%d' % iter)
        clf1.partial_fit(trainData, trainLabel1, unique(trainLabel1))
        trainData = []
        trainLabel1 = []
        iter += 1

print('Testing...')
output = []
soup = BeautifulSoup(''.join(open('../data/Testing data for Emotion Classification.xml').readlines()))
weibos = soup.find_all('weibo')
for weibo in weibos:
    weibo_id = weibo.get('id')
    if int(weibo_id) > 6000: break
    text = ''
    sentences = weibo.find_all('sentence')
    for sentence in sentences:
        text += sentence.string + ' '
Code example #37
#-*- encoding:utf-8 -*-
r'''
The Gaussian Bayes classifier is essentially a linear classifier:
it is the special case of the Bayes classifier in which the joint
probability distribution is Gaussian.
'''
import numpy as np
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
Y = np.array([1, 1, 1, 2, 2, 2])

from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
print(clf.fit(X, Y))
#>>GaussianNB()
print(clf.predict([[-0.8, -1],[2,4]]))
#>>[1 2]


# Batch training: split the dataset into chunks; very effective for large datasets
clf_pf = GaussianNB()
print(clf_pf.partial_fit(X, Y, np.unique(Y)))
#>>GaussianNB()
print(clf_pf.predict([[-0.8, -1],[2,4]]))
#>>[1 2]
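
A short sketch of the chunked training that the comment above describes; the data, chunk size, and names are invented for illustration:

import numpy as np
from sklearn.naive_bayes import GaussianNB

rng = np.random.RandomState(0)
X_big = rng.normal(size=(10000, 2))
y_big = (X_big[:, 0] + X_big[:, 1] > 0).astype(int)

clf = GaussianNB()
chunk = 1000
for start in range(0, len(X_big), chunk):
    X_c = X_big[start:start + chunk]
    y_c = y_big[start:start + chunk]
    if start == 0:
        # the first call must enumerate every class
        clf.partial_fit(X_c, y_c, classes=[0, 1])
    else:
        clf.partial_fit(X_c, y_c)
print(clf.score(X_big, y_big))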
Code example #38
import numpy as np
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
Y = np.array([1, 1, 1, 2, 2, 2])

from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(X, Y)
print(clf.predict([[-0.8, -1]]))

clf_pf = GaussianNB()
clf_pf.partial_fit(X, Y, np.unique(Y))
print(clf_pf.predict([[-0.8, -1]]))
Code example #39
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score, train_test_split  # sklearn.cross_validation was replaced by model_selection
from sklearn.naive_bayes import GaussianNB

print('---------- Naive Bayes -----------')

df = pd.read_csv("~/Desktop/My DM/Baltimore/Baltimore.csv",low_memory=False)

features = ["Month of the Crime","Mean Temperature","Mean Dew Point","Mean Visibility","Max Humidity","Mean Wind Speed","Max Sea Level"]

x = df[features]
y = df["Crime Type"]

print('Partial Fit - training classifier')
clf_pf = GaussianNB()
clf_pf.partial_fit(x, y, np.unique(y))

print('--Cross Validation--')
scores = cross_val_score(clf_pf, x, y, cv=5)
print(scores.mean())

print('--Random Split--')
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size=0.2, random_state=0)
clf1 = GaussianNB().fit(X_train, Y_train)
print(clf1.score(X_test, Y_test))

# Test file
df_test = pd.read_csv("~/Desktop/My DM/Baltimore/Test_Baltimore.csv",low_memory=False)
xt = df_test[features]
print('Partial Fit Predicted - ' + str(clf_pf.predict(xt)))
print('Predict Probability - ' + str(clf_pf.predict_proba(xt)))
Code example #40
import numpy as np



X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
Y = np.array([1, 1, 1, 2, 2, 2])

from sklearn.naive_bayes import GaussianNB

clf = GaussianNB()
clf.fit(X, Y)
print(clf.predict([[-0.8, -1]]))

clf_pf = GaussianNB()
clf_pf.partial_fit(X, Y, np.unique(Y))
print(clf_pf.predict([[-0.8, -1]]))


import matplotlib.pyplot as plt
plt.plot(X[0],'bo')
plt.ylabel('y-label')
plt.show()



Code example #41
File: NaiveBayes.py Project: lovi9573/sleepybox
class Evaluator(EvaluatorBase):
    '''
    This Evaluator requires the following for each module
    [model]
    screenfile    naivebayes_screen_model
    sleepfile    naivebayes_sleep_model
    history        5
    minsleeptime    240
    minscreentime    240
    '''


    def __init__(self, config):
        '''
        Constructor
        Accepts a module configuration dictionary
        '''
        self.config = config
        self.historylen = config.get("model",{}).get("history",5)
        #TODO: Discover these.
        self.modules = self.config.get("modules",[])
        self.modules = self.modules + ['bias']
        # modules must be populated before history is sized from it
        self.history = [0.0]*(len(self.modules)*self.historylen)
                                                  
        self.screenfile = config.get("model",{}).get("screenfile",os.environ.get("HOME")+"naivebayesscreenmodel")
        if os.path.isfile(self.screenfile):
            self.screenclassifier = pickle.load(self.screenfile)
        else:
            self.screenclassifier = GaussianNB()
            # sklearn expects a 2-D feature array: one row per sample
            self.screenclassifier.fit(np.asarray([self.history], dtype=np.float32), np.zeros([1]))
        self.sleepfile = config.get("model",{}).get("sleepfile",os.environ.get("HOME")+"naivebayessleepmodel")
        if os.path.isfile(self.sleepfile):
            self.sleepclassifier = pickle.load(self.sleepfile)
        else:
            self.sleepclassifier = GaussianNB()
            self.sleepclassifier.fit(np.asarray([self.history], dtype=np.float32), np.zeros([1]))
        
    def eval(self, metrics):
        
        #preload expected inputs at 0.0
        inputs = {}
        for m in self.modules:
            inputs[m] = 0.0
        inputs['bias'] = 1.0
        #update given inputs
        for modulename, metric in metrics.items():  # iteritems() is Python 2 only
            inputs[modulename] = metric
        #Transform into consistent order vector
        inputvector = []
        for inputname in sorted(self.modules):
            inputvector.append(inputs[inputname])
        self.history = self.history[0:-len(self.modules)] + inputvector
        npinvec = np.asarray([self.history], dtype=np.float32)  # 2-D: one row per sample
        sleepdecision = self.sleepclassifier.predict(npinvec)[0] == 1
        screendecision = self.screenclassifier.predict(npinvec)[0] == 1
        return {'sleep':sleepdecision,
                'screenoff':screendecision}
        
    def update(self,timeslept, timescreenoff):
        #TODO: Use this knowledge about how long the sleep lasted to update the ML model.
        self.sleepclassifier.partial_fit(np.asarray([self.history], dtype=np.float32),
                                         np.asarray([timeslept > self.config.get("model",{}).get("minsleeptime",240)], dtype=int))
        self.screenclassifier.partial_fit(np.asarray([self.history], dtype=np.float32),
                                          np.asarray([timescreenoff > self.config.get("model",{}).get("minscreentime",240)], dtype=int))
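
For reference, a sketch of the configuration shape this Evaluator appears to expect; the key names are inferred from the config.get(...) calls above, and the module names and file paths are invented:

config = {
    "modules": ["cpu", "network", "keyboard"],  # hypothetical module names
    "model": {
        "screenfile": "/home/user/naivebayes_screen_model",
        "sleepfile": "/home/user/naivebayes_sleep_model",
        "history": 5,
        "minsleeptime": 240,
        "minscreentime": 240,
    },
}
evaluator = Evaluator(config)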
        
Code example #42
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind((HOST, PORT))
s.listen(5)
classes = [1, 2, 3, 4]
dump_path = "dump.pkl"

try:
    # Loads dump if exists
    clf = joblib.load(dump_path)
except IOError:
    # No dump, loads base samples
    base = np.loadtxt("base.txt")
    y_base = base[:,0] # col 0: delay in minutes
    X_base = base[:,[1,2,3]] # cols 1(wind spd), 2(dew diff), 3(sky cover)
    normalize(X_base)
    
    # translate continuous to discrete output for classifier
    y_base[y_base <= 5] = 1
    y_base[np.logical_and(y_base > 5, y_base <= 30)] = 2
    y_base[np.logical_and(y_base > 30, y_base <= 60)] = 3
    y_base[y_base > 60] = 4
    
    clf = GaussianNB()
    clf.partial_fit(X_base, y_base, classes)
    joblib.dump(clf, 'dump.pkl')

while True:
    # receive data from web app
    (clientsocket, address) = s.accept()
    t = threading.Thread(target=worker, args=(clientsocket,))
    t.start()