Example #1
def run_model(training, testing, fields, labels):
    train_fields = fields.iloc[training].reset_index(drop = True)
    train_labels = [labels[i] for i in training]
    test_fields = fields.iloc[testing].reset_index(drop = True)
    test_labels = [labels[i] for i in testing]

    clf = CategoricalNB()
    clf.fit(train_fields, train_labels)

    res = clf.predict(test_fields).tolist()

    # per-sample outcome codes: 1 = false positive, -1 = false negative, 0 = correct
    outcomes = []
    for i in range(len(res)):
        if res[i] == 1 and test_labels[i] == 0:
            outcomes.append(1)
        elif res[i] == 0 and test_labels[i] == 1:
            outcomes.append(-1)
        else:
            outcomes.append(0)

    fp = outcomes.count(1) / len(outcomes)
    fn = outcomes.count(-1) / len(outcomes)
    acc = outcomes.count(0) / len(outcomes)
    print("false positive rate: %.4f" % fp)
    print("false negative rate: %.4f" % fn)
    print("accuracy: %.4f" % acc)
    return res, acc, fp, fn
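A minimal usage sketch for run_model, assuming fields is a pandas DataFrame of integer-encoded categorical columns and labels is a list of 0/1 targets; the data and import below are illustrative assumptions, not part of the original snippet:

import pandas as pd
from sklearn.naive_bayes import CategoricalNB

# made-up integer-encoded data for illustration only
fields = pd.DataFrame({"color": [0, 1, 2, 0, 1, 2],
                       "size":  [0, 0, 1, 1, 0, 1]})
labels = [0, 1, 1, 0, 1, 0]
training = [0, 1, 2, 3]  # row positions used for training
testing = [4, 5]         # row positions used for testing

res, acc, fp, fn = run_model(training, testing, fields, labels)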
Example #2
def run_kfold(fields, labels):
    kf = KFold(n_splits=5)
    best = [], []
    best_accuracy = 0

    # train_index and test_index index into fields and labels
    for train_index, test_index in kf.split(fields):
        train_fields = fields.iloc[train_index].reset_index(drop = True)
        train_labels = [labels[i] for i in train_index]
        test_fields = fields.iloc[test_index].reset_index(drop = True)
        test_labels = [labels[i] for i in test_index]

        clf = CategoricalNB()
        clf.fit(train_fields, train_labels)

        try:
            res = clf.predict(test_fields).tolist()
        except IndexError:
            # a test fold contained a category never seen during fit; skip it
            continue
        
        # 1 for a correct prediction, 0 otherwise
        accuracy = [1 if res[i] == test_labels[i] else 0 for i in range(len(res))]
        acc = sum(accuracy)/len(accuracy)

        if acc > best_accuracy:
            best = train_index, test_index
            best_accuracy = acc

        print("accuracy rate: ", acc)
    return best
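The try/except above guards against the IndexError that older scikit-learn releases raise when a test fold contains a category index never seen during fit. A hedged sketch of the min_categories workaround (added in scikit-learn 0.24), using made-up data:

import numpy as np
from sklearn.naive_bayes import CategoricalNB

X_train = np.array([[0, 0], [1, 1]])  # category index 2 never appears here
y_train = np.array([0, 1])
X_test = np.array([[2, 1]])           # unseen category index 2

# reserving room for three categories per feature avoids the
# out-of-range lookup; Laplace smoothing handles the zero counts
clf = CategoricalNB(min_categories=3)
clf.fit(X_train, y_train)
print(clf.predict(X_test))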
Example #3
def cnb(train_data, train_classes, test_data, test_classes):
    # the pipeline below refits the imputer on the training data,
    # so no separate fit on the test set is needed
    transformer = SimpleImputer(missing_values=-2, strategy=MISSING_STRATEGY)
    classifier = make_pipeline(transformer, CategoricalNB())
    classifier.fit(train_data, train_classes.values.ravel())
    predictions = classifier.predict(test_data)
    num_correct = 0.0
    for i in range(len(test_classes)):
        if predictions[i] == test_classes.iat[i, 0]:
            num_correct += 1
    accuracy = num_correct / len(test_classes)
    print("CategoricalNB classifier with no smoothing has accuracy " +
          str(accuracy))

    best_accuracy = -1
    best_laplace = -1

    for laplace in range(1, 100):
        #decimal_laplace = (laplace*1.0) / 100
        #print(decimal_laplace)
        transformer = SimpleImputer(missing_values=-2,
                                    strategy=MISSING_STRATEGY)
        classifier = make_pipeline(transformer, CategoricalNB(alpha=laplace))
        classifier.fit(train_data, train_classes.values.ravel())
        predictions = classifier.predict(test_data)
        num_correct = 0.0
        for i in range(len(test_classes)):
            if predictions[i] == test_classes.iat[i, 0]:
                num_correct += 1
        accuracy = num_correct / len(test_classes)
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_laplace = laplace
    print("Best classifier with smoothing has alpha = " + str(best_laplace))
    print("And accuracy " + str(best_accuracy))
Example #4
def categoricalNaiveBayes(dtrain, dtest):
    # can use split?
    y_train = dtrain[:, -1]
    x_train = dtrain[:, :-1]
    # NOTE: CategoricalNB casts its input to non-negative integers, so
    # min-max scaling to [0, 1] collapses most values to category 0; the
    # scalers are kept only to mirror the original pipeline, and fitting a
    # second scaler on the test data makes the two transforms inconsistent
    scaler = preprocessing.MinMaxScaler()
    scaler.fit(x_train)
    x_train = scaler.transform(x_train)
    newscaler = preprocessing.MinMaxScaler()
    newscaler.fit(dtest)
    dtest = newscaler.transform(dtest)
    cnb = CategoricalNB()
    cnb.fit(x_train, y_train)
    print("CategoricalNB category count")
    print(cnb.category_count_)
    print("CategoricalNB class count")
    print(cnb.class_count_)
    print("CategoricalNB feature log prob")
    print(cnb.feature_log_prob_)
    print("CategoricalNB number of features")
    print(cnb.n_features_in_)  # n_features_ was deprecated and later removed
    print("Length test")
    print(len(dtest[0]))
    predictions = cnb.predict(dtest)
    return predictions
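Since CategoricalNB expects non-negative integer category codes, ordinal encoding is the usual alternative to the scaling above. A sketch with made-up data, not the original pipeline:

import numpy as np
from sklearn.naive_bayes import CategoricalNB
from sklearn.preprocessing import OrdinalEncoder

# hypothetical raw data; the last column is the label
dtrain = np.array([["red", "S", 0], ["blue", "M", 1], ["red", "M", 1]],
                  dtype=object)
x_train, y_train = dtrain[:, :-1], dtrain[:, -1].astype(int)

encoder = OrdinalEncoder()  # maps each column's categories to 0..k-1
x_train = encoder.fit_transform(x_train)
clf = CategoricalNB()
clf.fit(x_train, y_train)
print(clf.predict(encoder.transform([["red", "M"]])))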
Example #5
def test_alpha():
    # Setting alpha=0 should not produce nan results when p(x_i|y_j)=0 occurs
    X = np.array([[1, 0], [1, 1]])
    y = np.array([0, 1])
    nb = BernoulliNB(alpha=0.)
    assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1])
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[1, 0], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    nb = MultinomialNB(alpha=0.)
    assert_warns(UserWarning, nb.partial_fit, X, y, classes=[0, 1])
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[2. / 3, 1. / 3], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    nb = CategoricalNB(alpha=0.)
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[1., 0.], [0., 1.]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    # Test sparse X
    X = scipy.sparse.csr_matrix(X)
    nb = BernoulliNB(alpha=0.)
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[1, 0], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    nb = MultinomialNB(alpha=0.)
    assert_warns(UserWarning, nb.fit, X, y)
    prob = np.array([[2. / 3, 1. / 3], [0, 1]])
    assert_array_almost_equal(nb.predict_proba(X), prob)

    # Test for alpha < 0
    X = np.array([[1, 0], [1, 1]])
    y = np.array([0, 1])
    expected_msg = ('Smoothing parameter alpha = -1.0e-01. '
                    'alpha should be > 0.')
    b_nb = BernoulliNB(alpha=-0.1)
    m_nb = MultinomialNB(alpha=-0.1)
    c_nb = CategoricalNB(alpha=-0.1)
    assert_raise_message(ValueError, expected_msg, b_nb.fit, X, y)
    assert_raise_message(ValueError, expected_msg, m_nb.fit, X, y)
    assert_raise_message(ValueError, expected_msg, c_nb.fit, X, y)

    b_nb = BernoulliNB(alpha=-0.1)
    m_nb = MultinomialNB(alpha=-0.1)
    assert_raise_message(ValueError,
                         expected_msg,
                         b_nb.partial_fit,
                         X,
                         y,
                         classes=[0, 1])
    assert_raise_message(ValueError,
                         expected_msg,
                         m_nb.partial_fit,
                         X,
                         y,
                         classes=[0, 1])
Example #6
def test_categoricalnb_min_categories_errors(min_categories, error_msg):

    X = np.array([[0, 0], [0, 1], [0, 0], [1, 1]])
    y = np.array([1, 1, 2, 2])

    clf = CategoricalNB(alpha=1, fit_prior=False, min_categories=min_categories)
    with pytest.raises(ValueError, match=error_msg):
        clf.fit(X, y)
Example #7
    def setUp(self):
        rng = np.random.RandomState(1)

        self.X = rng.randint(5, size=(6, 100))
        y = np.array([1, 2, 3, 4, 5, 6])

        model = CategoricalNB()
        model.fit(self.X, y)
        self.model = model
Example #8
    def fit(self, x, y, **fit_params):
        batch_size = self._batch_size
        self._model = CategoricalNB(*self._args, **self._kwargs)

        for index in range(batch_size, x.shape[0] + batch_size, batch_size):
            self._model.partial_fit(x[index - batch_size:index, :].toarray(),
                                    y[index - batch_size:index],
                                    classes=self._classes)
        return self
Example #9
def check_sklearn_dev():
    """
    This just verifies that sklearn 0.23-dev is installed properly
    by checking CategoricalNB results
    """
    rng = np.random.RandomState(1)
    X = rng.randint(5, size=(6, 100))
    y = np.array([1, 2, 3, 4, 5, 6])

    clf = CategoricalNB()
    clf.fit(X, y)
    assert [3] == clf.predict(X[2:3])
Example #10
    def fit(self, X, Y):
        """
        Fit the classifier to training data X and labels Y.

        Arguments:
            X (np.array): training data matrix of shape (n_samples, n_features)
            Y (np.array): label matrix of shape (n_samples, n_labels)
        """
        n_labels = Y.shape[1]
        for idx in range(n_labels):
            Y_col = Y[:, idx]
            predictor = CategoricalNB()
            predictor.fit(X, Y_col)
            self.predictors.append(predictor)
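A hedged sketch of how this per-label scheme might be exercised end to end; the wrapper class name MultiLabelCNB and its predict method are invented for illustration:

import numpy as np
from sklearn.naive_bayes import CategoricalNB

class MultiLabelCNB:
    # one independent CategoricalNB per label column
    def __init__(self):
        self.predictors = []

    def fit(self, X, Y):
        for idx in range(Y.shape[1]):
            predictor = CategoricalNB()
            predictor.fit(X, Y[:, idx])
            self.predictors.append(predictor)

    def predict(self, X):
        # stack the per-label predictions into an (n_samples, n_labels) matrix
        return np.column_stack([p.predict(X) for p in self.predictors])

X = np.array([[0, 1], [1, 0], [1, 1], [0, 0]])
Y = np.array([[1, 0], [0, 1], [1, 1], [0, 0]])
model = MultiLabelCNB()
model.fit(X, Y)
print(model.predict(X))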
Example #11
def cnb(train_x, train_y, test_x, test_y):
    catnb = CategoricalNB()
    catnb.fit(train_x, train_y)
    y_predictions = catnb.predict(test_x)
    print("MSE for Categorical Naive Bayes model = ",
          mean_squared_error(test_y, y_predictions))
    my_f1 = f1_score(test_y, y_predictions, average='macro')
    print("f1_macro for Categorical Naive Bayes Classifier = ", my_f1)
    cm = confusion_matrix(test_y, y_predictions, normalize='true')
    sns.heatmap(cm, annot=True)
    plt.title('Confusion matrix of the Categorical Naive Bayes classifier')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.savefig('./output/CompNB.png')
    plt.show()
Example #12
def test_categoricalnb_with_min_categories(
    min_categories, exp_X1_count, exp_X2_count, new_X, exp_n_categories_
):
    X_n_categories = np.array([[0, 0], [0, 1], [0, 0], [1, 1]])
    y_n_categories = np.array([1, 1, 2, 2])
    expected_prediction = np.array([1])

    clf = CategoricalNB(alpha=1, fit_prior=False, min_categories=min_categories)
    clf.fit(X_n_categories, y_n_categories)
    X1_count, X2_count = clf.category_count_
    assert_array_equal(X1_count, exp_X1_count)
    assert_array_equal(X2_count, exp_X2_count)
    predictions = clf.predict(new_X)
    assert_array_equal(predictions, expected_prediction)
    assert_array_equal(clf.n_categories_, exp_n_categories_)
Example #13
def NaiveBayes(xtrain, ytrain, xtest, ytest, binary=False):
    if binary:
        nb = GaussianNB()
        model = "GaussianNB"
    else:
        nb = CategoricalNB()
        model = "CategoricalNB"

    nb.fit(xtrain, ytrain)
    y_pred_nb = nb.predict(xtest)
    y_prob_pred_nb = nb.predict_proba(xtest)
    # how did our model perform?
    count_misclassified = (ytest != y_pred_nb).sum()

    print(model)
    print("=" * 30)
    print('Misclassified samples: {}'.format(count_misclassified))
    accuracy = accuracy_score(ytest, y_pred_nb)
    print('Accuracy: {:.5f}'.format(accuracy))

    heatmap_confmat(ytest, y_pred_nb, "naive_bayes.png")

    feature_importance_NB(nb, xtest, ytest)
    print("Naive Bayes done")
Example #14
def find_best_feature(selected_features, X_train, C_param, y_train, feature):
    # decide on which features we are using (selected_features + feature)
    features_in_use = np.append(selected_features, feature)
    X_train_filt = X_train[:, features_in_use]
    X_train_filt_ranked = bin_rank(X_train_filt)

    # Fit a Categorical Naive Bayes model
    lr = CategoricalNB(alpha=1.0,
                       fit_prior=True,
                       class_prior=None,
                       min_categories=5).fit(X_train_filt_ranked, y_train)
    #lr = LogisticRegression(C=C_param, random_state=0).fit(X_train_filt_ranked, y_train)
    # Get the accuracy on the training data
    accu_train = lr.score(X_train_filt_ranked, y_train)
    # Tuple format = (feature number, training accuracy found)
    return (feature, accu_train)
Example #15
    def buildReturnModel(self):
        model = None

        if self.flavor:
            if self.exp_type == 'classification':
                if self.flavor == 'Bernoulli':
                    model = BernoulliNB(**self.default_args)
                elif self.flavor == 'Categorical':
                    model = CategoricalNB(**self.default_args)
                elif self.flavor == 'Complement':
                    model = ComplementNB(**self.default_args)
                elif self.flavor == 'Gaussian':
                    model = GaussianNB(**self.default_args)
                elif self.flavor == 'Multinomial':
                    model = MultinomialNB(**self.default_args)
            else:
                raise ValueError(
                    'Naive Bayes can only be used for classification problems!'
                )
        else:
            raise ValueError(
                'cannot build model because the flavor of Naive Bayes is unknown!'
            )

        return model
Example #16
def bayes(test_set, training_set, categories):
    classifier = CategoricalNB()

    x, y = build_xy(training_set, categories)
    classifier.fit(x, y)

    x, y = build_xy(test_set, categories)
    y_predicted = classifier.predict(x)
    print(f'score: {classifier.score(x, y)}')
    print('bayes classification report')
    print(classification_report(y, y_predicted))
Example #17
def selected_feature_check(data, X_train, y_train, selected_features,
                           selected_features_by_name, C_param, best_feature):
    # Save the previous selected features to check later if we have made a change
    prev_selected_features = selected_features.copy()
    prev_selected_features_by_name = selected_features_by_name.copy()
    # Add the best feature to the list
    selected_features = np.append(selected_features, best_feature)
    selected_features_by_name.append(data.columns[best_feature])
    feature_removal_score = {}

    # Iterate through each feature in the list and remove it
    for feature in selected_features:
        temp_features = np.setdiff1d(selected_features, np.array([feature]))
        X_train_filt = X_train[:, temp_features]
        X_train_filt_ranked = bin_rank(X_train_filt)
        # Fit a Categorical Naive Bayes model
        lr = CategoricalNB(alpha=1.0,
                           fit_prior=True,
                           class_prior=None,
                           min_categories=5).fit(X_train_filt_ranked, y_train)
        #lr = LogisticRegression(C=C_param, random_state=0).fit(X_train_filt_ranked, y_train)
        # Get the accuracy on the training data
        accu_train = lr.score(X_train_filt_ranked, y_train)
        feature_removal_score[feature] = accu_train

    # Get the feature which causes the highest accuracy without it
    max_key = max(feature_removal_score,
                  key=lambda k: feature_removal_score[k])
    selected_features = np.setdiff1d(selected_features, np.array([max_key]))
    max_key_name = data.columns[max_key]
    selected_features_by_name = list(
        set(selected_features_by_name) - set([max_key_name]))

    # Check if we have made any changes, if not let the caller know that this function is no longer needed
    if np.array_equal(selected_features, prev_selected_features):
        print(
            "-----------------------------------------------------------------------------------------------"
        )
        print(
            "We have found the unchanged set, thus from now on we are just adding to our selected features.\n"
        )
        print(
            "-----------------------------------------------------------------------------------------------"
        )
        return max_key, selected_features, selected_features_by_name, True
    else:
        return max_key, selected_features, selected_features_by_name, False
Example #18
File: SciKit.py  Project: jareie/Connect4T
def GetLearningAutomata(typeOfAutomata):
    if typeOfAutomata == "Categorical":
        return CategoricalNB()
    elif typeOfAutomata == "Gaussian":
        return GaussianNB()
    elif typeOfAutomata == "DecisionTree":
        return DecisionTreeClassifier()
    elif typeOfAutomata == "LinearSVC":
        return LinearSVC()
Example #19
def naive_bayes_fit_and_predict(X_train, X_test, Y_train, Y_test):
    gnb, mnb, cnb, bnb, canb = (GaussianNB(), MultinomialNB(), ComplementNB(),
                                BernoulliNB(), CategoricalNB())
    Y_pred_gnb = gnb.fit(X_train, Y_train).predict(X_test)
    Y_pred_mnb = mnb.fit(X_train, Y_train).predict(X_test)
    Y_pred_cnb = cnb.fit(X_train, Y_train).predict(X_test)
    Y_pred_bnb = bnb.fit(X_train, Y_train).predict(X_test)
    Y_pred_canb = canb.fit(X_train, Y_train).predict(X_test)
    return Y_pred_gnb, Y_pred_mnb, Y_pred_cnb, Y_pred_bnb, Y_pred_canb
Example #20
def test_incremental_validation(X=None, y=None, iterations=10, verbose=1):
    if X is None:
        X, y = make_classification(n_samples=500,
                                   n_features=1000,
                                   n_informative=20,
                                   n_redundant=1,
                                   n_repeated=0,
                                   n_classes=2,
                                   n_clusters_per_class=2,
                                   weights=None,
                                   class_sep=1,
                                   hypercube=False,
                                   scale=1.0,
                                   shuffle=True,
                                   random_state=0)
    X //= 10  # --> To be able to evaluate categoricalNB

    # classifiers
    nb_classifier = NaiveBayes(encode_data=True)
    nb_classifier_no_encoding = NaiveBayes(encode_data=False)
    custom_encoder = CustomOrdinalFeatureEncoder()
    cnb = CategoricalNB()

    # accumulators (categorical_nb and custom_nb_val_2 are never filled in
    # the loop below, so np.mean over them yields nan)
    categorical_nb = []
    custom_nb_val_1 = []
    custom_nb_val_2 = []
    custom_nb_val_3 = []
    custom_nb_val_4 = []
    for i in range(iterations):
        if verbose:
            print(f"Iteration {i}")
        ts = time()
        X2 = custom_encoder.fit_transform(X)  # warm-up encode; this timing is unused

        ts = time()
        score_2 = nb_classifier.leave_one_out_cross_val(X, y)
        custom_nb_val_1.append(time() - ts)

        ts = time()
        score_4 = cross_leave_one_out(nb_classifier, X, y)
        custom_nb_val_3.append(time() - ts)

        ts = time()
        X2 = custom_encoder.fit_transform(X)
        score_5 = cross_leave_one_out(nb_classifier_no_encoding, X2, y)
        custom_nb_val_4.append(time() - ts)

        if i == 0:
            score_1 = score_2
            scores = [score_1, score_2, score_4, score_5]
            assert all(score == scores[0] for score in scores)
    print("Categorical with scikit loo: ", np.mean(categorical_nb[1:]))
    print("Custom with scikit loo: ", np.mean(custom_nb_val_3[1:]))
    print("Custom with scikit loo (pre-encoding): ",
          np.mean(custom_nb_val_4[1:]))
    print("Custom with first incremental: ", np.mean(custom_nb_val_1[1:]))
Example #21
class CategoricalBatchNB(TransformerMixin):
    def __init__(self, batch_size, classes, *args, **kwargs):
        self._batch_size = batch_size
        self._classes = classes
        self._args = args
        self._kwargs = kwargs
        self._model = CategoricalNB(*args, **kwargs)

    def fit(self, x, y, **fit_params):
        batch_size = self._batch_size
        self._model = CategoricalNB(*self._args, **self._kwargs)

        for index in tqdm(range(batch_size, x.shape[0] + batch_size, batch_size)):
            self._model.partial_fit(
                x[index - batch_size:index, :].toarray(),
                y[index - batch_size:index],
                classes=self._classes
            )
        return self

    @staticmethod
    def transform(x, y=None, **fit_params):
        return x

    def predict(self, x):
        batch_size = self._batch_size
        predictions = []
        for index in tqdm(range(batch_size, x.shape[0] + batch_size, batch_size)):
            predictions.extend(
                self._model.predict(
                    x[index - batch_size:index, :].toarray()
                ).tolist()
            )
        return np.array(predictions).ravel()

    def score(self, x, y):
        y_pred = self.predict(x)
        return accuracy_score(y, y_pred)

    def __str__(self):
        return "CategoricalBatchNB()"

    def __repr__(self):
        return self.__str__()
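A minimal usage sketch for the batch wrapper above, with small synthetic sparse data; it assumes numpy, scipy, tqdm, and sklearn's accuracy_score are imported as the class requires:

import numpy as np
import scipy.sparse

rng = np.random.RandomState(0)
X = scipy.sparse.csr_matrix(rng.randint(3, size=(100, 5)))
y = rng.randint(2, size=100)

# four partial_fit batches of 25 rows each
model = CategoricalBatchNB(batch_size=25, classes=[0, 1])
model.fit(X, y)
print(model.score(X, y))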
Example #22
    def test_predict(self, model, dummy_cat_X, dummy_cat_y):
        # reduce alpha to ensure no smoothing
        y = CategoricalNB(alpha=1.0e-10)\
                            .fit(dummy_cat_X, dummy_cat_y)\
                            .predict(dummy_cat_X)

        model.fit(dummy_cat_X, dummy_cat_y)
        y_hat = model.predict(dummy_cat_X)

        np.testing.assert_array_equal(y, y_hat)
Example #23
def test_naive_bayes():
    x, y = load_simple_data()
    logging.info(f"My Bayes 运行结果:")
    model = MyCategoricalNB(alpha=0)
    model.fit(x, y)
    logging.info(model.predict(np.array([[0, 1, 0]]), with_prob=True))
    logging.info(f"CategoricalNB 运行结果:")
    model = CategoricalNB(alpha=0)
    model.fit(x, y)
    logging.info(model.predict(np.array([[0, 1, 0]])))
    logging.info(model.predict_proba(np.array([[0, 1, 0]])))
Example #24
def pengujian():
    if "admin" not in session:
        return redirect(url_for("index"))
    mydb.connect()
    cursor = mydb.cursor()
    cursor.execute(
        "SELECT * FROM preprocessing WHERE preprocessing.tahundibuat REGEXP '(2005|2006|2007|2008|2009|2010|2011|2012|2013|2014|2015|2016|2017|2018|2019)'"
    )
    training = cursor.fetchall()
    X = [[x[0], x[1], x[2], x[3], x[4]] for x in training]
    y = [x[5] for x in training]
    # X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)
    clf = CategoricalNB()
    clf.fit(X, y)
    cursor.execute(
        "SELECT * FROM preprocessing WHERE preprocessing.tahundibuat REGEXP '(2020)'"
    )
    testing = cursor.fetchall()
    X_test = [[x[0], x[1], x[2], x[3], x[4]] for x in testing]
    y_test = [x[5] for x in testing]
    predicted = clf.predict(X_test)
    payload = []
    for index, x in enumerate(X_test):
        arr = x
        arr.append(y_test[index])  # label for this test row
        payload.append({
            "no": index + 1,
            "stasiuntv": arr[0],
            "genre": arr[1],
            "writer": arr[2],
            "director": arr[3],
            "actor": arr[4],
            "status": arr[5],
        })
    hasil = confusion_matrix(y_test, predicted)
    akurasi = (hasil[0][0] + hasil[1][1]) / (hasil[0][0] + hasil[0][1] +
                                             hasil[1][0] + hasil[1][1])

    return render_template("pengujian.html",
                           hasil=hasil,
                           akurasi=round(akurasi * 100))
Example #25
def test_predict_meta_override():
    X = pd.DataFrame({"c_0": [1, 2, 3, 4]})
    y = np.array([1, 2, 3, 4])

    base = CategoricalNB()
    base.fit(pd.DataFrame(X), y)

    dd_X = dd.from_pandas(X, npartitions=2)
    dd_X._meta = pd.DataFrame({"c_0": [5]})

    # Failure when not providing predict_meta
    # because of the value-dependent model
    wrap = ParallelPostFit(base)
    with pytest.raises(ValueError):
        wrap.predict(dd_X)

    # Success when providing a meta override
    wrap = ParallelPostFit(base, predict_meta=np.array([1]))
    result = wrap.predict(dd_X)
    expected = base.predict(X)
    assert_eq_ar(result, expected)
Example #26
def example_weather_nominal():
    path = (base_path / "weather-nominal.csv").resolve()
    series = pd.read_csv(path)
    # arrange table in X(features) and y(target)
    X = series.iloc[:, :-1]
    X = X.apply(LabelEncoder().fit_transform)
    y = series.iloc[:, -1]
    # apply GaussianNB and CategoricalNB
    gNB = GaussianNB()
    gNB.fit(X, y)
    cNB = CategoricalNB()
    cNB.fit(X, y)
    print(
        f"Prediction GaussianNB ([Sunny,Cool,High,True]): {gNB.predict([[2,0,0,1]])}"
    )
    print(f"Probability GaussianNB: {gNB.predict_proba([[2,0,0,1]])}")
    print("\n")
    print(
        f"Prediction CategoricalNB ([Sunny,Cool,High,True]): {cNB.predict([[2, 0, 0, 1]])}"
    )
    print(f"Probability CategoricalNB: {cNB.predict_proba([[2, 0, 0, 1]])}")
Example #27
def example_weather_numeric():
    path = (base_path / "weather-numeric.csv").resolve()
    series = pd.read_csv(path)
    # arrange table in X(features) and y(target)
    X = series.iloc[:, :-1]
    X.outlook = LabelEncoder().fit_transform(X.outlook)
    X.windy = LabelEncoder().fit_transform(X.windy)
    y = series.iloc[:, -1]
    # apply GaussianNB and CategoricalNB
    gNB = GaussianNB()
    gNB.fit(X, y)
    cNB = CategoricalNB()
    cNB.fit(X, y)
    print(
        f"Prediction GaussianNB ([Sunny,66,90,True]): {gNB.predict([[2, 66, 90, 1]])}"
    )
    print(f"Probability GaussianNB: {gNB.predict_proba([[2, 66, 90, 1]])}")
    print("\n")
    print(
        f"Prediction CategoricalNB ([Sunny,66,90,True]): {cNB.predict([[2, 66, 90, 1]])}"
    )
    print(f"Probability CategoricalNB: {cNB.predict_proba([[2, 66, 90, 1]])}")
Example #28
def test_spam_classification():
    x_train, x_test, y_train, y_test = load_data()
    model = MyCategoricalNB(alpha=1.0)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    logging.info(f"My Bayes 运行结果:")
    logging.info(classification_report(y_test, y_pred))

    model = CategoricalNB()
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    logging.info(f"CategoricalNB 运行结果:")
    logging.info(classification_report(y_test, y_pred))
Example #29
def classificationCategoricalNaiveBayes():
    col_names = [
        '*', 'web1', 'web2', 'cosine', 'len', 'word', 'sameDomain', 'label'
    ]
    #load dataset
    pima = pd.read_csv("data.csv", names=col_names)

    #split dataset in features and target variable
    feature_cols = ['cosine', 'len', 'word', 'sameDomain']
    X = pima[feature_cols]  # Features
    y = pima.label  # Target variable

    #Split dataset into training set and test set
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=4)  # 80% training and 20% test

    clf = CategoricalNB()
    clf.fit(X_train, y_train)

    # save the model
    dump(clf, open('model.pkl', 'wb'))
    startTime = datetime.now()
    #Predict the response for test dataset
    y_pred = clf.predict(X_test)
    endTime = datetime.now()

    print("exec time :", endTime - startTime)

    #Model Accuracy, how often is the classifier correct?
    print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

    print("precision:", metrics.average_precision_score(y_test, y_pred))

    print("recall:", metrics.recall_score(y_test, y_pred))

    print()

    print(confusion_matrix(y_test, y_pred))
Example #30
def setupClf(method, param):
    if method == 'knn':
        return KNeighborsClassifier(n_neighbors=param)
    elif method == 'bayes':
        return CategoricalNB(alpha=param)
    elif method == 'forest':
        return RandomForestClassifier(
            n_estimators=param[0],
            max_depth=param[1],
            min_samples_leaf=param[2],
            ccp_alpha=param[3]
        )
    elif method == 'svm':
        return SVC(C=param)
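A usage sketch for the factory above; the parameter values are invented, and the imports are assumed to match the classifiers the function returns:

from sklearn.naive_bayes import CategoricalNB
from sklearn.ensemble import RandomForestClassifier

clf = setupClf('bayes', 1.0)  # CategoricalNB(alpha=1.0)
rf = setupClf('forest', (100, 8, 2, 0.0))  # n_estimators, max_depth, min_samples_leaf, ccp_alpha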