# Example #1
# 0
 def test(self):
     """
     Evaluate the model with the best parameters found by
     randomSearch() or gridSearch().
     :return:
     """
     # Fresh estimator, configured with the tuned hyper-parameters.
     classifier = NaiveBayes()
     classifier.set_params(**self.best_parameter)
     self.clf = classifier
     print("*** Test Result for Naive Bayes ***")
     # 10-fold cross-validated evaluation on the stored dataset.
     ModelEvaluation.evaluateModelWithCV(
         self.clf, self.dataset_x, self.dataset_y, cv=10)
def load_classifier():
    """Load a trained classifier from ``bayesian_classifier.pickle``.

    If no pickle is found, train a new Naive Bayes classifier from the
    training data instead.

    Args:
         None
    Return:
          classifier: trained-classifier object
    """
    try:
        # Context manager guarantees the handle is closed even if
        # unpickling raises (the original leaked it in that case).
        with open('bayesian_classifier.pickle', 'rb') as f:
            classifier = pickle.load(f)
    except IOError:
        # print() call instead of the Python-2 print statement used
        # before, which is a syntax error under Python 3.
        print('Bayesian classifier object not found. Will create now')
        classifier = NaiveBayes(alpha=0.01)
        data, label = load_training_data()
        bag_of_word, label = training_data_process(data, label)
        classifier.fit(bag_of_word, label)
    return classifier
# Example #3
# 0
def testNaiveBayes(trainx, trainy, testx, testy, cols):
    '''
    Tests a Naive Bayes classifier on given input data.
    :param trainx: training data
    :param trainy: training labels
    :param testx: test data
    :param testy: test labels
    :param cols: the chosen cols to use for the test
    :return: accuracy (fraction of correct predictions), cols as a string
    '''

    # train classifier
    clf = NaiveBayes()
    clf.fit(trainx, trainy)

    # Accuracy: fraction of predictions that match the true labels.
    # (Replaces the manual counter loop; the previously computed ROC AUC
    # was never used or returned, so it is dropped.)
    pred = list(clf.predict(testx))
    corr = sum(1 for p, y in zip(pred, testy) if p == y)
    accuracy = corr / len(pred)

    # Columns encoded as zero-padded two-digit strings, e.g. [1, 12] -> "0112".
    return accuracy, ''.join("%02d" % x for x in cols)
# Example #4
# 0
    def __init__(self, dataset_x, dataset_y):
        """Keep the dataset and start with an untuned Naive Bayes model."""
        self.dataset_x = dataset_x
        self.dataset_y = dataset_y
        # No tuned hyper-parameters yet; filled in by a search step later.
        self.best_parameter = {}
        self.clf = NaiveBayes()
# Example #5
# 0
def reproducing_sakar():
    """
    Reproduce the leave-one-subject-out Parkinson's experiment (Sakar et al.):
    for each of the 252 subjects, train every classifier on all other subjects
    and evaluate on the held-out one. Writes per-subject accuracy, F1 and MCC
    tables plus a majority-vote summary to the results/ directory.
    """
    classifiers = {
        "Naive Bayes": NaiveBayes(),
        "Logistic Regression": LogisticRegression(),
        "k-NN": KNN(p=1, n_neighbors=1),
        "Multilayer Perceptron": MLP(),
        "Random Forest": RandomForest(n_estimators=100),
        "SVM (Linear)": SVM(kernel="linear", gamma="auto"),
        "SVM (RBF)": SVM(kernel="rbf", gamma="auto")
    }

    # One column per classifier plus the subject id. Build the three
    # per-metric tables from a single column list instead of repeating
    # the same dict literal four times.
    columns = ["subject"] + list(classifiers)
    scores = {name: [] for name in columns}
    f1s = {name: [] for name in columns}
    mccs = {name: [] for name in columns}

    voting = {"subject": [], "voted": [], "true": []}

    df = pd.read_csv("parkinsons.csv")
    df = df.drop(["gender"], axis=1)

    for i in range(252):
        print("SUBJECT {}".format(i))

        scores["subject"].append(i)
        f1s["subject"].append(i)
        mccs["subject"].append(i)

        # Leave-one-subject-out split on the "id" column.
        train_set = df.loc[df["id"] != i].drop(["id"], axis=1)
        test_set = df.loc[df["id"] == i].drop(["id"], axis=1)

        X_train = train_set.drop(["class"], axis=1)
        y_train = train_set["class"]
        X_test = test_set.drop(["class"], axis=1)
        y_test = test_set["class"]

        # Scale using statistics from the training fold only.
        scaler = StandardScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)

        # Dimensionality reduction: LDA is used here (previously
        # PCA(n_components=50) was tried).
        pca = LDA()
        pca.fit(X_train, y_train)
        X_train_pca = pca.transform(X_train)
        X_test_pca = pca.transform(X_test)

        print(X_train_pca.shape)

        predictions = []
        for name, classifier in classifiers.items():
            classifier.fit(X_train_pca, y_train)
            pred = classifier.predict(X_test_pca)

            score = round(accuracy_score(y_test, pred), 2)
            f1 = round(f1_score(y_test, pred), 2)
            mcc = round(matthews_corrcoef(y_test, pred), 2)

            scores[name].append(score)
            f1s[name].append(f1)
            mccs[name].append(mcc)

            # Pool every classifier's per-sample predictions for the
            # subject-level majority vote below.
            predictions.extend(list(pred))

            print("{:<25}{} {} {}".format(name, score, f1, mcc))

        # All samples of a subject share one true label; the voted label
        # is the mode of the pooled predictions.
        voted_label = mode(predictions)
        true_label = list(y_test)[0]

        voting["subject"].append(i)
        voting["voted"].append(voted_label)
        voting["true"].append(true_label)

        print("Voted/True: {}/{}".format(voted_label, true_label))
        print()

    scores = pd.DataFrame(scores)
    scores.to_csv("results/scores.csv", index=None)

    f1s = pd.DataFrame(f1s)
    f1s.to_csv("results/f1s.csv", index=None)

    mccs = pd.DataFrame(mccs)
    mccs.to_csv("results/mccs.csv", index=None)

    voting = pd.DataFrame(voting)
    voting.to_csv("results/voting.csv", index=None)

    print(scores)
    print(f1s)
    print(mccs)
    print(voting)
# Example #6
# 0
# Per-category split: the first 30 rows of each Category form the training
# set and the last 20 form the test set.
# NOTE(review): assumes `data` (defined in an earlier cell) is a pandas
# DataFrame with a 'Category' column and at least 50 rows per category —
# confirm against the loading cell.
training_data = data.groupby('Category').head(30)
testing_data = data.groupby('Category').tail(20)

print("\nTraining Data")
print("===============\n")
print(training_data)
print("\nTesting Data")
print("===============\n")
print(testing_data)

# In[4]:

# Initialization of Naive Bayes and K nearest neighbour models.

bayes_classifier = NaiveBayes()
knn_classifier = KNN(n_neighbors=5)

# In[5]:

# Training of our Naive bayes and K nearest neighbour models.

# Input variables of training data: the first four columns are the features.
train_X = training_data.iloc[:, :4]

# Output variable of training data: the fifth column is the label.
train_Y = training_data.iloc[:, 4]

bayes_classifier.fit(train_X, train_Y)
knn_classifier.fit(train_X, train_Y)