Ejemplo n.º 1
0
def LogReg(n=5):
    """Train and evaluate an L1-penalised logistic regression classifier.

    Loads the data through ``feature_extraction.pre_process_comments()``,
    keeps the 17000 best features according to ``chi2``, prints the mean
    cross-validation score and the accuracy on the training data, and
    returns the classifier fitted on the whole (feature-selected) data set.

    Args:
        n: Passed as ``cv`` to ``cross_val_score``.

    Returns:
        The fitted ``LogisticRegression`` estimator.
    """
    X, y = feature_extraction.pre_process_comments()  # get the data
    # NOTE(review): the selector is fitted on all of X/y before
    # cross-validation, so the CV score below is likely optimistic.
    X = SelectKBest(chi2, k=17000).fit_transform(X, y)

    # The L1 penalty is only supported by the 'liblinear' and 'saga' solvers.
    # Recent scikit-learn releases default to 'lbfgs', which raises for
    # penalty='l1', so the solver is pinned explicitly ('liblinear' was the
    # default in older releases).
    LogReg_clf = LogisticRegression(penalty='l1',
                                    dual=False,
                                    solver='liblinear')

    print("before fitting")

    # cross_val_score works on clones, so LogReg_clf itself is still unfitted
    # afterwards and has to be fitted separately.
    LogReg_cv = cross_val_score(LogReg_clf, X, y, cv=n)

    LogReg_clf.fit(X, y)

    # Accuracy on the same data the model was trained on.
    LogReg_pred = LogReg_clf.predict(X)
    LogReg_acc = accuracy_score(y_pred=LogReg_pred, y_true=y)

    print("cv acc of LogReg is ", LogReg_cv.mean())

    print("acc of logreg is ", LogReg_acc)

    return LogReg_clf
Ejemplo n.º 2
0
def Bernoulli(n=5):
    """Train and evaluate a Bernoulli naive Bayes classifier.

    The data comes from ``feature_extraction.pre_process_comments()`` and is
    reduced to the 17000 best features according to ``chi2``. The training
    accuracy and the mean cross-validation score are printed.

    Args:
        n: Passed as ``cv`` to ``cross_val_score``.

    Returns:
        The ``BernoulliNB`` estimator fitted on the whole data set.
    """
    features, labels = feature_extraction.pre_process_comments()
    selector = SelectKBest(chi2, k=17000)
    features = selector.fit_transform(features, labels)

    model = BernoulliNB()
    fold_scores = cross_val_score(model, features, labels, cv=n)

    # Fit on everything, then score on the very same samples.
    model.fit(features, labels)
    train_predictions = model.predict(features)
    train_accuracy = accuracy_score(y_true=labels, y_pred=train_predictions)

    print("acc of BNB(sklearn) is ", train_accuracy)
    print("cv acc of BNB(sklearn) is ", fold_scores.mean())

    return model
Ejemplo n.º 3
0
def MultiNB_Kaggle():
    """Fit a multinomial naive Bayes model and write test predictions to CSV.

    The training data is reduced to the 17000 best features according to
    ``chi2``; the same fitted selector is applied to the test data. The
    predictions are written to ``predict.csv`` in the current working
    directory as two columns, ``Id`` and ``Category``.

    NOTE(review): this function unpacks four values from
    ``feature_extraction.pre_process_comments()`` (train X, train y, test
    data, test ids) -- confirm that the helper returns that shape here.

    Returns:
        The fitted ``MultinomialNB`` estimator.
    """
    X, y, test, test_ID = feature_extraction.pre_process_comments(
    )  # get the data

    # Fit the selector on the training data only and reuse it for the test
    # data so both end up with the same columns.
    k_best = SelectKBest(chi2, k=17000)
    X = k_best.fit_transform(X, y)
    test_X = k_best.transform(test)

    MultiNB_clf = MultinomialNB(alpha=0.22)
    MultiNB_clf.fit(X, y)
    MultiNB_pred = MultiNB_clf.predict(test_X)

    # np.savetxt prefixes the header with '# ' unless comments='' is given,
    # which would turn the header row into "# Id, Category". The header is
    # also written without a space so the second column is named exactly
    # "Category".
    np.savetxt('predict.csv',
               np.array([test_ID, MultiNB_pred]).transpose(),
               delimiter=',',
               fmt='%s',
               header='Id,Category',
               comments='')
    return MultiNB_clf
Ejemplo n.º 4
0
def MultiNB(n=5):
    """Train and evaluate a multinomial naive Bayes classifier.

    The data comes from ``feature_extraction.pre_process_comments()`` and is
    reduced to the 17000 best features according to ``chi2``. The mean
    cross-validation score and the training accuracy are printed.

    Args:
        n: Passed as ``cv`` to ``cross_val_score``.

    Returns:
        The ``MultinomialNB`` estimator (``alpha=0.22``) fitted on the whole
        data set.
    """
    features, labels = feature_extraction.pre_process_comments()
    selector = SelectKBest(chi2, k=17000)
    features = selector.fit_transform(features, labels)

    model = MultinomialNB(alpha=0.22)
    fold_scores = cross_val_score(model, features, labels, cv=n)

    # Fit on everything, then score on the very same samples.
    model.fit(features, labels)
    train_predictions = model.predict(features)
    train_accuracy = accuracy_score(y_pred=train_predictions, y_true=labels)

    print("cv acc of multiNB is ", fold_scores.mean())
    print("acc of multiNB is ", train_accuracy)

    return model
Ejemplo n.º 5
0
def LSVC(n=5):
    """Train and evaluate an L1-penalised linear support vector classifier.

    The data comes from ``feature_extraction.pre_process_comments()`` and is
    reduced to 17000 features with ``SelectKBest`` using its default scoring
    function (no ``score_func`` is passed). The mean cross-validation score
    and the training accuracy are printed.

    Args:
        n: Passed as ``cv`` to ``cross_val_score``.

    Returns:
        The ``LinearSVC`` estimator fitted on the whole data set.
    """
    features, labels = feature_extraction.pre_process_comments()
    selector = SelectKBest(k=17000)
    features = selector.fit_transform(features, labels)

    model = LinearSVC(penalty='l1', dual=False)
    #LSVC_clf = BaggingClassifier(base_estimator=LSVC_clf, max_samples=0.6, max_features=12000, n_estimators=24, random_state=1)

    print("before fitting")

    fold_scores = cross_val_score(model, features, labels, cv=n)

    # Fit on everything, then score on the very same samples.
    model.fit(features, labels)
    train_predictions = model.predict(features)
    train_accuracy = accuracy_score(y_pred=train_predictions, y_true=labels)

    print("cross validation accuracy", fold_scores.mean())
    print("training accuracy", train_accuracy)

    return model
Ejemplo n.º 6
0
def ensemble(n=5):
    """Train and evaluate a soft-voting ensemble of LogReg and MultinomialNB.

    The data comes from ``feature_extraction.pre_process_comments()`` and is
    reduced to 17000 features with ``SelectKBest`` using its default scoring
    function. The mean cross-validation score and the score on the training
    data are printed.

    Args:
        n: Passed as ``cv`` to ``cross_val_score``.

    Returns:
        The ``VotingClassifier`` fitted on the whole data set.
    """
    X, y = feature_extraction.pre_process_comments()  # get the data

    X = SelectKBest(k=17000).fit_transform(X, y)

    # The L1 penalty is only supported by the 'liblinear' and 'saga' solvers.
    # Recent scikit-learn releases default to 'lbfgs', which raises for
    # penalty='l1', so the solver is pinned explicitly.
    LogReg_clf = LogisticRegression(penalty='l1',
                                    dual=False,
                                    solver='liblinear')
    MNB_clf = MultinomialNB(alpha=0.22)

    # Soft voting averages predicted class probabilities of the members.
    Ensemble_clf = VotingClassifier(estimators=[('lg', LogReg_clf),
                                                ('mnb', MNB_clf)],
                                    voting='soft')

    Ensemble_cv = cross_val_score(Ensemble_clf, X, y, cv=n)

    Ensemble_clf.fit(X, y)

    # Score on the same data the ensemble was trained on.
    score = Ensemble_clf.score(X, y)

    print("cv acc of ensemble is ", Ensemble_cv.mean())
    print("acc of ensemble is ", score)

    return Ensemble_clf
Ejemplo n.º 7
0
import tensorflow as tf
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.feature_selection import RFE
from sklearn.feature_selection import SelectKBest, chi2, VarianceThreshold
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import LinearSVC
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeClassifier
from tensorflow.keras import layers

import feature_extraction
import models.models as models

# Script: build a soft-voting ensemble (logistic regression + multinomial
# naive Bayes) on the 17000 best chi2 features, cross-validate it with 3
# folds and then fit it on the whole data set.
X, y = feature_extraction.pre_process_comments()  # get the data

X = SelectKBest(chi2, k=17000).fit_transform(X, y)

# The L1 penalty is only supported by the 'liblinear' and 'saga' solvers.
# Recent scikit-learn releases default to 'lbfgs', which raises for
# penalty='l1', so the solver is pinned explicitly.
LogReg_clf = LogisticRegression(penalty='l1', dual=False, solver='liblinear')
MNB_clf = MultinomialNB(alpha=0.3)
# Not part of the voting ensemble below; kept as a module-level name.
LinearSVC_clf = LinearSVC(penalty='l1', dual=False)

# Soft voting averages predicted class probabilities of the members.
Ensemble_clf = VotingClassifier(estimators=[('lg', LogReg_clf),
                                            ('mnb', MNB_clf)],
                                voting='soft')

Ensemble_cv = cross_val_score(Ensemble_clf, X, y, cv=3)

Ensemble_clf.fit(X, y)