Example #1
0
from sklearn.naive_bayes import BernoulliNB
from sklearn.svm import LinearSVC, SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.grid_search import GridSearchCV
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn import metrics

from old_hamshahri_reader import OldHamshahriReader
import config



# Parameter grid that varies only C.
# NOTE(review): presumably passed to GridSearchCV (imported above); the
# call site is not visible in this part of the file -- confirm.
tuned_params = [
    {
        'C': [1, 10, 100, 1000],
    },
]
# Parameter grid restricted to the RBF kernel, with candidate values for
# both C and gamma.
svc_tuned_params = [
    {
        'kernel': ['rbf'],
        'C': [1, 10, 100, 1000],
        'gamma': [0.001, 0.0001],
    },
]
if __name__ == '__main__':
    # Script entry point: read the corpus, vectorise it, select features and
    # split the result into train/test partitions.
    #
    # NOTE(review): TfidfVectorizer, SelectPercentile, chi2 and
    # train_test_split are not imported in the visible part of this file --
    # confirm they are brought into scope elsewhere.

    # Documents and labels come from the project's corpus reader.
    rd = OldHamshahriReader(root=config.CORPORA_ROOT)
    docs, labels = rd.sklearn_docs(config.TOT_DOCS)

    # TF-IDF features without lowercasing; max_df=0.8 ignores terms whose
    # document frequency exceeds 80%.
    # Disabled alternative: vectorizer = CountVectorizer(docs)
    vectorizer = TfidfVectorizer(max_df=0.8, lowercase=False)
    fs = vectorizer.fit_transform(docs)
    # Disabled call: vectorizer.build_preprocessor()

    # Keep the top 10 percent of features as scored by chi2 against labels.
    selector = SelectPercentile(chi2, percentile=10)
    selector.fit(fs, labels)
    fs = selector.transform(fs)

    # 60/40 train/test split; random_state is pinned so the split repeats.
    fs_train, fs_test, labels_train, labels_test = train_test_split(
        fs,
        labels,
        test_size=0.4,
        random_state=0,
    )

    # Initialised to None here; nothing in this block assigns them further.
    clf = None
    pred = None
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn import metrics

from old_hamshahri_reader import OldHamshahriReader
import config

# Single-entry grid: candidate values for C only.
tuned_params = [dict(C=[1, 10, 100, 1000])]

# Single-entry grid for the RBF kernel: candidate values for C and gamma.
svc_tuned_params = [
    dict(kernel=['rbf'], C=[1, 10, 100, 1000], gamma=[0.001, 0.0001]),
]
if __name__ == '__main__':
    rd = OldHamshahriReader(root=config.CORPORA_ROOT)
    docs, labels = rd.sklearn_docs(config.TOT_DOCS)
    #vectorizer = CountVectorizer(docs)
    vectorizer = TfidfVectorizer(lowercase=False, max_df=0.8)

    fs = vectorizer.fit_transform(docs)
    #vectorizer.build_preprocessor()
    selector = SelectPercentile(chi2, percentile=10)
    selector.fit(fs, labels)
    fs = selector.transform(fs)
    fs_train, fs_test, labels_train, labels_test = train_test_split(
        fs, labels, test_size=0.4, random_state=0)

    clf = None
    pred = None
    grid_search = False