from sklearn.naive_bayes import BernoulliNB
from sklearn.svm import LinearSVC, SVC
from sklearn.tree import DecisionTreeClassifier
try:
    # GridSearchCV lives in sklearn.model_selection since scikit-learn 0.18.
    from sklearn.model_selection import GridSearchCV
except ImportError:
    # Legacy location (deprecated in 0.18, removed in 0.20); only reached on
    # releases that predate sklearn.model_selection.
    from sklearn.grid_search import GridSearchCV
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn import metrics

from old_hamshahri_reader import OldHamshahriReader
import config

# Parameter grid with candidate values for ``C``.
tuned_params = [{'C': [1, 10, 100, 1000]}]

# Parameter grid for an RBF kernel: candidate ``C`` and ``gamma`` values.
svc_tuned_params = [
    {
        'kernel': ['rbf'],
        'C': [1, 10, 100, 1000],
        'gamma': [0.001, 0.0001],
    },
]

if __name__ == '__main__':
    # NOTE(review): TfidfVectorizer, SelectPercentile, chi2 and
    # train_test_split are not imported in this section -- presumably they
    # are imported earlier in the file; confirm.

    # Read up to config.TOT_DOCS documents and their labels from the corpus
    # rooted at config.CORPORA_ROOT.
    rd = OldHamshahriReader(root=config.CORPORA_ROOT)
    docs, labels = rd.sklearn_docs(config.TOT_DOCS)

    # Build TF-IDF features without lowercasing the text; max_df=0.8 is the
    # upper document-frequency cut-off for terms.
    # (A plain CountVectorizer was the earlier alternative.)
    vectorizer = TfidfVectorizer(lowercase=False, max_df=0.8)
    fs = vectorizer.fit_transform(docs)

    # Keep the top 10 percent of features ranked by the chi2 score.
    # NOTE(review): the selector is fitted on ALL documents and labels before
    # the train/test split below, so held-out labels influence which features
    # are kept. Left unchanged here because later code may rely on ``fs``.
    selector = SelectPercentile(chi2, percentile=10)
    fs = selector.fit_transform(fs, labels)

    # Hold out 40% of the samples for testing; random_state is fixed so the
    # split is reproducible.
    fs_train, fs_test, labels_train, labels_test = train_test_split(
        fs, labels, test_size=0.4, random_state=0
    )

    # Placeholders for the classifier and its predictions; nothing in this
    # section assigns them a real value.
    clf = None
    pred = None
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn import metrics

from old_hamshahri_reader import OldHamshahriReader
import config

# Grid of candidate ``C`` values.
tuned_params = [{'C': [1, 10, 100, 1000]}]

# Grid for an RBF kernel, spanning ``C`` and ``gamma``.
svc_tuned_params = [
    {'kernel': ['rbf'], 'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001]},
]

if __name__ == '__main__':
    # Pull documents and labels out of the corpus reader.
    rd = OldHamshahriReader(root=config.CORPORA_ROOT)
    docs, labels = rd.sklearn_docs(config.TOT_DOCS)

    # Vectorise the documents with TF-IDF weighting (case is preserved).
    # vectorizer = CountVectorizer(docs)
    vectorizer = TfidfVectorizer(lowercase=False, max_df=0.8)
    fs = vectorizer.fit_transform(docs)
    # vectorizer.build_preprocessor()

    # Fit the chi2 percentile selector on the full matrix, then reduce the
    # matrix to the selected 10 percent of features.
    selector = SelectPercentile(chi2, percentile=10)
    fs = selector.fit(fs, labels).transform(fs)

    # 60/40 train/test partition with a fixed random_state.
    fs_train, fs_test, labels_train, labels_test = train_test_split(
        fs,
        labels,
        test_size=0.4,
        random_state=0,
    )

    # Nothing is trained or predicted in this section; grid search is
    # switched off by default.
    clf = pred = None
    grid_search = False