def get_feature_importance(language: str, embeddings_path: str, platform_folder: str,
                           n_estimators: int, platform: str, dataset: str):
    """Print the feature importances that ``Model`` reports for one dataset.

    Writes a banner and the dataset name to stdout, builds a ``Model`` from
    the language / embeddings / platform-folder settings, calls its
    ``feature_importance`` method and prints whatever that call returns.

    Args:
        language: Forwarded to ``Model(language=...)``.
        embeddings_path: Forwarded to ``Model(embeddings_path=...)``.
        platform_folder: Forwarded to ``Model(platform_folder=...)``.
        n_estimators: Forwarded to ``Model.feature_importance``.
        platform: Forwarded to ``Model.feature_importance``.
        dataset: Printed in the header and forwarded to
            ``Model.feature_importance``.

    Returns:
        None. The result is only printed, not returned.
    """
    # Header: banner line, dataset name, then one blank line.
    print("#################", dataset, "", sep="\n")

    model = Model(
        language=language,
        embeddings_path=embeddings_path,
        platform_folder=platform_folder,
    )
    importances = model.feature_importance(
        n_estimators=n_estimators,
        platform=platform,
        dataset=dataset,
    )

    # Result followed by one blank line to separate consecutive runs.
    print(importances, end="\n\n")
# Grid-search experiment on the Brazilian-Portuguese Twitter dataset.
import numpy as np  # needed by np.arange below; no numpy import was visible here
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.ensemble import RandomForestClassifier

from fakenews_detector.model import Model

# Experiment configuration: the single source of truth for every call below.
dataset = "tweets_br"
platform = 'Twitter'
language = "pt"
# Candidate forest sizes: 1, 51, 101, ..., 951.
tree_estimators = np.arange(1, 1001, 50)

# Paths are derived from the configuration above so they cannot drift apart
# from it; they evaluate to 'embeddings/pt/model.txt' and
# 'datasets/Twitter/tweets_br/'.
t = Model(language=language,
          embeddings_path='embeddings/{}/model.txt'.format(language),
          platform_folder='datasets/{}/{}/'.format(platform, dataset))

##########################################################################################################
# NOTE(review): the banner announces "LTO" but the call below is
# custom_gridsearch -- confirm the label is still the intended one.
print("TESTING WITH LTO")
print()
print()

# FEATURES
print("FEATURES + GAUSSIANNB")
# Empty parameter grid, with the classifier selected by the "gnb" code.
t.custom_gridsearch(dataset=dataset,
                    platform=platform,
                    parameters={},
                    language=language,
                    clf="gnb",
                    feature_set=Model.FEATURES_CODE)
from sklearn.svm import SVC from sklearn.neighbors import KNeighborsClassifier from sklearn.naive_bayes import GaussianNB, MultinomialNB from sklearn.ensemble import RandomForestClassifier from sklearn.neighbors import KNeighborsClassifier from fakenews_detector.model import Model language = 'bg' platform = 'Websites' dataset = 'btv-lifestyle' tree_estimators = np.arange(1, 1001, 50) t = Model(language='bg', embeddings_path='embeddings/bg/model.txt', platform_folder='datasets/Websites/btv-lifestyle/') # FEATURES print("FEATURES + GAUSSIANNB") t.classify(dataset=dataset, platform=platform, language=language, clf=GaussianNB(), parameters={}, feature_set=Model.FEATURES_CODE, kfold=5) print("------------------------------------------------------") print("FEATURES + KNN") t.classify(dataset=dataset,
# Leave-topic-out (LTO) experiment on the Brazilian-Portuguese Twitter dataset.
import numpy as np  # needed by np.arange below; no numpy import was visible here
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.ensemble import RandomForestClassifier

from fakenews_detector.model import Model

# Experiment configuration: the single source of truth for every call below.
dataset = "tweets_br"
platform = 'Twitter'
language = "pt"
# Candidate forest sizes: 1, 51, 101, ..., 951.
tree_estimators = np.arange(1, 1001, 50)

# Paths are derived from the configuration above so they cannot drift apart
# from it; they evaluate to 'embeddings/pt/model.txt' and
# 'datasets/Twitter/tweets_br/'.
t = Model(language=language,
          embeddings_path='embeddings/{}/model.txt'.format(language),
          platform_folder='datasets/{}/{}/'.format(platform, dataset))

##########################################################################################################
print("TESTING WITH LTO")
print()
print()

# BOW (this section runs with Model.BOW_CODE and a polynomial-kernel SVC)
print("BOW + SVC")
# classify_lto is unpacked into five values; only the first one is kept.
# Positional order is (platform, dataset, language, classifier, feature set).
fscore_topic_svm, _, _, _, _ = t.classify_lto(platform,
                                              dataset,
                                              language,
                                              SVC(kernel='poly'),
                                              Model.BOW_CODE)
print("------------------------------------------------------")