Esempio n. 1
0
def get_feature_importance(language: str, embeddings_path: str,
                           platform_folder: str, n_estimators: int,
                           platform: str, dataset: str):
    """Print the feature importances a ``Model`` reports for one dataset.

    A ``Model`` is built from ``language``, ``embeddings_path`` and
    ``platform_folder``; its ``feature_importance`` method is then called
    and the result is written to stdout between a header and a blank line.
    The function itself returns nothing.

    Args:
        language: Forwarded to the ``Model`` constructor.
        embeddings_path: Forwarded to the ``Model`` constructor.
        platform_folder: Forwarded to the ``Model`` constructor.
        n_estimators: Forwarded to ``Model.feature_importance``.
        platform: Forwarded to ``Model.feature_importance``.
        dataset: Printed in the header and forwarded to
            ``Model.feature_importance``.
    """
    # Header is emitted before the model is built, so it still shows up
    # even if construction fails: separator line, dataset name, blank line.
    print("#################", dataset, "", sep="\n")

    model = Model(
        language=language,
        embeddings_path=embeddings_path,
        platform_folder=platform_folder,
    )
    importances = model.feature_importance(
        n_estimators=n_estimators,
        platform=platform,
        dataset=dataset,
    )

    # The result followed by one empty line.
    print(importances, end="\n\n")
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

from fakenews_detector.model import Model

# NumPy is needed for np.arange below; it was not imported anywhere in the
# visible part of this script, which made the script fail with NameError.
import numpy as np

# Experiment configuration. Everything below is derived from these values.
dataset = "tweets_br"
platform = 'Twitter'
language = "pt"
# 1, 51, 101, ..., 951 (20 values).
tree_estimators = np.arange(1, 1001, 50)

# Build the model resources from the configuration instead of repeating the
# literals, so the paths cannot drift away from language/platform/dataset.
# Resolves to 'embeddings/pt/model.txt' and 'datasets/Twitter/tweets_br/'.
t = Model(language=language,
          embeddings_path='embeddings/{}/model.txt'.format(language),
          platform_folder='datasets/{}/{}/'.format(platform, dataset))

##########################################################################################################
print("TESTING WITH LTO")
print()
print()

# FEATURES
print("FEATURES + GAUSSIANNB")
# The classifier is selected by its string code ("gnb"); no hyper-parameters
# are searched here, hence the empty parameter grid.
t.custom_gridsearch(dataset=dataset,
                    platform=platform,
                    parameters={},
                    language=language,
                    clf="gnb",
                    feature_set=Model.FEATURES_CODE)
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

from fakenews_detector.model import Model

# NumPy is needed for np.arange below; it was not imported anywhere in the
# visible part of this script, which made the script fail with NameError.
import numpy as np

# Experiment configuration. Everything below is derived from these values.
language = 'bg'
platform = 'Websites'
dataset = 'btv-lifestyle'
# 1, 51, 101, ..., 951 (20 values).
tree_estimators = np.arange(1, 1001, 50)

# Build the model resources from the configuration instead of repeating the
# literals, so the paths cannot drift away from language/platform/dataset.
# Resolves to 'embeddings/bg/model.txt' and
# 'datasets/Websites/btv-lifestyle/'.
t = Model(language=language,
          embeddings_path='embeddings/{}/model.txt'.format(language),
          platform_folder='datasets/{}/{}/'.format(platform, dataset))

# FEATURES
print("FEATURES + GAUSSIANNB")
# GaussianNB is passed with an empty parameter dict and kfold=5.
t.classify(dataset=dataset,
           platform=platform,
           language=language,
           clf=GaussianNB(),
           parameters={},
           feature_set=Model.FEATURES_CODE,
           kfold=5)
print("------------------------------------------------------")

print("FEATURES + KNN")
t.classify(dataset=dataset,
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

from fakenews_detector.model import Model

# NumPy is needed for np.arange below; it was not imported anywhere in the
# visible part of this script, which made the script fail with NameError.
import numpy as np

# Experiment configuration. Everything below is derived from these values.
dataset = "tweets_br"
platform = 'Twitter'
language = "pt"
# 1, 51, 101, ..., 951 (20 values).
tree_estimators = np.arange(1, 1001, 50)

# Build the model resources from the configuration instead of repeating the
# literals, so the paths cannot drift away from language/platform/dataset.
# Resolves to 'embeddings/pt/model.txt' and 'datasets/Twitter/tweets_br/'.
t = Model(language=language,
          embeddings_path='embeddings/{}/model.txt'.format(language),
          platform_folder='datasets/{}/{}/'.format(platform, dataset))

##########################################################################################################
print("TESTING WITH LTO")
print()
print()

# BOW

print("BOW + SVC")
# Pass the configuration variables (same values, same positional order as
# the former literals 'Twitter', 'tweets_br', 'pt') so this call follows any
# change made to the configuration above. Only the first of the five
# returned values is kept.
fscore_topic_svm, _, _, _, _ = t.classify_lto(platform, dataset, language,
                                              SVC(kernel='poly'),
                                              Model.BOW_CODE)
print("------------------------------------------------------")