from src.preprocessing.transformator import get_df from src.evaluation.compare import compare_classifiers from src.classifier.sklearn import pipelines from src.data_retrieval.helpers import database import pandas as pd def warn(*args, **kwargs): pass warnings.warn = warn db = database.MongoDB() df = get_df(list(db.get_articles())) # models feature_sets = [ 'bg_bert', 'bg_xlm', 'bg_styl', 'bg_lsa', 'en_use', 'en_nela', 'en_bert', 'en_elmo' ] features = [ ('top_1', ['bg_lsa_title', 'bg_lsa_text']), ('top_2', ['bg_lsa_title', 'bg_lsa_text', 'en_elmo_title', 'en_elmo_text']), ('top_3', [ 'bg_lsa_title', 'bg_lsa_text', 'en_elmo_title', 'en_elmo_text', 'en_use_title', 'en_use_text' ]), ('top_4', [
# Script fragment: builds a DataFrame of articles, instantiates a
# LogisticRegression classifier and collects the '<feature_set>_title' column
# names for the enabled feature sets.
# NOTE(review): this source arrived collapsed onto a single line, which made
# everything after the first '#' a comment; the line structure and comment
# boundaries below are reconstructed and should be confirmed.
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict, GridSearchCV

from src.classifier.sklearn import pipelines
from src.evaluation.compare import compare_classifiers
from src.preprocessing.transformator import get_df

# NOTE(review): `db` is not defined anywhere in the visible source -- its
# assignment is commented out and `database` is not imported here. Confirm
# where `db` is expected to come from before running this as a script.
#db = database.MongoDB()
articles = list(db.get_articles())
df = get_df(articles)

clf = LogisticRegression()

# Feature sets to use; disabled ones are kept commented out for quick toggling.
# NOTE(review): 'bg_xlm' is assumed to be the only active entry -- confirm.
feature_sets = [
    #'bg_bert',
    'bg_xlm',
    # 'bg_styl',
    # 'bg_lsa',
    # 'en_use',
    # 'en_nela',
    # 'en_bert',
    # 'en_elmo'
]

# Collect the title column name of every enabled feature set.
# NOTE(review): the loop body may continue past the end of the visible source,
# so it is kept as an explicit loop rather than folded into a comprehension.
all_feats = []
for feature_set in feature_sets:
    all_feats.append(feature_set + '_title')