# Script: set up the "Experiment 2" classifier test run and, for each listed
# collection, build the Mongo query for tweets carrying a sentiment label.
import pandas as pd
from controller.config import Config
from controller.data_label_utils import get_all_sentiment_labels
from controller.database_service import DatabaseService
from controller.ml_utils import MLUtils

config = Config()
dbs = DatabaseService(config)

# Collections the experiment iterates over.
collection_names = ['sickhillary', 'baghdadi_dead', 'death_hoax']

# Test parameters.
description = "Experiment 2"
n_iterations_per_classifier = 100
test_ratio = 0.2
target_label = "tweet_sentiment_label"
classifier_dict = MLUtils.get_classifiers_standard_suite()
# Maps a feature-set name to the MLUtils callable registered for it.
features_tested_dict = {
    'feature_nlp_afinn_swn': MLUtils.gen_feature_afinn_swn,
    'feature_nlp_pos': MLUtils.gen_feature_pos,
    'feature_term_tfidf': MLUtils.gen_feature_term_tfidf,
}
ml_collections_result = {}

for collection_name in collection_names:
    print(f"Collection: {collection_name}")

    # Match only documents that have a sentiment label.
    mongo_search_query = {"tweet_sentiment_label": {"$exists": True}}
    # Request the basic tweet fields plus one entry per sentiment label name.
    mongo_filter_query = {
        "text": 1,
        "tweet_type": 1,
        "tweet_sentiment_label": 1,
        **dict.fromkeys(get_all_sentiment_labels(), 1),
    }
# Script: generate the links for all collections through TweetLinksService.
from controller.config import Config
from controller.tweet_links_service import TweetLinksService
from controller.database_service import DatabaseService
from controller.database_utils import get_tweet_collections_only

config = Config()
dbs = DatabaseService(config)
db = dbs.get_db()

# NOTE(review): collection_names is not used by anything below in this script.
# NOTE(review): db.collection_names() is presumably pymongo's
# Database.collection_names, which newer pymongo releases replace with
# list_collection_names() - confirm before upgrading the driver.
collection_names = get_tweet_collections_only(db.collection_names())

dls = TweetLinksService(dbs, config)
dls.gen_all_collection_links()
# Script: prepare an Excel export (pyexcelerate Workbook) and, for each rumour
# and news collection, fetch its unique tweet ids and build the Mongo queries.
import os

import pandas as pd
from pyexcelerate import Workbook

from controller.data_label_utils import get_all_sentiment_labels
from controller.config import Config
from controller.database_service import DatabaseService

config = Config()
dbs = DatabaseService(config)

# Join the path with os.path.join instead of appending "\data.xlsx": that
# literal depended on the invalid escape sequence "\d" (a warning today, an
# error in future Python versions) and hard-coded the Windows separator.
excel_save_path = os.path.join(config.get_excel_exports_dir(), "data.xlsx")
wb = Workbook()

collection_rumors = ['sickhillary', 'baghdadi_dead', 'death_hoax']
collection_news = ['mosul_battle', 'us_economic_policy', 'trump_cabinet']
collection_all = collection_rumors + collection_news

for collection_name in collection_all:
    # Unique tweet ids of the current collection.
    unique_tweet_list = dbs.get_unique_tweet_ids_for_collection(
        collection_name, sorted_by_childs_length=True)

    # Empty search query: no restriction on which documents match.
    mongo_search_query = {}
    # Request the basic tweet fields plus one entry per sentiment label name.
    mongo_filter_query = {
        "_id": 1,
        "text": 1,
        "tweet_type": 1,
        "tweet_sentiment_label": 1,
    }
    mongo_filter_query.update(
        {label: 1 for label in get_all_sentiment_labels()})
# Script: run DataLabelService.set_all_sentiments() against the configured
# database.
from controller.config import Config
from controller.data_label_service import DataLabelService
from controller.database_service import DatabaseService

config = Config()
dbs = DatabaseService(config)

dls = DataLabelService(dbs, config)
dls.set_all_sentiments()
# Script: for every tweet collection, load its unique tweets (text, sentiment
# label and AFINN / SWN score fields) into a pandas DataFrame.
import pandas as pd
from controller.config import Config
from controller.database_service import DatabaseService
from controller.database_utils import get_tweet_collections_only
from controller.nltk_manager import NLTKManager
from controller.preprocessing_utils import PreprocessingUtils as PPU

config = Config()
dbs = DatabaseService(config)
db = dbs.get_db()
collection_names = get_tweet_collections_only(dbs.get_collection_names())

for collection_name in collection_names:
    # Fields requested for each tweet; every field name maps to 1.
    filter_query = dict.fromkeys(
        (
            "_id",
            "text",
            "tweet_sentiment_label",
            "tweet_score_afinn",
            "tweet_score_swn_pos",
            "tweet_score_swn_neg",
            "tweet_score_swn_obj",
        ),
        1,
    )
    tweets = dbs.get_unique_tweets_for_collection(
        collection_name,
        sorted_by_childs_length=False,
        filter_query=filter_query,
    )
    pdAll = pd.DataFrame(tweets)

    # text preprocessing