Esempio n. 1
0
import pandas as pd
from controller.config import Config
from controller.data_label_utils import get_all_sentiment_labels
from controller.database_service import DatabaseService
from controller.ml_utils import MLUtils

# Build the project configuration and the database service constructed from it.
config = Config()
dbs = DatabaseService(config)

# Names of the collections this experiment iterates over.
collection_names = ['sickhillary', 'baghdadi_dead', 'death_hoax']

# Experiment parameters.
description = "Experiment 2"
n_iterations_per_classifier = 100
test_ratio = 0.2  # presumably the held-out fraction of the data -- confirm
target_label = "tweet_sentiment_label"

# Classifier suite as returned by MLUtils.
classifier_dict = MLUtils.get_classifiers_standard_suite()

# Feature name -> MLUtils callable that generates that feature.
features_tested_dict = dict(
    feature_nlp_afinn_swn=MLUtils.gen_feature_afinn_swn,
    feature_nlp_pos=MLUtils.gen_feature_pos,
    feature_term_tfidf=MLUtils.gen_feature_term_tfidf,
)

# Result container, empty at start (presumably filled per collection -- confirm).
ml_collections_result = {}

# Visit each collection and prepare the query documents for its labelled tweets.
for collection_name in collection_names:
    print(f"Collection: {collection_name}")

    # Match only documents in which the sentiment label field exists.
    mongo_search_query = {"tweet_sentiment_label": {"$exists": True}}

    # Field selection: the base tweet fields plus one entry for every
    # label returned by get_all_sentiment_labels(), each flagged with 1.
    mongo_filter_query = dict.fromkeys(
        ("text", "tweet_type", "tweet_sentiment_label"), 1)
    mongo_filter_query.update(dict.fromkeys(get_all_sentiment_labels(), 1))
Esempio n. 2
0
from controller.config import Config
from controller.tweet_links_service import TweetLinksService
from controller.database_service import DatabaseService
from controller.database_utils import get_tweet_collections_only

# Build the project configuration and the database service constructed from it.
config = Config()
dbs = DatabaseService(config)

# Fetch the database handle and reduce its collection list to tweet collections.
# NOTE(review): if `db` is a PyMongo Database, collection_names() is deprecated
# in favour of list_collection_names() -- confirm the installed driver version.
db = dbs.get_db()
all_collection_names = db.collection_names()
collection_names = get_tweet_collections_only(all_collection_names)

# The links service is needed for this single call only, so it is used inline.
TweetLinksService(dbs, config).gen_all_collection_links()
Esempio n. 3
0
import pandas as pd
from controller.data_label_utils import get_all_sentiment_labels
from pyexcelerate import Workbook
from controller.config import Config
from controller.database_service import DatabaseService

# Build the project configuration and the database service constructed from it.
config = Config()
dbs = DatabaseService(config)

# Workbook path inside the configured Excel export directory.
# The suffix is a raw string because "\d" is not a valid escape sequence:
# a plain literal triggers a DeprecationWarning/SyntaxWarning on current
# Python versions. The resulting runtime value is unchanged.
# NOTE(review): the hard-coded backslash separator is Windows-specific;
# consider os.path.join if this script must run on other platforms.
excel_save_path = config.get_excel_exports_dir() + r"\data.xlsx"
wb = Workbook()

# Collections grouped by topic kind; the export walks all of them,
# rumors first, then news.
collection_rumors = ['sickhillary', 'baghdadi_dead', 'death_hoax']
collection_news = ['mosul_battle', 'us_economic_policy', 'trump_cabinet']
collection_all = collection_rumors + collection_news

for collection_name in collection_all:
    # Unique tweet ids of this collection, requested with
    # sorted_by_childs_length enabled.
    unique_tweet_list = dbs.get_unique_tweet_ids_for_collection(
        collection_name, sorted_by_childs_length=True)

    # Empty search document (no restriction on which documents match).
    mongo_search_query = {}

    # Field selection: id and base tweet fields plus one entry for every
    # label returned by get_all_sentiment_labels(), each flagged with 1.
    mongo_filter_query = dict.fromkeys(
        ("_id", "text", "tweet_type", "tweet_sentiment_label"), 1)
    mongo_filter_query.update(dict.fromkeys(get_all_sentiment_labels(), 1))
Esempio n. 4
0
from controller.config import Config
from controller.data_label_service import DataLabelService
from controller.database_service import DatabaseService

# Build the project configuration and the database service constructed from it.
config = Config()
dbs = DatabaseService(config)

# The label service is needed for this single call only, so it is used inline.
DataLabelService(dbs, config).set_all_sentiments()
Esempio n. 5
0
import pandas as pd
from controller.config import Config
from controller.database_service import DatabaseService
from controller.database_utils import get_tweet_collections_only
from controller.nltk_manager import NLTKManager
from controller.preprocessing_utils import PreprocessingUtils as PPU

# Build the project configuration and the database service constructed from it.
config = Config()
dbs = DatabaseService(config)

# Database handle plus the collection names reduced to tweet collections.
db = dbs.get_db()
all_collection_names = dbs.get_collection_names()
collection_names = get_tweet_collections_only(all_collection_names)

for collection_name in collection_names:
    # Fields requested for every tweet: id, text, sentiment label and the
    # stored score fields (afinn, swn_pos/neg/obj), each flagged with 1.
    # Rebuilt on every iteration so each call receives its own dict.
    filter_query = dict.fromkeys(
        (
            "_id",
            "text",
            "tweet_sentiment_label",
            "tweet_score_afinn",
            "tweet_score_swn_pos",
            "tweet_score_swn_neg",
            "tweet_score_swn_obj",
        ),
        1,
    )

    # Unique tweets of this collection, without child-length sorting.
    tweets = dbs.get_unique_tweets_for_collection(
        collection_name,
        filter_query=filter_query,
        sorted_by_childs_length=False)

    # Load the fetched tweets into a DataFrame for the processing that follows.
    pdAll = pd.DataFrame(tweets)

    # text preprocessing