Ejemplo n.º 1
0
import sqlite3
from util import get_time_mask

# Copy the rows of every table named by get_time_mask() into
# full_sentiments, leaving out rows whose sentiment is 'n'.
table_names, dates = get_time_mask()

conn = sqlite3.connect('data/predicted_data.db')
try:
    c = conn.cursor()
    for table_name in table_names:
        # SQL placeholders cannot bind identifiers, so the table name is
        # concatenated into the statement; table_names must be trusted.
        c.execute(
            "INSERT INTO full_sentiments (text,date,username,location,sentiment) "
            "SELECT text,date,username,location,sentiment FROM "
            + table_name
            + " WHERE not sentiment='n'"
        )
        print("Adding data from table " + table_name)
        # Commit after each table so finished copies are kept even if a
        # later table fails.
        conn.commit()
finally:
    # Release the database handle even when an insert or commit raises.
    conn.close()
Ejemplo n.º 2
0
# Script configuration. The groupings below are inferred from the names;
# confirm against the code that reads each value.

# -- Text processing / model settings --
CLEAN_TEXT = True
USE_REFINED_TRAINING = True
TF_IDF = True
NUM_FEATURES = 740
CONFIDENCE_THRESHOLD = 0.7
USE_STOP_WORDS = False  # Only works if CLEAN_TEXT = True
REGULARIZATION = 'l2'

# -- Evaluation toggles --
RUN_VALIDATION_SET = True
RUN_TEST_SET = True

# -- Prediction settings --
PREDICT = False
FILTER_PREDICTIONS = False
CONTINUOUS = True
LOC_PRED_DB = 'data/predicted_data.db'
table_names, _ = get_time_mask()  # second return value is discarded
PRED_BATCH_SIZE = 10000
blacklist = []

# stop_words is always a set: populated when USE_STOP_WORDS is enabled,
# empty otherwise.
stop_words = (
    {
        'climate', 'change', 'climatechange', 'global', 'warming',
        'globalwarming',
    }
    if USE_STOP_WORDS
    else set()
)


# Logger
class Logger(object):
Ejemplo n.º 3
0
    def flush(self):
        """No-op flush; takes no action and returns None."""
        # NOTE(review): presumably exists so an instance can stand in for a
        # file-like stream such as sys.stdout -- confirm against the class.
        return None


#sys.stdout = Logger()

# Flags
# Hard-coded, module-level configuration for this script.

# Database file paths, relative to the working directory. LOC_TRAIN_DB is
# opened with sqlite3 further down; the other two are presumably SQLite
# files as well -- confirm against their usage.
LOC_TRAIN_DB = 'data/refined_training_data.db'
LOC_OLD_TRAIN_DB = 'data/labelled_data.db'
LOC_PRED_DB = 'data/predicted_data.db'
USE_OLD_DATA = True
# Output toggles (meaning inferred from the names -- verify where they are read).
PRINT_FEATURES = False
PRINT_FULL_VALIDATION_RESULTS = False
PRINT_CONFUSION_MATRIX = True
# Prediction settings.
PREDICT = False
# get_time_mask() returns a pair; only the first element is kept here.
PREDICT_TABLE_NAMES, _ = get_time_mask()
PREDICT_BATCH_SIZE = 1000
# Stop-word list: the terms of "global warming" and "climate change", their
# fused forms, and URL scheme fragments. 'change' takes the place of a
# duplicated 'climate' entry so that every term is listed exactly once.
stop_words = [
    'global', 'warming', 'globalwarming', 'climate', 'change',
    'climatechange', 'http', 'https'
]
PLOT_NUM_FEATURES = False

# Open the training database and read each split into memory.
conn = sqlite3.connect(LOC_TRAIN_DB)
c = conn.cursor()

# Cursor.execute() returns the cursor itself, so the fetch can be chained.
tr_d = c.execute("SELECT * FROM training").fetchall()
te_d = c.execute("SELECT * FROM test").fetchall()

# The validation query is only issued here; its rows are presumably fetched
# by the statements that follow this section.
c.execute("SELECT * FROM validation")