import sqlite3

from util import get_time_mask

# Consolidate per-period sentiment tables into the single `full_sentiments`
# table, skipping rows whose sentiment is 'n' (no/neutral sentiment).
table_names, dates = get_time_mask()

conn = sqlite3.connect('data/predicted_data.db')
try:
    c = conn.cursor()
    # Iterate the table names directly instead of indexing by position.
    # NOTE: table names cannot be bound as SQL parameters, so they are
    # interpolated; they come from our own get_time_mask(), not user input.
    for table_name in table_names:
        c.execute("INSERT INTO full_sentiments (text,date,username,location,sentiment) SELECT text,date,username,location,sentiment FROM " + table_name + " WHERE not sentiment='n'")
        print("Adding data from table " + table_name)
    # Single commit after all tables so a mid-run failure leaves the
    # destination table unchanged (same as the original behavior).
    conn.commit()
finally:
    # Close the connection even if an INSERT fails (the original leaked it
    # on error).
    conn.close()
# --- Experiment flags / hyper-parameters --------------------------------
CLEAN_TEXT = True
USE_REFINED_TRAINING = True
TF_IDF = True
NUM_FEATURES = 740
CONFIDENCE_THRESHOLD = 0.7
USE_STOP_WORDS = False  # Only works if CLEAN_TEXT = True
REGULARIZATION = 'l2'
RUN_VALIDATION_SET = True
RUN_TEST_SET = True
PREDICT = False
FILTER_PREDICTIONS = False
CONTINUOUS = True
LOC_PRED_DB = 'data/predicted_data.db'

table_names, _ = get_time_mask()
PRED_BATCH_SIZE = 10000
blacklist = []

# Optional filtering of the climate-specific vocabulary.
if USE_STOP_WORDS:
    stop_words = set([
        'climate', 'change', 'climatechange',
        'global', 'warming', 'globalwarming',
    ])
else:
    stop_words = set()


# Logger
class Logger(object):

    def flush(self):
        pass


#sys.stdout = Logger()

# Flags
LOC_TRAIN_DB = 'data/refined_training_data.db'
LOC_OLD_TRAIN_DB = 'data/labelled_data.db'
# NOTE(review): re-assigns LOC_PRED_DB with the same value as above — confirm
# whether one of the two flag sections is stale.
LOC_PRED_DB = 'data/predicted_data.db'
USE_OLD_DATA = True
PRINT_FEATURES = False
PRINT_FULL_VALIDATION_RESULTS = False
PRINT_CONFUSION_MATRIX = True
PREDICT = False  # NOTE(review): shadows the earlier PREDICT flag (same value)
PREDICT_TABLE_NAMES, _ = get_time_mask()
PREDICT_BATCH_SIZE = 1000

# NOTE(review): this list overwrites the set built above, which makes the
# USE_STOP_WORDS branch dead code — confirm which definition is intended.
stop_words = [
    'global', 'warming', 'globalwarming',
    'climate', 'climate', 'climatechange',
    'http', 'https'
]
PLOT_NUM_FEATURES = False

# Setup database connection and load data
conn = sqlite3.connect(LOC_TRAIN_DB)
c = conn.cursor()
c.execute("SELECT * FROM training")
tr_d = c.fetchall()
c.execute("SELECT * FROM test")
te_d = c.fetchall()
c.execute("SELECT * FROM validation")