from datetime import datetime
import os

from classes.DocumentsExporter import DocumentsExporter

# Exporter configuration: stopword list plus the directory the generated
# class-text files are written to.
file_paths = {
    'stopwords': '../input_sources/google_en_stopwords.txt',
    # 'output_dir': 'C:/text_mining/data/test',
    'output_dir': os.path.abspath('../../outputs/class_text/xtest'),
}
tp = DocumentsExporter(file_paths)

# Earliest published date to export from.
# from_date = datetime(2015, 8, 5)
from_date = datetime(2009, 1, 1)
delay = 2
company_id = 1

#### Individual files
# tp.process_documents_for_company('fb_post', company_id, from_date, delay, 'close', (3, 3), False)
# tp.process_documents_for_company('fb_comment', company_id, from_date, delay, 'close')
# tp.process_documents_for_company('article', company_id, from_date, delay, 'adjclose', (-2, 2), False)
# tp.process_documents_for_company('tweet', company_id, from_date, delay, 'close')

# twc_ids = [44, 202, 233, 300]
nonsearch_cids = [48, 217, 458, 479]
for c_id in nonsearch_cids:
    # BUG FIX: the original was `print ('...') % c_id`, which applies `%`
    # to print()'s return value (None) and raises TypeError on Python 3.
    # The format operator belongs inside the call.
    print('====COMPANY %s====' % c_id)
    tp.process_daily_documents_for_company(
        'tweet', c_id,
        datetime(2015, 8, 5).date(), datetime(2016, 4, 2).date(),
        delay, 'adjclose', (-3, 3), False, 25000, False, nonsearch_cids)
from datetime import datetime
import os
import itertools

from classes.DocumentsExporter import DocumentsExporter

#####
# PARAMETERS definition
####

# Build the exporter: stopword file plus the output directory for the
# sma20_ewma5 Twitter run.
file_paths = {
    'stopwords': '../input_sources/google_en_stopwords.txt',
    'output_dir': os.path.abspath('../../outputs/class_text/sma20_ewma5/twitter'),
}
d_exporter = DocumentsExporter(file_paths)

# Set lowest published date.
# Earliest published timestamps per source:
#   Yahoo 2014-06-18 21:24:00, twitter 2014-11-20 09:32:02,
#   fb post 2009-04-15 17:10:55, fb commnent 2009-04-16 13:51:28
# from_date = datetime(2000, 1, 1)

# Parameter grids swept over in the execution section below.
delays = [1, 2, 3]
price_types = ['adjclose', 'ewma', 'sma']
const_boundaries = [(-1, 1), (-2, 2), (-3, 3), (-4, 4), (-5, 5)]

# Cartesian product of (price_type, delay, boundary) — same ordering as
# itertools.product(price_types, delays, const_boundaries).
params_combinations = [
    (p_type, dly, bounds)
    for p_type in price_types
    for dly in delays
    for bounds in const_boundaries
]

#######
# EXECUTION
#######