"""
Created on Mar 23, 2015

@author: aelsalla

Builds the train/test dataset from previously crawled price and news CSVs.
"""
from DatasetBuilder.DatasetBuilder import DatasetBuilder
from FeaturesExtractor.FeaturesExtractor import FeaturesExtractor
from Classifier.Classifier import Classifier
import matplotlib.pyplot as plt
import numpy as np

# Initialize the DatasetBuilder
###############################
dataSetBuilder = DatasetBuilder()

# Fraction of the examples held out as the test set.
testSetShare = 0.1

# Input CSVs produced by the crawler (Windows-style relative paths).
dataSetBuilder.csvPricesFileName = '.\\crawler\\prices\\prices_16_4_2015_15_30_55.csv'
dataSetBuilder.csvNewsFileName = '.\\news_all.csv'

trainSet, testSet = dataSetBuilder.BuildDataSet(testSetShare)

# Full dataset = training examples followed by test examples.
dataSet = []
dataSet.extend(trainSet)
dataSet.extend(testSet)

# Populated later in the analysis (presumably by code past this chunk) — TODO confirm.
fullPrices = []
labels = []
sizes = []
import os

# Accumulators filled by the collectors below and dumped to a single merged
# CSV each at the bottom of this script.
# NOTE(review): these initializations (and the `def CollectNews():` header)
# were missing/cut in the mangled source and have been reconstructed — both
# lists are extend()-ed before any visible assignment, and CollectNews() is
# called below; verify against the original file.
news_headlines = []
prices = []


def CollectNews():
    """Scan .\\crawler\\news for *.csv files and append their headlines."""
    dirName = '.\\crawler\\news'
    for file in os.listdir(dirName):
        if file.endswith(".csv"):
            full_file_name = dirName + '\\' + file
            d.csvNewsFileName = full_file_name
            news_headlines.extend(d.get_news_headlines())


def CollectPrices():
    """Scan .\\crawler\\prices for *.csv files and append their prices."""
    dirName = '.\\crawler\\prices'
    # Was os.listdir('.\\crawler\\prices') — use the variable so the path
    # literal exists in exactly one place.
    for file in os.listdir(dirName):
        if file.endswith(".csv"):
            full_file_name = dirName + '\\' + file
            print(full_file_name)
            d.csvPricesFileName = full_file_name
            prices.extend(d.get_prices())


d = DatasetBuilder()
CollectNews()
CollectPrices()

# Dump the merged collections to single consolidated CSVs.
d.csvNewsFileName = 'news_all.csv'
d.DumpNewsCSV(news_headlines)
d.csvPricesFileName = 'prices_all.csv'
d.DumpPricesCSV(prices)
# NOTE(review): this fragment appears to be the body of a periodic crawler
# loop — its `while`/`for` header and the definitions of d, priceStartDate,
# news_headlines, NewsHeadline, Price, datetime and time all lie outside this
# chunk; confirm against the full file.

# Persist each crawled headline, skipping ones already stored (dedup on text).
for headline in news_headlines:
    print(headline['text'] + '\n' + headline['time_stamp'])
    headline_exist = NewsHeadline.objects.filter(text=headline['text'])
    if not headline_exist:
        headline_entry = NewsHeadline()
        headline_entry.text = headline['text']
        headline_entry.time_stamp = headline['time_stamp']
        headline_entry.save()

# Take a single timestamp so every component of the file name is consistent;
# the original called datetime.datetime.now() six times, which could straddle
# a second/minute boundary and yield a mixed-up name.
now = datetime.datetime.now()
d.csvNewsFileName = ('.\\crawler\\news\\news_'
                     + str(now.day) + '_' + str(now.month) + '_' + str(now.year)
                     + '_' + str(now.hour) + '_' + str(now.minute) + '_' + str(now.second)
                     + '.csv')
d.DumpNewsCSV(news_headlines)

# Persist each crawled price, skipping ones already stored (dedup on timestamp).
prices = d.ParsePricesURL(priceStartDate)
for price in prices:
    print(price['value'] + '\n' + price['time_stamp'])
    price_exist = Price.objects.filter(time_stamp=price['time_stamp'])
    if not price_exist:
        price_entry = Price()
        price_entry.value = price['value']
        price_entry.time_stamp = price['time_stamp']
        price_entry.save()

now = datetime.datetime.now()
d.csvPricesFileName = ('.\\crawler\\prices\\prices_'
                       + str(now.day) + '_' + str(now.month) + '_' + str(now.year)
                       + '_' + str(now.hour) + '_' + str(now.minute) + '_' + str(now.second)
                       + '.csv')
d.DumpPricesCSV(prices)

# Crawl every hour
time.sleep(3600)