def merge_lstm_data(starttime='2016-06-24', split_date='2018-06-20', period=900,
                    features=None):
    """Load (or build) the train/test/model data sets for the LSTM pipeline.

    First tries to load previously cached CSVs from ``CurDat/``; if any of
    them is missing/unreadable, rebuilds the data by merging Poloniex BTC
    candlestick data with tweet-sentiment results and caches the result.

    Parameters
    ----------
    starttime : str
        Start date for the Poloniex price history (``YYYY-MM-DD``).
    split_date : str
        Date separating the training and test periods.
    period : int
        Candlestick period in seconds; valid values are 300, 900, 1800,
        7200, 14400 and 86400.
    features : list of str, optional
        Feature column suffixes passed to ``create_coin_dataset``.
        Defaults to the volatility/close/tweet-sentiment feature set.

    Returns
    -------
    tuple
        ``(training_set, test_set, model_data)``.
    """
    # Avoid the mutable-default-argument pitfall: build the default list per call.
    if features is None:
        features = ['_volatility', '_date', '_close', '_len_day_data',
                    '_neg_res', '_neut_tweet', '_pos_tweet']

    filestr = ('start_' + str(starttime) + '_split_' + str(split_date) +
               '_period_' + str(period))
    try:
        training_set = pd.read_csv('CurDat/temp_results_train_' + filestr + '.csv')
        # BUG FIX: the original read the *train* CSV into test_set as well.
        test_set = pd.read_csv('CurDat/temp_results_test_' + filestr + '.csv')
        model_data = pd.read_csv('CurDat/temp_results_model_' + filestr + '.csv')
        print('Loaded data from CurDat/temp_results_train_' + filestr + '.csv!')
        print('Loaded data from CurDat/temp_results_test_' + filestr + '.csv!')
        print('Loaded data from CurDat/temp_results_model_' + filestr + '.csv!')
    except Exception:
        # Cache miss (missing/corrupt CSV): rebuild from raw data.
        # Narrowed from a bare `except:` so Ctrl-C / SystemExit still propagate.
        ####### important parameters##################
        print("Merge coin & twit data sets..")
        selected_coins = ['BTC', 'ETH']  # ['BTC', 'LTC', 'ETH', 'XMR']
        # period = 900#86400 candlestick period in seconds; valid values are
        #     300, 900, 1800, 7200, 14400, and 86400),
        # split_date = '2018-06-20'
        # window_len = 25
        #######################################
        df_ini = data_reader.get_poloniex_data(startdate=starttime)
        coin_list = df_ini.fetch_coin_data(selected_coins, period)

        import raspi_tweet_fetcher.Load_Tweets_Class as LTC
        ### init
        get_tweet_data = LTC.get_tweets()
        ### fetch data based on query word
        # get_tweet_data.fetch_tweets(query='BITCOIN', count=100, pages=1)
        # get_tweet_data.fetch_stocktwits(query='BITCOIN')
        ### delete duplicates data from CSV
        tweet_data = get_tweet_data.read_and_clean_data_from_csv(query='BITCOIN')
        # data = get_tweet_data.data
        fin_data = coin_list['BTC']
        ### analyze sentiment
        tweet_results = get_tweet_data.analyze_Tweets(tweet_data)

        # Align both frames on a common datetime column, then keep only days
        # where price and sentiment data both exist.
        fin_data['Date'] = pd.to_datetime(fin_data.date_format)
        tweet_results['Date'] = pd.to_datetime(tweet_results['days'])
        merged = pd.merge(fin_data, tweet_results, how='outer', on='Date')
        merged.dropna(how='any', inplace=True)

        new_coin_list = {}
        new_coin_list['BTC'] = merged
        # BUG FIX: forward the caller's `features` instead of a hard-coded
        # duplicate of the default list (the parameter was silently ignored).
        training_set, test_set, model_data = create_coin_dataset(
            new_coin_list,
            starttime=starttime,
            split_date=split_date,
            features=features)
        # '_date','_close_off_high'])
        # features = ['_close','_volume','_volatility','_date', '_weightedAverage',
        #             '_len_day_data', '_neg_res', '_neut_tweet', '_pos_tweet'])

        # Cache the rebuilt sets so the next call takes the fast path.
        panda_train = pd.DataFrame(training_set)
        panda_train.to_csv('CurDat/temp_results_train_' + filestr + '.csv')
        panda_test = pd.DataFrame(test_set)
        panda_test.to_csv('CurDat/temp_results_test_' + filestr + '.csv')
        panda_model = pd.DataFrame(model_data)
        panda_model.to_csv('CurDat/temp_results_model_' + filestr + '.csv')
    return training_set, test_set, model_data
@author: Chris """

# NOTE(review): the opening triple-quote of this module docstring is outside
# the visible chunk — confirm it exists earlier in the file.

import data_reader as data_reader
import raspi_tweet_fetcher.Load_Tweets_Class as LTC
from textblob import TextBlob
import re
import os
import pandas as pd

# --- script-level configuration ---------------------------------------------
selected_coins = ['BTC', 'ETH']  # ['BTC', 'LTC', 'ETH', 'XMR']
# Candlestick period in seconds (7200 = 2 h); per the notes elsewhere in this
# file, valid values are 300, 900, 1800, 7200, 14400 and 86400.
period = 7200  # 86400
split_date = '2018-06-15'  # boundary between training and test periods

# Fetch Poloniex price history for the selected coins (network side effect
# at import/run time).
df_ini = data_reader.get_poloniex_data()
coin_list = df_ini.fetch_coin_data(selected_coins, period)

### init
get_tweet_data = LTC.get_tweets()

### fetch data based on query word
#get_tweet_data.fetch_tweets(query='BITCOIN', count=100, pages=1)
#get_tweet_data.fetch_stocktwits(query='BTC.X')

### delete duplicates data from CSV