def subsample_data(filename_data, filename_symbology, dir_pickle, start_date,
                   end_date, query_attribute, query_criteria, include_avg):
    """Return a filtered subset of the sentiment data, cached as a pickle.

    On a cache hit the pickled DataFrame is returned as-is. Otherwise the
    data CSV is read, left-joined with the symbology CSV on ``SYMBOL``,
    filtered by ``query_attribute``/``query_criteria`` and by date, written
    to the cache file and returned.

    Args:
        filename_data: Path of the data CSV. It must provide the ``SYMBOL``
            and ``TIMESTAMP_UTC`` columns (timestamps formatted as
            ``%Y-%m-%dT%H:%M:%SZ``).
        filename_symbology: Path of the symbology CSV. Its headers are
            upper-cased before the merge, so it must contain a ``symbol``
            column in any letter case.
        dir_pickle: String prefix of the cache file. It is concatenated
            verbatim with the file name, so include a trailing separator.
        start_date: Inclusive lower bound of the date filter (string).
        end_date: Inclusive upper bound of the date filter (string).
        query_attribute: Name of the column (after the merge) to filter on.
        query_criteria: Sequence of strings; rows whose ``query_attribute``
            value is in this sequence are kept.
        include_avg: When equal to ``False``, every column whose name
            contains ``'AVG'`` is dropped.

    Returns:
        pandas.DataFrame: The filtered rows plus a ``DATE`` column holding
        the timestamp truncated to midnight.

    Note:
        Only the first three entries of ``query_criteria`` are part of the
        cache file name, so two criteria lists that differ only after the
        third entry share one cache file.
    """
    criteria_tag = '-'.join(query_criteria[:3])
    cache_tag = '_'.join(
        [start_date, end_date, query_attribute, criteria_tag, str(include_avg)]
    )
    # Plain concatenation (not a path join) keeps existing cache names valid.
    pickle_name = dir_pickle + 'pickle_sentiment_' + cache_tag + '.p'

    # Try to read first from pickle.
    # SECURITY: unpickling executes arbitrary code; dir_pickle must only
    # ever point at files this program wrote itself.
    try:
        data = pd.read_pickle(pickle_name)
    except Exception:
        # Missing, unreadable or corrupt cache: rebuild it from the CSVs.
        # (Not a bare ``except`` so Ctrl-C / SystemExit still propagate.)
        print("Subsampling data from csv")
    else:
        print("Loaded from pre-created pickle")
        return data

    # Read csv.
    data = pd.read_csv(filename_data)

    # Merge with symbology csv for additional info; headers are converted
    # to uppercase so the join key matches the data file's 'SYMBOL'.
    data_symbology = pd.read_csv(filename_symbology)
    data_symbology.columns = [x.upper() for x in data_symbology.columns]
    data = pd.merge(data, data_symbology, on='SYMBOL', how='left')

    # Perform filter query based on parameters. The explicit copy makes the
    # column assignment below act on an independent frame, not on a slice.
    data = data[data[query_attribute].isin(query_criteria)].copy()

    # Convert timestamps to dates at midnight. Parsing with an explicit
    # format avoids the locale-dependent strftime('%x') round trip, which
    # could swap day and month under day-first locales.
    data['DATE'] = pd.to_datetime(
        data['TIMESTAMP_UTC'], format='%Y-%m-%dT%H:%M:%SZ'
    ).dt.normalize()

    # Query between start and end date (both inclusive).
    in_range = (data['DATE'] >= start_date) & (data['DATE'] <= end_date)
    data = data[in_range]

    # Remove avg columns. ``== False`` is kept on purpose: unlike ``not``,
    # it leaves the columns in place when include_avg is None.
    if include_avg == False:  # noqa: E712
        avg_cols = [col for col in data.columns if 'AVG' in col]
        data = data.drop(columns=avg_cols)

    # Save as pickle so the next identical query is served from the cache.
    data.to_pickle(pickle_name)

    return data
import pandas as pd
# Aliased so the `data` DataFrame defined below does not shadow the module.
from pandas_datareader import data as pdr_data

# Fetch daily data for 4 years
SYMBOL = 'GOOG'
start_date = '2014-01-01'
end_date = '2018-01-01'
SRC_DATA_FILENAME = SYMBOL + '_data.pkl'

# Use the local pickle when it exists; otherwise download the prices once
# and cache them so later runs work from the file.
try:
    data = pd.read_pickle(SRC_DATA_FILENAME)
except FileNotFoundError:
    data = pdr_data.DataReader(SYMBOL, 'yahoo', start_date, end_date)
    data.to_pickle(SRC_DATA_FILENAME)

# Variables/constants for EMA Calculation:
NUM_PERIODS_FAST = 10  # Static time period parameter for the fast EMA
K_FAST = 2 / (NUM_PERIODS_FAST + 1)  # Static smoothing factor parameter for fast EMA
ema_fast = 0
ema_fast_values = []  # we will hold fast EMA values for visualization purposes

NUM_PERIODS_SLOW = 40  # Static time period parameter for slow EMA
K_SLOW = 2 / (NUM_PERIODS_SLOW + 1)  # Static smoothing factor parameter for slow EMA
ema_slow = 0
ema_slow_values = []  # we will hold slow EMA values for visualization purposes

apo_values = []  # track computed absolute price oscillator value signals

# Variables for Trading Strategy trade, position & pnl management: