def share_table2fund(share_table):
    """
    @summary converts data frame of shares into fund values
    @param share_table: data frame containing shares on days transactions
                        occurred; it must contain a '_CASH' column
                        (list.remove raises ValueError otherwise)
    @return fund : time series containing fund value over time
    @return leverage : time series containing fund leverage over time, as
                       accumulated by _calculate_leverage
    @note the two series are returned together as the list [fund, leverage]
    """
    # Get the data from the data store
    dataobj = da.DataAccess('mysql')
    startday = share_table.index[0]
    endday = share_table.index[-1]

    # '_CASH' is not requested from the store; it is priced at 1 below.
    symbols = list(share_table.columns)
    symbols.remove('_CASH')

    # Get desired timestamps. The window starts five days before the first
    # transaction, presumably so that a close on or before that day exists.
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday - dt.timedelta(days=5),
                                endday + dt.timedelta(days=1), timeofday)
    historic = dataobj.get_data(timestamps, symbols, ["close"])[0]
    # Forward-fill on a copy so the frame handed out by the store is not
    # mutated by the fill or by the '_CASH' column added next.
    historic = historic.ffill()
    historic["_CASH"] = 1

    # Prices known at or before the first transaction.
    closest = historic[historic.index <= startday]
    first_stamp = closest.index[-1]
    ts_leverage = pandas.Series(0, index=[first_stamp])

    # start shares/fund out as 100% cash
    # (.iloc replaces the removed .ix indexer; both lookups are positional)
    first_val = closest.iloc[-1] * share_table.iloc[0]
    # first_val is a Series, so a plain sum() is used (axis=1 is not valid).
    fund_ts = pandas.Series([first_val.sum()], index=[first_stamp])

    prev_row = share_table.iloc[0]
    for row_index, row in share_table.iterrows():
        # First available price row at or after this transaction.
        trade_price = historic.loc[row_index:].iloc[0:1]
        trade_date = trade_price.index[0]

        # get stock prices on all the days up until this trade
        to_calculate = historic[(historic.index <= trade_date) &
                                (historic.index > fund_ts.index[-1])]
        # multiply prices by our current shares (held before this trade)
        values_by_stock = to_calculate * prev_row
        prev_row = row

        # update leverage
        ts_leverage = _calculate_leverage(values_by_stock, ts_leverage)

        # calculate total value and append to our fund history
        # (pandas.concat replaces the removed Series.append)
        fund_ts = pandas.concat([fund_ts, values_by_stock.sum(axis=1)])

    return [fund_ts, ts_leverage]
def main():
    '''
    Build a random monthly allocation table and pickle it.

    One allocation row is created for the first trading day of the range and
    one for every trading day on which the month changes. Each row holds
    random non-negative weights, normalized to sum to 1, over the first 20
    symbols of the 'sp5002012' list plus '_CASH'. The resulting DataFrame is
    written to 'allocation.pkl'. No random seed is set here, so the weights
    differ between runs.
    '''
    # Start and End date of the charts
    dt_start = dt.datetime(2004, 1, 1)
    dt_end = dt.datetime(2009, 12, 31)

    # We need closing prices so the timestamp should be hours=16.
    dt_timeofday = dt.timedelta(hours=16)

    # Get a list of trading days between the start and the end.
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)

    # Creating an object of the dataaccess class with EODHistoricalData
    # as the source.
    c_dataobj = da.DataAccess('EODHistoricalData')

    # List of symbols - First 20
    ls_symbols = c_dataobj.get_symbols_from_list('sp5002012')
    ls_symbols = ls_symbols[:20]
    ls_symbols.append('_CASH')

    # Creating the first allocation row
    na_vals = np.random.randint(0, 1000, len(ls_symbols))
    # Normalize the row - Typecasting as everything is int.
    na_vals = na_vals / float(sum(na_vals))
    # Reshape to a 2D matrix so it can form a one-row dataframe.
    na_vals = na_vals.reshape(1, -1)

    # Collect the one-row frames and concatenate once at the end; this
    # replaces repeated DataFrame.append (quadratic, removed in pandas 2.0).
    ldf_rows = [pd.DataFrame(na_vals, index=[ldt_timestamps[0]],
                             columns=ls_symbols)]

    dt_last_date = ldt_timestamps[0]
    # Looping through all dates and creating monthly allocations
    for dt_date in ldt_timestamps[1:]:
        if dt_last_date.month != dt_date.month:
            # Create allocation
            na_vals = np.random.randint(0, 1000, len(ls_symbols))
            na_vals = na_vals / float(sum(na_vals))
            na_vals = na_vals.reshape(1, -1)
            ldf_rows.append(pd.DataFrame(na_vals, index=[dt_date],
                                         columns=ls_symbols))
        dt_last_date = dt_date

    df_alloc = pd.concat(ldf_rows)

    # Create the output pickle file for the dataframe; the context manager
    # guarantees the handle is flushed and closed.
    with open('allocation.pkl', 'wb') as output:
        pickle.dump(df_alloc, output)
def daily(lfFunds):
    """
    @summary Computes daily returns centered around 0
    @param lfFunds: A time series containing daily fund values. A pandas
                    Series is first reindexed onto the NYSE trading days
                    between its first and last index entry (forward filled);
                    any other input is converted as-is.
    @return an array of daily returns; one-dimensional input comes back with
            shape (n, 1)
    """
    # isinstance also accepts Series subclasses and avoids constructing an
    # empty Series just to obtain its type.
    if isinstance(lfFunds, pd.Series):
        ldt_timestamps = du.getNYSEdays(lfFunds.index[0], lfFunds.index[-1],
                                        dt.timedelta(hours=16))
        lfFunds = lfFunds.reindex(index=ldt_timestamps, method='ffill')

    # Work on a deep copy so the caller's data is left untouched.
    nds = np.asarray(deepcopy(lfFunds))
    if nds.ndim == 1:
        nds = np.expand_dims(nds, 1)

    # returnize0's result is ignored; it is presumably an in-place
    # conversion of values to returns - confirm against its definition.
    returnize0(nds)
    return nds
def calculate_efficiency(dt_start_date, dt_end_date, s_stock):
    """
    @summary calculates the exit-entry/high-low trade efficiency of a stock
             from historical closing prices
    @param dt_start_date: entry point for the trade
    @param dt_end_date: exit point for the trade
    @param s_stock: stock to compute efficiency for
    @return: (last close - first close) / (highest close - lowest close)
             over the requested window
    """
    # Data store handle
    store = da.DataAccess('mysql')

    # Closing-time stamps for the window; the end date is pushed out a day.
    close_time = dt.timedelta(hours=16)
    trading_days = du.getNYSEdays(dt_start_date,
                                  dt_end_date + dt.timedelta(days=1),
                                  close_time)

    closes = store.get_data(trading_days, [s_stock], ["close"])[0]

    price_span = numpy.max(closes.values) - numpy.min(closes.values)
    first_close = closes.values[0]
    last_close = closes.values[-1]

    # first_close/last_close are rows of the value matrix, so the quotient
    # is an array; its first element is the efficiency for s_stock.
    efficiency = (last_close - first_close) / price_span
    return efficiency[0]
def main():
    '''
    Plot prices, normalized prices and daily returns for five symbols.

    Reads 2006-2010 data for AAPL, GLD, IJR, SPY and XOM and writes five
    PDF files: adjustedclose.pdf, normalized.pdf, rets.pdf,
    scatterSPXvXOM.pdf and scatterSPXvGLD.pdf. The scatter file names are
    kept for compatibility even though the series plotted is SPY.
    '''
    # List of symbols
    ls_symbols = ["AAPL", "GLD", "IJR", "SPY", "XOM"]

    # Column positions of the series used in the return plots. The price
    # matrix is assumed to keep ls_symbols order (the legend calls below
    # rely on the same assumption).
    s_market, s_stock, s_gold = 'SPY', 'XOM', 'GLD'
    i_market = ls_symbols.index(s_market)
    i_stock = ls_symbols.index(s_stock)
    i_gold = ls_symbols.index(s_gold)

    # Start and End date of the charts
    dt_start = dt.datetime(2006, 1, 1)
    dt_end = dt.datetime(2010, 12, 31)

    # We need closing prices so the timestamp should be hours=16.
    dt_timeofday = dt.timedelta(hours=16)

    # Get a list of trading days between the start and the end.
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)

    # Creating an object of the dataaccess class
    c_dataobj = da.DataAccess('EODHistoricalData')

    # Keys to be read from the data, it is good to read everything in one go.
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']

    # Reading the data, now d_data is a dictionary with the keys above.
    # Timestamps and symbols are the ones that were specified before.
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # Filling the data for NAN: forward, then backward, then a constant
    # for columns that hold no data at all.
    for s_key in ls_keys:
        d_data[s_key] = d_data[s_key].ffill().bfill().fillna(1.0)

    # Getting the numpy ndarray of close prices.
    na_price = d_data['close'].values

    # Plotting the prices with x-axis=timestamps
    plt.clf()
    plt.plot(ldt_timestamps, na_price)
    plt.legend(ls_symbols)
    plt.ylabel('Adjusted Close')
    plt.xlabel('Date')
    plt.savefig('adjustedclose.pdf', format='pdf')

    # Normalizing the prices to start at 1 and see relative returns
    na_normalized_price = na_price / na_price[0, :]

    # Plotting the normalized prices with x-axis=timestamps
    plt.clf()
    plt.plot(ldt_timestamps, na_normalized_price)
    plt.legend(ls_symbols)
    plt.ylabel('Normalized Close')
    plt.xlabel('Date')
    plt.savefig('normalized.pdf', format='pdf')

    # Copy the normalized prices to a new ndarray to find returns.
    na_rets = na_normalized_price.copy()

    # Calculate the daily returns of the prices. (Inplace calculation)
    # returnize0 works on ndarray and not dataframes.
    tsu.returnize0(na_rets)

    # Plotting the daily returns of the first 50 days
    plt.clf()
    plt.plot(ldt_timestamps[0:50], na_rets[0:50, i_market])  # SPY 50 days
    plt.plot(ldt_timestamps[0:50], na_rets[0:50, i_stock])  # XOM 50 days
    plt.axhline(y=0, color='r')
    # Labels name the series actually plotted (SPY, not $SPX).
    plt.legend([s_market, s_stock])
    plt.ylabel('Daily Returns')
    plt.xlabel('Date')
    plt.savefig('rets.pdf', format='pdf')

    # Plotting the scatter plot of daily returns between XOM VS SPY
    plt.clf()
    plt.scatter(na_rets[:, i_market], na_rets[:, i_stock], c='blue')
    plt.ylabel(s_stock)
    plt.xlabel(s_market)
    plt.savefig('scatterSPXvXOM.pdf', format='pdf')

    # Plotting the scatter plot of daily returns between SPY VS GLD
    plt.clf()
    plt.scatter(na_rets[:, i_market], na_rets[:, i_gold], c='blue')
    plt.ylabel(s_gold)
    plt.xlabel(s_market)
    plt.savefig('scatterSPXvGLD.pdf', format='pdf')
# Testing pftk.pftkstudy
try:
    import pftk.pftkstudy.event_profiler
    print("pftk.pftkstudy is installed and can be imported")
except ImportError:
    # Raising SystemExit with a message prints it to stderr and exits with
    # status 1, exactly like exit(msg), but does not depend on the
    # site-provided exit() helper being available.
    raise SystemExit("Error : pftk.pftkstudy can not be imported.")
# A bare `print` is a no-op expression in Python 3; call it to emit the
# intended blank line.
print()

# Checking that the data installed is correct.
# Start and End date of the charts
dt_start = dt.datetime(2012, 2, 10)
dt_end = dt.datetime(2012, 2, 24)
dt_timeofday = dt.timedelta(hours=16)

# Get a list of trading days between the start and the end.
ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
ls_symbols = ['MSFT', 'GOOG']

# Creating an object of the dataaccess class with Yahoo as the source.
c_dataobj = da.DataAccess('Yahoo', verbose=True)

# Reading adjusted_close prices
df_close = c_dataobj.get_data(ldt_timestamps, ls_symbols, "close")
print(df_close)
print()

# Reference values the user should compare the printed frame against.
print("\nCorrect Output using the Default Data should be : ")
print("Assignments use this data for grading")
print(" MSFT GOOG")
print("2012-02-10 16:00:00 29.90 605.91")
print("2012-02-13 16:00:00 29.98 612.20")
print("2012-02-14 16:00:00 29.86 609.76")
print("2012-02-15 16:00:00 29.66 605.56")
def main():
    '''
    Plot efficient frontiers for an S&P 100 symbol list.

    Computes one frontier from the year ending 2010-01-01 and one from the
    following year, then shows how the first year's frontier portfolios
    performed in the second year. The figure is saved as 'tutorial8.pdf'.
    '''
    # S&P 100
    ls_symbols = [
        'AAPL', 'ABT', 'ACN', 'AEP', 'ALL', 'AMGN', 'AMZN', 'APC', 'AXP',
        'BA', 'BAC', 'BAX', 'BHI', 'BK', 'BMY', 'CAT', 'C', 'CL', 'CMCSA',
        'COF', 'COP', 'COST', 'CPB', 'CSCO', 'CVS', 'CVX', 'DD', 'DELL',
        'DIS', 'DOW', 'DVN', 'EBAY', 'EMC', 'EXC', 'F', 'FCX', 'FDX', 'GD',
        'GE', 'GILD', 'GOOG', 'GS', 'HAL', 'HD', 'HNZ', 'HON', 'HPQ', 'IBM',
        'INTC', 'JNJ', 'JPM', 'KO', 'LLY', 'LMT', 'LOW', 'MA', 'MCD', 'MDT',
        'MET', 'MMM', 'MO', 'MON', 'MRK', 'MS', 'MSFT', 'NKE', 'NOV', 'NSC',
        'NWSA', 'NYX', 'ORCL', 'OXY', 'PEP', 'PFE', 'PG', 'PM', 'QCOM', 'RF',
        'RTN', 'SBUX', 'SLB', 'SO', 'SPG', 'T', 'TGT', 'TWX', 'TXN', 'UNH',
        'UPS', 'USB', 'UTX', 'VZ', 'WFC', 'WMB', 'WMT', 'XOM',
    ]

    # Data access object backed by the 'EODHistoricalData' source.
    c_dataobj = da.DataAccess('EODHistoricalData')
    ls_known_syms = c_dataobj.get_all_symbols()

    # Drop every requested symbol the data source does not know about.
    for s_missing in list(set(ls_symbols) - set(ls_known_syms)):
        ls_symbols.remove(s_missing)

    # Fitting year ends at dt_end; the test year starts there.
    dt_end = dt.datetime(2010, 1, 1)
    dt_start = dt_end - dt.timedelta(days=365)
    dt_test = dt_end + dt.timedelta(days=365)

    # Closing prices are stamped at 16:00.
    dt_close = dt.timedelta(hours=16)

    # Trading days of the fitting year and of the test year.
    ldt_fit_days = du.getNYSEdays(dt_start, dt_end, dt_close)
    ldt_test_days = du.getNYSEdays(dt_end, dt_test, dt_close)

    # Only the close prices are needed.
    df_fit = c_dataobj.get_data(ldt_fit_days, ls_symbols, "close")
    df_test = c_dataobj.get_data(ldt_test_days, ls_symbols, "close")

    # Patch NaN gaps forward first, then backward.
    df_fit = df_fit.fillna(method='ffill').fillna(method='bfill')
    df_test = df_test.fillna(method='ffill').fillna(method='bfill')

    # Work on copies of the raw values; returnize0 is applied to them
    # without using a return value.
    na_fit = df_fit.values.copy()
    na_test = df_test.values.copy()
    tsu.returnize0(na_fit)
    tsu.returnize0(na_test)

    # Frontier of each year; only returns and deviations are used from the
    # test-year result.
    (lf_returns, lf_std, lna_portfolios,
     na_avgrets, na_std) = getFrontier(na_fit)
    lf_returns_test, lf_std_test, _, _, _ = getFrontier(na_test)

    # Both frontiers: fitting year in blue, test year in red.
    plt.clf()
    plt.plot(lf_std, lf_returns, 'b')
    plt.plot(lf_std_test, lf_returns_test, 'r')

    # Performance of the fitting-year frontier portfolios in the test year.
    lna_port_rets = [np.dot(na_test, na_port) for na_port in lna_portfolios]
    lf_std_port_test = [np.std(na_r) for na_r in lna_port_rets]
    lf_ret_port_test = [np.average(na_r) for na_r in lna_port_rets]
    plt.plot(lf_std_port_test, lf_ret_port_test, 'k')

    # Individual stock risk/return as green '+' markers.
    for i_stock, f_avg in enumerate(na_avgrets):
        plt.plot(na_std[i_stock], f_avg, 'g+')

    # Disabled: arrows showing the transition of the efficient frontier
    # for i in range(0, 101, 10):
    #     plt.arrow(lf_std[i], lf_returns[i], lf_std_port_test[i] - lf_std[i],
    #               lf_ret_port_test[i] - lf_returns[i], color='k')

    # Labels and axis
    ls_legend = ['2009 Frontier', '2010 Frontier',
                 "Performance of '09 Frontier in 2010"]
    plt.legend(ls_legend, loc='lower right')
    plt.title('Efficient Frontier For S&P 100 ')
    plt.ylabel('Expected Return')
    plt.xlabel('StDev')
    plt.savefig('tutorial8.pdf', format='pdf')
def main():
    '''
    Plot cumulative returns of a portfolio read from a CSV file.

    Loads symbol/allocation pairs from 'tutorial3_portfolio.csv', drops
    symbols the data source does not know, reads three years of prices up
    to 2011-01-01 and saves the cumulative returns of every component and
    of the weighted portfolio to 'tutorial3.pdf'.
    '''
    # Portfolio file: one header row, then "symbol,allocation" records.
    na_portfolio = np.loadtxt('tutorial3_portfolio.csv', dtype='U5,f4',
                              delimiter=',', comments="#", skiprows=1)
    print(na_portfolio)

    # Order the records by symbol name.
    na_portfolio = sorted(na_portfolio, key=lambda rec: rec[0])
    print(na_portfolio)

    # Parallel lists of symbol names and allocations.
    ls_port_syms = [rec[0] for rec in na_portfolio]
    lf_port_alloc = [rec[1] for rec in na_portfolio]

    # Data access object backed by the 'EODHistoricalData' source.
    c_dataobj = da.DataAccess('EODHistoricalData')
    ls_all_syms = c_dataobj.get_all_symbols()
    print(ls_all_syms)

    # Symbols in the portfolio that the data source does not list.
    ls_bad_syms = list(set(ls_port_syms) - set(ls_all_syms))
    if ls_bad_syms:
        print("Portfolio contains bad symbols : ", ls_bad_syms)

    # Remove each bad symbol together with its allocation.
    for s_bad in ls_bad_syms:
        i_pos = ls_port_syms.index(s_bad)
        del ls_port_syms[i_pos]
        del lf_port_alloc[i_pos]

    # Three years (1095 days) of history ending 2011-01-01.
    dt_end = dt.datetime(2011, 1, 1)
    dt_start = dt_end - dt.timedelta(days=1095)

    # Closing prices are stamped at 16:00.
    dt_close = dt.timedelta(hours=16)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_close)

    # Read every key in one call and index the frames by key name.
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_port_syms, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # Separate copy of the close prices, gaps filled forward, backward and
    # finally with a constant.
    df_rets = d_data['close'].copy()
    df_rets = (df_rets.fillna(method='ffill')
                      .fillna(method='bfill')
                      .fillna(1.0))

    # returnize0 works on ndarray and not dataframes.
    na_rets = df_rets.values
    tsu.returnize0(na_rets)

    # Weighted daily portfolio return, then cumulative growth of the
    # portfolio and of each component.
    na_portrets = np.sum(na_rets * lf_port_alloc, axis=1)
    na_port_total = np.cumprod(na_portrets + 1)
    na_component_total = np.cumprod(na_rets + 1, axis=0)

    # Components are drawn faintly, the portfolio on top of them.
    plt.clf()
    fig = plt.figure()
    fig.add_subplot(111)
    plt.plot(ldt_timestamps, na_component_total, alpha=0.4)
    plt.plot(ldt_timestamps, na_port_total)
    plt.legend(ls_port_syms + ['Portfolio'])
    plt.ylabel('Cumulative Returns')
    plt.xlabel('Date')
    fig.autofmt_xdate(rotation=45)
    plt.savefig('tutorial3.pdf', format='pdf')
def main():
    '''
    Simulate a randomly re-allocated two-stock fund with tradesim.

    Builds an allocation table for AAPL and GOOG (50/50 at the start, then
    a fresh random split on every trading day where the month changes),
    runs it through tradesim on 2008-2010 close prices and prints the fund
    series and the transaction costs. tradesim is given
    log="transaction.csv". No random seed is set here, so results differ
    between runs.
    '''
    # List of symbols
    ls_symbols = ["AAPL", "GOOG"]

    # Start and End date of the charts
    dt_start = dt.datetime(2008, 1, 1)
    dt_end = dt.datetime(2010, 12, 31)

    # We need closing prices so the timestamp should be hours=16.
    dt_timeofday = dt.timedelta(hours=16)

    # Get a list of trading days between the start and the end.
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)

    # Creating an object of the dataaccess class with Yahoo as the source.
    c_dataobj = da.DataAccess('Yahoo')

    # Reading just the close prices; gaps are filled forward, then backward,
    # then with a constant for columns that hold no data at all.
    df_close = c_dataobj.get_data(ldt_timestamps, ls_symbols, "close")
    df_close = df_close.ffill().bfill().fillna(1.0)

    # Creating the allocation dataframe
    # We offset the time for the simulator to have at least one
    # datavalue before the allocation.
    ldf_rows = [pd.DataFrame(np.array([[0.5, 0.5]]),
                             index=[ldt_timestamps[0] + dt.timedelta(hours=5)],
                             columns=ls_symbols)]

    dt_last_date = ldt_timestamps[0]
    # Looping through all dates and creating monthly allocations
    for dt_date in ldt_timestamps[1:]:
        if dt_last_date.month != dt_date.month:
            # Create allocation
            na_vals = np.random.randint(0, 1000, len(ls_symbols))
            na_vals = na_vals / float(sum(na_vals))
            na_vals = na_vals.reshape(1, -1)
            ldf_rows.append(pd.DataFrame(na_vals, index=[dt_date],
                                         columns=ls_symbols))
        dt_last_date = dt_date

    # One concat instead of DataFrame.append per row (append is quadratic
    # and was removed in pandas 2.0).
    df_alloc = pd.concat(ldf_rows)

    # Adding cash to the allocation matrix
    df_alloc['_CASH'] = 0.0

    # Running the simulator on the allocation frame
    (ts_funds, ts_leverage, f_commission, f_slippage, f_borrow_cost) = tradesim(
        df_alloc, df_close, f_start_cash=10000.0, i_leastcount=1,
        b_followleastcount=True, f_slippage=0.0005, f_minimumcommision=5.0,
        f_commision_share=0.0035, i_target_leverage=1, f_rate_borrow=3.5,
        log="transaction.csv")

    print("Simulated Fund Time Series : ")
    print(ts_funds)
    print("Transaction Costs : ")
    print("Commissions : ", f_commission)
    print("Slippage : ", f_slippage)
    print("Borrowing Cost : ", f_borrow_cost)
def getRandPort(lNum, dtStart=None, dtEnd=None, lsStocks=None,
                dmPrice=None, dmVolume=None, bFilter=True, fNonNan=0.95,
                fPriceVolume=100*1000, lSeed=None):
    """
    @summary Returns a random portfolio based on certain criteria.
    @param lNum: Number of stocks to be included
    @param dtStart: Start date for portfolio
    @param dtEnd: End date for portfolio
    @param lsStocks: Optional list of ticker symbols, if not provided all
                     symbols will be used
    @param bFilter: If False, stocks are not filtered by price or volume
                    data, simply return random Portfolio.
    @param dmPrice: Optional price data, if not provided, data access will
                    be queried
    @param dmVolume: Optional volume data, if not provided, data access will
                     be queried
    @param fNonNan: Optional non-nan percent for filter, default is .95
    @param fPriceVolume: Optional price*volume for filter, default is 100,000
    @param lSeed: Optional seed passed to rand.seed (this reseeds the shared
                  random module); None uses the system default
    @warning: Does not work for all sets of optional inputs, e.g. if you
              don't include dtStart, dtEnd, you need to include
              dmPrice/dmVolume
    @return list of stocks which meet the criteria, sorted. If the pool of
            candidates runs out first, an error is printed and the stocks
            accepted so far are returned unsorted.
    """
    # if dmPrice and dmVol are provided then we don't query it every time
    bPullPrice = dmPrice is None
    bPullVol = dmVolume is None

    norObj = None
    if lsStocks is None:
        if bPullPrice and bPullVol:
            norObj = da.DataAccess('Norgate')
            lsStocks = norObj.get_all_symbols()
        elif dmPrice is not None:
            lsStocks = list(dmPrice.columns)
        else:
            lsStocks = list(dmVolume.columns)

    # Data access and timestamps are needed as soon as either series has to
    # be pulled; requiring both to be missing left norObj undefined when
    # only one of dmPrice/dmVolume was supplied.
    if bFilter and (bPullPrice or bPullVol):
        if norObj is None:
            norObj = da.DataAccess('Norgate')
        ldtTimestamps = du.getNYSEdays(dtStart, dtEnd, dt.timedelta(hours=16))

    # Default seed (none) uses system clock
    rand.seed(lSeed)
    lsRetStocks = []

    # Indexes of stocks not yet drawn. This must be a real list: a range
    # object has no pop() on Python 3.
    llRemainingIndexes = list(range(len(lsStocks)))
    lsValid = None

    # Loop until we have enough randomly selected stocks
    while len(lsRetStocks) != lNum:
        lsCheckStocks = []
        for _ in range(lNum - len(lsRetStocks)):
            lRemaining = len(llRemainingIndexes)
            if lRemaining == 0:
                print('Error in getRandPort: ran out of stocks')
                return lsRetStocks

            # Pick a stock and remove it from the list of remaining stocks
            lPicked = rand.randint(0, lRemaining - 1)
            lsCheckStocks.append(lsStocks[llRemainingIndexes.pop(lPicked)])

        # If bFilter is false simply return our first list of stocks,
        # don't check price/vol
        if not bFilter:
            return sorted(lsCheckStocks)

        # Get data if needed
        if bPullPrice:
            dmPrice = norObj.get_data(ldtTimestamps, lsCheckStocks, 'close')
        if bPullVol:
            dmVolume = norObj.get_data(ldtTimestamps, lsCheckStocks, 'volume')

        # Only query this once if data is provided,
        # else query every time with new data
        if lsValid is None or bPullVol or bPullPrice:
            lsValid = stockFilter(dmPrice, dmVolume, fNonNan, fPriceVolume)

        # Keep the candidates of this round that passed the filter.
        for sAdd in lsValid:
            if sAdd in lsCheckStocks:
                lsRetStocks.append(sAdd)

    return sorted(lsRetStocks)