Esempio n. 1
0
def share_table2fund(share_table):
    """
    @summary converts data frame of shares into fund values
    @param share_table: data frame containing shares on days transactions occured
    @return fund : time series containing fund value over time
    @return leverage : time series containing fund value over time
    """
    # Get the data from the data store
    dataobj = da.DataAccess('mysql')
    startday = share_table.index[0]
    endday = share_table.index[-1]

    symbols = list(share_table.columns)
    symbols.remove('_CASH')

    # print symbols

    # Get desired timestamps
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(startday - dt.timedelta(days=5),
                                endday + dt.timedelta(days=1), timeofday)
    historic = dataobj.get_data(timestamps, symbols, ["close"])[0]
    historic.fillna(method='ffill', inplace=True)
    historic["_CASH"] = 1
    closest = historic[historic.index <= share_table.index[0]].ix[:]
    ts_leverage = pandas.Series(0, index=[closest.index[-1]])

    # start shares/fund out as 100% cash
    first_val = closest.ix[-1] * share_table.ix[0]
    fund_ts = pandas.Series([first_val.sum(axis=1)], index=[closest.index[-1]])
    prev_row = share_table.ix[0]
    for row_index, row in share_table.iterrows():
        # print row_index
        trade_price = historic.ix[row_index:].ix[0:1]
        trade_date = trade_price.index[0]

        # print trade_date

        # get stock prices on all the days up until this trade
        to_calculate = historic[(historic.index <= trade_date)
                                & (historic.index > fund_ts.index[-1])]
        # multiply prices by our current shares
        values_by_stock = to_calculate * prev_row

        # for date, sym in values_by_stock.iteritems():
        #     print date,sym
        # print values_by_stock
        prev_row = row
        #update leverage
        ts_leverage = _calculate_leverage(values_by_stock, ts_leverage)

        # calculate total value and append to our fund history
        fund_ts = fund_ts.append([values_by_stock.sum(axis=1)])
    return [fund_ts, ts_leverage]
Esempio n. 2
0
def main():
    ''' Main Function'''

    # Start and End date of the charts
    dt_start = dt.datetime(2004, 1, 1)
    dt_end = dt.datetime(2009, 12, 31)

    # We need closing prices so the timestamp should be hours=16.
    dt_timeofday = dt.timedelta(hours=16)

    # Get a list of trading days between the start and the end.
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)

    # Creating an object of the dataaccess class with Yahoo as the source.
    c_dataobj = da.DataAccess('EODHistoricalData')

    # List of symbols - First 20
    ls_symbols = c_dataobj.get_symbols_from_list('sp5002012')
    ls_symbols = ls_symbols[:20]
    ls_symbols.append('_CASH')

    # Creating the first allocation row
    na_vals = np.random.randint(0, 1000, len(ls_symbols))
    # Normalize the row - Typecasting as everything is int.
    na_vals = na_vals / float(sum(na_vals))
    # Reshape to a 2D matrix to append into dataframe.
    na_vals = na_vals.reshape(1, -1)

    # Creating Allocation DataFrames
    df_alloc = pd.DataFrame(na_vals,
                            index=[ldt_timestamps[0]],
                            columns=ls_symbols)

    dt_last_date = ldt_timestamps[0]
    # Looping through all dates and creating monthly allocations
    for dt_date in ldt_timestamps[1:]:
        if dt_last_date.month != dt_date.month:
            # Create allocation
            na_vals = np.random.randint(0, 1000, len(ls_symbols))
            na_vals = na_vals / float(sum(na_vals))
            na_vals = na_vals.reshape(1, -1)
            # Append to the dataframe
            df_new_row = pd.DataFrame(na_vals,
                                      index=[dt_date],
                                      columns=ls_symbols)
            df_alloc = df_alloc.append(df_new_row)
        dt_last_date = dt_date

    # Create the outpul pickle file for the dataframe.
    output = open('allocation.pkl', 'wb')
    pickle.dump(df_alloc, output)
Esempio n. 3
0
def daily(lfFunds):
    """
    @summary Computes daily returns centered around 0
    @param funds: A time series containing daily fund values
    @return an array of daily returns
    """
    if type(lfFunds) == type(pd.Series()):
        ldt_timestamps = du.getNYSEdays(lfFunds.index[0], lfFunds.index[-1], dt.timedelta(hours=16))
        lfFunds = lfFunds.reindex(index=ldt_timestamps, method='ffill')
    nds = np.asarray(deepcopy(lfFunds))
    s = np.shape(nds)
    if len(s) == 1:
        nds = np.expand_dims(nds, 1)
    returnize0(nds)
    return(nds)
Esempio n. 4
0
def calculate_efficiency(dt_start_date, dt_end_date, s_stock):
    """
    @summary calculates the exit-entry/high-low trade efficiency of a stock from historical data
    @param start_date: entry point for the trade
    @param end_date: exit point for the trade
    @param stock: stock to compute efficiency for
    @return: float representing efficiency
    """
    # Get the data from the data store
    dataobj = da.DataAccess('mysql')

    # Get desired timestamps
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(dt_start_date,
                                dt_end_date + dt.timedelta(days=1), timeofday)
    historic = dataobj.get_data(timestamps, [s_stock], ["close"])[0]
    # print "######"
    # print historic
    hi = numpy.max(historic.values)
    low = numpy.min(historic.values)
    entry = historic.values[0]
    exit_price = historic.values[-1]
    return (((exit_price - entry) / (hi - low))[0])
Esempio n. 5
0
def main():
    ''' Main Function'''

    # List of symbols
    ls_symbols = ["AAPL", "GLD", "IJR", "SPY", "XOM"]

    # Start and End date of the charts
    dt_start = dt.datetime(2006, 1, 1)
    dt_end = dt.datetime(2010, 12, 31)

    # We need closing prices so the timestamp should be hours=16.
    dt_timeofday = dt.timedelta(hours=16)

    # Get a list of trading days between the start and the end.
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)

    # Creating an object of the dataaccess class
    c_dataobj = da.DataAccess('EODHistoricalData')

    # Keys to be read from the data, it is good to read everything in one go.
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']

    # Reading the data, now d_data is a dictionary with the keys above.
    # Timestamps and symbols are the ones that were specified before.
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # Filling the data for NAN
    for s_key in ls_keys:
        d_data[s_key] = d_data[s_key].fillna(method='ffill')
        d_data[s_key] = d_data[s_key].fillna(method='bfill')
        d_data[s_key] = d_data[s_key].fillna(1.0)

    # Getting the numpy ndarray of close prices.
    na_price = d_data['close'].values

    # Plotting the prices with x-axis=timestamps
    plt.clf()
    plt.plot(ldt_timestamps, na_price)
    plt.legend(ls_symbols)
    plt.ylabel('Adjusted Close')
    plt.xlabel('Date')
    plt.savefig('adjustedclose.pdf', format='pdf')

    # Normalizing the prices to start at 1 and see relative returns
    na_normalized_price = na_price / na_price[0, :]

    # Plotting the prices with x-axis=timestamps
    plt.clf()
    plt.plot(ldt_timestamps, na_normalized_price)
    plt.legend(ls_symbols)
    plt.ylabel('Normalized Close')
    plt.xlabel('Date')
    plt.savefig('normalized.pdf', format='pdf')

    # Copy the normalized prices to a new ndarry to find returns.
    na_rets = na_normalized_price.copy()

    # Calculate the daily returns of the prices. (Inplace calculation)
    # returnize0 works on ndarray and not dataframes.
    tsu.returnize0(na_rets)

    # Plotting the plot of daily returns
    plt.clf()
    plt.plot(ldt_timestamps[0:50], na_rets[0:50, 3])  # $SPX 50 days
    plt.plot(ldt_timestamps[0:50], na_rets[0:50, 4])  # XOM 50 days
    plt.axhline(y=0, color='r')
    plt.legend(['$SPX', 'XOM'])
    plt.ylabel('Daily Returns')
    plt.xlabel('Date')
    plt.savefig('rets.pdf', format='pdf')

    # Plotting the scatter plot of daily returns between XOM VS $SPX
    plt.clf()
    plt.scatter(na_rets[:, 3], na_rets[:, 4], c='blue')
    plt.ylabel('XOM')
    plt.xlabel('$SPX')
    plt.savefig('scatterSPXvXOM.pdf', format='pdf')

    # Plotting the scatter plot of daily returns between $SPX VS GLD
    plt.clf()
    plt.scatter(na_rets[:, 3], na_rets[:, 1], c='blue')  # $SPX v GLD
    plt.ylabel('GLD')
    plt.xlabel('$SPX')
    plt.savefig('scatterSPXvGLD.pdf', format='pdf')
Esempio n. 6
0
# Testing pftk.pftkstudy
try:
    import pftk.pftkstudy.event_profiler
    print("pftk.pftkstudy is installed and can be imported")
except ImportError:
    exit("Error : pftk.pftkstudy can not be imported.")
print

# Checking that the data installed is correct.
# Start and End date of the charts
dt_start = dt.datetime(2012, 2, 10)
dt_end = dt.datetime(2012, 2, 24)
dt_timeofday = dt.timedelta(hours=16)

# Get a list of trading days between the start and the end.
ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
ls_symbols = ['MSFT', 'GOOG']

# Creating an object of the dataaccess class with Yahoo as the source.
c_dataobj = da.DataAccess('Yahoo', verbose=True)
# Reading adjusted_close prices
df_close = c_dataobj.get_data(ldt_timestamps, ls_symbols, "close")
print(df_close)
print
print("\nCorrect Output using the Default Data should be : ")
print("Assignments use this data for grading")
print("                      MSFT    GOOG")
print("2012-02-10 16:00:00  29.90  605.91")
print("2012-02-13 16:00:00  29.98  612.20")
print("2012-02-14 16:00:00  29.86  609.76")
print("2012-02-15 16:00:00  29.66  605.56")
Esempio n. 7
0
def main():
    '''Main Function'''

    # S&P 100
    ls_symbols = ['AAPL', 'ABT', 'ACN', 'AEP', 'ALL', 'AMGN', 'AMZN', 'APC', 'AXP', 'BA', 'BAC', 'BAX', 'BHI', 'BK', 'BMY', 'CAT', 'C', 'CL', 'CMCSA', 'COF', 'COP', 'COST', 'CPB', 'CSCO', 'CVS', 'CVX', 'DD', 'DELL', 'DIS', 'DOW', 'DVN', 'EBAY', 'EMC', 'EXC', 'F', 'FCX', 'FDX', 'GD', 'GE', 'GILD', 'GOOG', 'GS', 'HAL', 'HD', 'HNZ', 'HON', 'HPQ', 'IBM', 'INTC', 'JNJ', 'JPM', 'KO', 'LLY', 'LMT', 'LOW', 'MA', 'MCD', 'MDT', 'MET', 'MMM', 'MO', 'MON', 'MRK', 'MS', 'MSFT', 'NKE', 'NOV', 'NSC', 'NWSA', 'NYX', 'ORCL', 'OXY', 'PEP', 'PFE', 'PG', 'PM', 'QCOM', 'RF', 'RTN', 'SBUX', 'SLB', 'SO', 'SPG', 'T', 'TGT', 'TWX', 'TXN', 'UNH', 'UPS', 'USB', 'UTX', 'VZ', 'WFC', 'WMB', 'WMT', 'XOM']

    # Creating an object of the dataaccess class with Yahoo as the source.
    c_dataobj = da.DataAccess('EODHistoricalData')

    ls_all_syms = c_dataobj.get_all_symbols()
    # Bad symbols are symbols present in portfolio but not in all syms
    ls_bad_syms = list(set(ls_symbols) - set(ls_all_syms))
    for s_sym in ls_bad_syms:
        i_index = ls_symbols.index(s_sym)
        ls_symbols.pop(i_index)

    # Start and End date of the charts
    dt_end = dt.datetime(2010, 1, 1)
    dt_start = dt_end - dt.timedelta(days=365)
    dt_test = dt_end + dt.timedelta(days=365)

    # We need closing prices so the timestamp should be hours=16.
    dt_timeofday = dt.timedelta(hours=16)

    # Get a list of trading days between the start and the end.
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
    ldt_timestamps_test = du.getNYSEdays(dt_end, dt_test, dt_timeofday)

    # Reading just the close prices
    df_close = c_dataobj.get_data(ldt_timestamps, ls_symbols, "close")
    df_close_test = c_dataobj.get_data(ldt_timestamps_test, ls_symbols, "close")

    # Filling the data for missing NAN values
    df_close = df_close.fillna(method='ffill')
    df_close = df_close.fillna(method='bfill')
    df_close_test = df_close_test.fillna(method='ffill')
    df_close_test = df_close_test.fillna(method='bfill')

    # Copying the data values to a numpy array to get returns
    na_data = df_close.values.copy()
    na_data_test = df_close_test.values.copy()

    # Getting the daily returns
    tsu.returnize0(na_data)
    tsu.returnize0(na_data_test)

    # Calculating the frontier.
    (lf_returns, lf_std, lna_portfolios, na_avgrets, na_std) = getFrontier(na_data)
    (lf_returns_test, lf_std_test, unused, unused, unused) = getFrontier(na_data_test)

    # Plotting the efficient frontier
    plt.clf()
    plt.plot(lf_std, lf_returns, 'b')
    plt.plot(lf_std_test, lf_returns_test, 'r')

    # Plot where the efficient frontier would be the following year
    lf_ret_port_test = []
    lf_std_port_test = []
    for na_portfolio in lna_portfolios:
        na_port_rets = np.dot(na_data_test, na_portfolio)
        lf_std_port_test.append(np.std(na_port_rets))
        lf_ret_port_test.append(np.average(na_port_rets))

    plt.plot(lf_std_port_test, lf_ret_port_test, 'k')

    # Plot indivisual stock risk/return as green +
    for i, f_ret in enumerate(na_avgrets):
        plt.plot(na_std[i], f_ret, 'g+')

    # # Plot some arrows showing transistion of efficient frontier
    # for i in range(0, 101, 10):
    #     plt.arrow(lf_std[i], lf_returns[i], lf_std_port_test[i] - lf_std[i],
    #                 lf_ret_port_test[i] - lf_returns[i], color='k')

    # Labels and Axis
    plt.legend(['2009 Frontier', '2010 Frontier',
        'Performance of \'09 Frontier in 2010'], loc='lower right')
    plt.title('Efficient Frontier For S&P 100 ')
    plt.ylabel('Expected Return')
    plt.xlabel('StDev')
    plt.savefig('tutorial8.pdf', format='pdf')
Esempio n. 8
0
def main():
    ''' Main Function'''
    # Reading the portfolio
    na_portfolio = np.loadtxt('tutorial3_portfolio.csv',
                              dtype='U5,f4',
                              delimiter=',',
                              comments="#",
                              skiprows=1)
    print(na_portfolio)

    # Sorting the portfolio by symbol name
    na_portfolio = sorted(na_portfolio, key=lambda x: x[0])
    print(na_portfolio)

    # Create two list for symbol names and allocation
    ls_port_syms = []
    lf_port_alloc = []
    for port in na_portfolio:
        ls_port_syms.append(port[0])
        lf_port_alloc.append(port[1])

    # Creating an object of the dataaccess class with Yahoo as the source.
    c_dataobj = da.DataAccess('EODHistoricalData')
    ls_all_syms = c_dataobj.get_all_symbols()
    print(ls_all_syms)
    # Bad symbols are symbols present in portfolio but not in all syms
    ls_bad_syms = list(set(ls_port_syms) - set(ls_all_syms))

    if len(ls_bad_syms) != 0:
        print("Portfolio contains bad symbols : ", ls_bad_syms)

    for s_sym in ls_bad_syms:
        i_index = ls_port_syms.index(s_sym)
        ls_port_syms.pop(i_index)
        lf_port_alloc.pop(i_index)

    # Reading the historical data.
    dt_end = dt.datetime(2011, 1, 1)
    dt_start = dt_end - dt.timedelta(days=1095)  # Three years
    # We need closing prices so the timestamp should be hours=16.
    dt_timeofday = dt.timedelta(hours=16)

    # Get a list of trading days between the start and the end.
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)

    # Keys to be read from the data, it is good to read everything in one go.
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']

    # Reading the data, now d_data is a dictionary with the keys above.
    # Timestamps and symbols are the ones that were specified before.
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_port_syms, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # Copying close price into separate dataframe to find rets
    df_rets = d_data['close'].copy()
    # Filling the data.
    df_rets = df_rets.fillna(method='ffill')
    df_rets = df_rets.fillna(method='bfill')
    df_rets = df_rets.fillna(1.0)

    # Numpy matrix of filled data values
    na_rets = df_rets.values
    # returnize0 works on ndarray and not dataframes.
    tsu.returnize0(na_rets)

    # Estimate portfolio returns
    na_portrets = np.sum(na_rets * lf_port_alloc, axis=1)
    na_port_total = np.cumprod(na_portrets + 1)
    na_component_total = np.cumprod(na_rets + 1, axis=0)

    # Plotting the results
    plt.clf()
    fig = plt.figure()
    fig.add_subplot(111)
    plt.plot(ldt_timestamps, na_component_total, alpha=0.4)
    plt.plot(ldt_timestamps, na_port_total)
    ls_names = ls_port_syms
    ls_names.append('Portfolio')
    plt.legend(ls_names)
    plt.ylabel('Cumulative Returns')
    plt.xlabel('Date')
    fig.autofmt_xdate(rotation=45)
    plt.savefig('tutorial3.pdf', format='pdf')
Esempio n. 9
0
def main():
    '''Main Function'''

    # List of symbols
    ls_symbols = ["AAPL", "GOOG"]

    # Start and End date of the charts
    dt_start = dt.datetime(2008, 1, 1)
    dt_end = dt.datetime(2010, 12, 31)

    # We need closing prices so the timestamp should be hours=16.
    dt_timeofday = dt.timedelta(hours=16)

    # Get a list of trading days between the start and the end.
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)

    # Creating an object of the dataaccess class with Yahoo as the source.
    c_dataobj = da.DataAccess('Yahoo')

    # Reading just the close prices
    df_close = c_dataobj.get_data(ldt_timestamps, ls_symbols, "close")
    df_close = df_close.fillna(method='ffill')
    df_close = df_close.fillna(method='bfill')
    df_close = df_close.fillna(1.0)

    # Creating the allocation dataframe
    # We offset the time for the simulator to have atleast one
    # datavalue before the allocation.
    df_alloc = pd.DataFrame(np.array([[0.5, 0.5]]),
                            index=[ldt_timestamps[0] + dt.timedelta(hours=5)],
                            columns=ls_symbols)

    dt_last_date = ldt_timestamps[0]
    # Looping through all dates and creating monthly allocations
    for dt_date in ldt_timestamps[1:]:
        if dt_last_date.month != dt_date.month:
            # Create allocation
            na_vals = np.random.randint(0, 1000, len(ls_symbols))
            na_vals = na_vals / float(sum(na_vals))
            na_vals = na_vals.reshape(1, -1)
            # Append to the dataframe
            df_new_row = pd.DataFrame(na_vals,
                                      index=[dt_date],
                                      columns=ls_symbols)
            df_alloc = df_alloc.append(df_new_row)
        dt_last_date = dt_date

    # Adding cash to the allocation matrix
    df_alloc['_CASH'] = 0.0

    # Running the simulator on the allocation frame
    (ts_funds, ts_leverage, f_commission, f_slippage,
     f_borrow_cost) = tradesim(df_alloc,
                               df_close,
                               f_start_cash=10000.0,
                               i_leastcount=1,
                               b_followleastcount=True,
                               f_slippage=0.0005,
                               f_minimumcommision=5.0,
                               f_commision_share=0.0035,
                               i_target_leverage=1,
                               f_rate_borrow=3.5,
                               log="transaction.csv")

    print("Simulated Fund Time Series : ")
    print(ts_funds)
    print("Transaction Costs : ")
    print("Commissions : ", f_commission)
    print("Slippage : ", f_slippage)
    print("Borrowing Cost : ", f_borrow_cost)
Esempio n. 10
0
def getRandPort( lNum, dtStart=None, dtEnd=None, lsStocks=None,\
 dmPrice=None, dmVolume=None, bFilter=True, fNonNan=0.95,\
 fPriceVolume=100*1000, lSeed=None ):
    """
    @summary Returns a random portfolio based on certain criteria.
    @param lNum: Number of stocks to be included
    @param dtStart: Start date for portfolio
    @param dtEnd: End date for portfolio
    @param lsStocks: Optional list of ticker symbols, if not provided all symbols will be used
    @param bFilter: If False, stocks are not filtered by price or volume data, simply return random Portfolio.
    @param dmPrice: Optional price data, if not provided, data access will be queried
    @param dmVolume: Optional volume data, if not provided, data access will be queried
    @param fNonNan: Optional non-nan percent for filter, default is .95
    @param fPriceVolume: Optional price*volume for filter, default is 100,000
    @warning: Does not work for all sets of optional inputs, e.g. if you don't include dtStart, dtEnd, you need 
              to include dmPrice/dmVolume
    @return list of stocks which meet the criteria
    """
    
    if( lsStocks is None ):
        if( dmPrice is None and dmVolume is None ):
            norObj = da.DataAccess('Norgate') 
            lsStocks = norObj.get_all_symbols()
        elif( not dmPrice is None ):
            lsStocks = list(dmPrice.columns)
        else:
            lsStocks = list(dmVolume.columns)
    
    if( dmPrice is None and dmVolume is None and bFilter == True ):
        norObj = da.DataAccess('Norgate')  
        ldtTimestamps = du.getNYSEdays( dtStart, dtEnd, dt.timedelta(hours=16) )

    # if dmPrice and dmVol are provided then we don't query it every time """
    bPullPrice = False
    bPullVol = False
    if( dmPrice is None ):
        bPullPrice = True
    if( dmVolume is None ):
        bPullVol = True
            
    # Default seed (none) uses system clock """    
    rand.seed(lSeed)     
    lsRetStocks = []

    # Loop until we have enough randomly selected stocks """
    llRemainingIndexes = range(0,len(lsStocks))
    lsValid = None
    while( len(lsRetStocks) != lNum ):

        lsCheckStocks = []
        for i in range(lNum - len(lsRetStocks)):
            lRemaining = len(llRemainingIndexes)
            if( lRemaining == 0 ):
                print('Error in getRandPort: ran out of stocks')
                return lsRetStocks
            
            # Pick a stock and remove it from the list of remaining stocks """
            lPicked =  rand.randint(0, lRemaining-1)
            lsCheckStocks.append( lsStocks[ llRemainingIndexes.pop(lPicked) ] )

        # If bFilter is false"""
        # simply return our first list of stocks, don't check prive/vol """
        if( not bFilter ):
            return sorted(lsCheckStocks)
            

        # Get data if needed """
        if( bPullPrice ):
            dmPrice = norObj.get_data( ldtTimestamps, lsCheckStocks, 'close' )

        # Get data if needed """
        if( bPullVol ):
            dmVolume = norObj.get_data(ldtTimestamps, lsCheckStocks, 'volume' )

        # Only query this once if data is provided"""
        # else query every time with new data """
        if( lsValid is None or bPullVol or bPullPrice ):
            lsValid = stockFilter(dmPrice, dmVolume, fNonNan, fPriceVolume)
        
        for sAdd in lsValid:
            if sAdd in lsCheckStocks:
                lsRetStocks.append( sAdd )

    return sorted(lsRetStocks)