def get_data(dt_start, dt_end, symbols, lookback=0):
    """
    Given a date range, return the adjusted_close price for the given symbols.

    The range is expanded to NYSE trading days stamped at 16:00. If lookback
    is positive, that many trading days immediately before dt_start are
    prepended to the range.

    Missing prices are forward-filled, then back-filled; anything still
    missing is set to 1.0.

    @param dt_start: first day of the requested range
    @param dt_end: last day of the requested range
    @param symbols: symbols to load
    @param lookback: number of extra trading days to load before dt_start (>= 0)
    @return: the 'close' data returned by DataAccess.get_data, with gaps filled
    """
    assert(lookback >= 0)

    dt_timeofday = dt.timedelta(hours=16)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
    if lookback > 0:
        # Search a generous calendar window (twice the lookback plus a week)
        # so weekends and holidays cannot leave us short of trading days;
        # only the last `lookback` entries are kept below.
        dt_window_start = dt_start - dt.timedelta(days=lookback * 2 + 7)
        lookback_timestamps = du.getNYSEdays(dt_window_start, dt_start,
                                             dt_timeofday)
        # The window may end on the first requested day; drop the duplicate.
        if ldt_timestamps and lookback_timestamps \
                and ldt_timestamps[0] == lookback_timestamps[-1]:
            debug("contains the end time")
            lookback_timestamps.pop()
        ldt_timestamps = lookback_timestamps[-lookback:] + ldt_timestamps

    dataobj = da.DataAccess('Yahoo')
    debug("getting data from %s to %s" \
          % tuple([dt_day.strftime("%Y-%m-%d")
                    for dt_day in (ldt_timestamps[ndx] for ndx in (0,-1))]))
    ldf_data = dataobj.get_data(ldt_timestamps, symbols, 'close')

    ldf_data = ldf_data.fillna(method='ffill')
    ldf_data = ldf_data.fillna(method='bfill')
    ldf_data = ldf_data.fillna(1.0)

    return ldf_data
def totalvalue(cash_ini,orderform,valueform):
    """
    Simulate a file of orders and write the daily total portfolio value to CSV.

    The order file is read without a header row. After all-empty columns are
    dropped it must have six columns: year, month, day, symbol, order type
    and share count. An order type of exactly 'Buy' adds shares; any other
    value subtracts them. Orders are applied at the 16:00 'close' price of
    their date.

    @param cash_ini: starting cash
    @param orderform: path (or buffer) of the orders CSV
    @param valueform: destination passed to Series.to_csv for the daily totals
    """
    trades = pd.read_csv(orderform, header=None, sep=',')
    trades = trades.dropna(axis=1, how='all')
    trades.columns = ['Year', 'Month', 'Day', 'Symbol', 'Order', 'Share']
    dateall = pd.to_datetime([
        dt.datetime(int(year), int(month), int(day), 16)
        for year, month, day in zip(trades['Year'], trades['Month'],
                                    trades['Day'])
    ])
    trades = trades.drop(['Year', 'Month', 'Day'], axis=1)
    trades['Date'] = dateall
    trades.set_index('Date', inplace=True)

    # Unique symbols in order of first appearance.
    ls_symbols = list(trades['Symbol'].unique())

    # Use min/max so the order file does not have to be sorted by date.
    startdate = dateall.min()
    enddate = dateall.max()
    dt_timeofday = dt.timedelta(hours=16)
    ldt_timestamps = du.getNYSEdays(startdate, enddate + dt_timeofday,
                                    dt_timeofday)
    c_dataobj = da.DataAccess('Yahoo')
    price = c_dataobj.get_data(ldt_timestamps, ls_symbols, 'close')

    # Signed share change per (day, symbol); zero where nothing traded.
    orders = pd.DataFrame(0.0, index=price.index, columns=price.columns)
    for trade_date, symbol, order, shares in zip(trades.index,
                                                 trades['Symbol'],
                                                 trades['Order'],
                                                 trades['Share']):
        if order == 'Buy':
            orders.loc[trade_date, symbol] += shares
        else:
            orders.loc[trade_date, symbol] -= shares

    # Cash falls by the cost of each day's net purchases.
    daily_cost = (price * orders).sum(axis=1).values
    cash = cash_ini - np.cumsum(daily_cost)

    # Holdings are the running total of all share changes so far.
    ownership = orders.cumsum()

    # Market value of the holdings on each day.
    value = (price * ownership).sum(axis=1).values

    keys = ['price', 'orders', 'ownership']
    trading_table = pd.concat([price, orders, ownership], keys=keys, axis=1)
    trading_table[('value', 'CASH')] = cash
    trading_table[('value', 'STOCK')] = value
    trading_table[('value', 'TOTAL')] = cash + value
    trading_table[('value', 'TOTAL')].to_csv(valueform)
Esempio n. 3
0
def simulate(startdate, enddate, symbols, alloc):
    """
    Compute summary statistics for a fixed-allocation portfolio.

    Close prices are normalised by their first row, weighted by alloc and
    summed per day to give the portfolio value series.

    @param startdate: start of the date range
    @param enddate: end of the date range
    @param symbols: symbols in the portfolio
    @param alloc: one weight per symbol, in the same order as symbols
    @return: tuple (volatility, average daily return, Sharpe ratio,
             cumulative return)
    """
    dt_timeofday = dt.timedelta(hours=16)
    ldt_timestamps = du.getNYSEdays(startdate, enddate, dt_timeofday)

    c_dataobj = da.DataAccess('Yahoo', cachestalltime=0)
    ls_keys = ['open', 'close', 'high', 'low', 'volume']
    ldf_data = c_dataobj.get_data(ldt_timestamps, symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))
    for s_key in ls_keys:
        d_data[s_key] = d_data[s_key].fillna(method='ffill')
        d_data[s_key] = d_data[s_key].fillna(method='bfill')
        d_data[s_key] = d_data[s_key].fillna(1.0)

    na_price = d_data['close'].values

    na_normalized_price = na_price / na_price[0, :]
    na_port = na_normalized_price * alloc
    na_port_daily_totals = np.sum(na_port, axis=1)

    # returnize0 rewrites the array it is given, so hand it a copy.
    na_rets = na_port_daily_totals.copy()
    tsu.returnize0(na_rets)

    vol = np.std(na_rets)
    daily_ret = np.mean(na_rets)
    sharpe = mt.sqrt(252) * daily_ret / vol
    cum_ret = na_port_daily_totals[-1] / na_port_daily_totals[0]

    return (vol, daily_ret, sharpe, cum_ret)
Esempio n. 4
0
def simulate(dt_start, dt_end, ls_symbols, ratio):
    """
    Compute summary statistics for a fixed-allocation portfolio.

    Close prices are normalised by their first row, weighted by ratio and
    summed per day to give the portfolio value series.

    @param dt_start: start of the date range
    @param dt_end: end of the date range
    @param ls_symbols: symbols in the portfolio
    @param ratio: one weight per symbol, in the same order as ls_symbols
    @return: tuple (volatility, average daily return, Sharpe ratio,
             cumulative return)
    """
    dt_timeofday = dt.timedelta(hours=16)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
    c_dataobj = da.DataAccess('Yahoo', cachestalltime=0)
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    na_price = d_data['close'].values
    na_normalized_price = na_price / na_price[0, :]
    na_normalized_price_ratio = np.multiply(na_normalized_price, ratio)
    all_price_ratio = np.sum(na_normalized_price_ratio, axis=1)

    # returnize0 rewrites the array it is given, so compute the daily
    # returns exactly once, on a copy; calling it a second time on the same
    # array would produce returns of returns.
    na_rets = all_price_ratio.copy()
    tsu.returnize0(na_rets)

    # Volatility and average of the daily returns.
    vol = np.std(na_rets)
    daily_ret = np.mean(na_rets)

    # Annualised Sharpe ratio (252 trading days) and cumulative return.
    sharpe = np.sqrt(252) * daily_ret / vol
    cum_ret = all_price_ratio[-1] / all_price_ratio[0]

    return vol, daily_ret, sharpe, cum_ret
Esempio n. 5
0
def GetNormalizedReturn(dt_start,dt_end, symbols,c_dataobj):
    """
    Load close prices for symbols and normalise each column by its first row.

    @param dt_start: start of the date range
    @param dt_end: end of the date range
    @param symbols: symbols to load
    @param c_dataobj: data-access object providing
                      get_data(timestamps, symbols, keys)
    @return: array of close prices divided by the first row's prices
    """
    # Closing prices are stamped at 16:00.
    closing_time = dt.timedelta(hours=16)
    trading_days = du.getNYSEdays(dt_start, dt_end, closing_time)

    # Read every field in one call; only 'close' is used afterwards.
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    frames = c_dataobj.get_data(trading_days, symbols, fields)
    by_field = dict(zip(fields, frames))

    close = by_field['close'].copy().values
    return close / close[0, :]
Esempio n. 6
0
def portfolio_by_date(start_date, end_date, codes, trades):
    """
    Build per-day and cumulative trade tables for a list of trades.

    Each trade is indexed as (date, symbol, action, ..., amount). The amount
    is added as-is for a "sell" action (case-insensitive) and negated for
    anything else.
    NOTE(review): this sign convention looks like a cash-flow view rather
    than a share count - confirm against callers.

    @param start_date: start of the date range
    @param end_date: end of the date range
    @param codes: symbols to create columns for
    @param trades: iterable of trade records; the date must be one of the
                   NYSE days in the range, otherwise ValueError is raised
    @return: (df_portfolio, df_ptrades) - the signed amount traded on each
             day and its running total; columns are sorted by symbol and
             rows follow the order of the NYSE days in the range
    """
    ny_days = du.getNYSEdays(start_date, end_date, dt.timedelta(hours=0))

    # {"AAPL": [0, 0, 0, ...], "GOOG": [0, 0, ...]}
    portfolio_codes = collections.OrderedDict(
        (code, np.zeros(len(ny_days))) for code in codes)
    portfolio_trades = copy.deepcopy(portfolio_codes)

    for curr_trade in trades:
        if curr_trade[2].lower() == "sell":
            value = curr_trade[-1]
        else:
            value = curr_trade[-1] * -1

        index = ny_days.index(curr_trade[0])
        stock = curr_trade[1]

        portfolio_codes[stock][index] += value
        # The trade affects the running total from its day onwards.
        portfolio_trades[stock][index:] += value

    # sort_index(axis=1) orders the columns by label, which is what the
    # deprecated DataFrame.sort(axis=1) did.
    df_portfolio = pd.DataFrame(portfolio_codes).sort_index(axis=1)
    df_ptrades = pd.DataFrame(portfolio_trades).sort_index(axis=1)

    return df_portfolio, df_ptrades
 def bollinger_band(self, tick, window=20, k=2, nml=False, mi_only=False):
     """
     Compute Bollinger Band series over this object's own timestamps.

     Returns a dict with four entries:
       'mi' - the moving average,
       'up' - the upper band (average + k * rolling std),
       'lo' - the lower band (average - k * rolling std),
       'ba' - the Bollinger value, (price - average) / (k * rolling std).
     If mi_only is true, only the moving average is returned.
     Uses the 'nml_close' column when nml is true, otherwise 'close'.
     """
     own_stamps = self.index
     first_stamp = own_stamps[0]
     # Fetch data from before the first timestamp so the first window has
     # history to average over.
     span = dt.timedelta(days=(np.ceil(window*7/5)+5))
     pre_timestamps = du.getNYSEdays(first_stamp - span,
                                     first_stamp - dt.timedelta(days=1),
                                     dt.timedelta(hours=16))
     history = ut.get_tickdata([tick], pre_timestamps)

     column = 'nml_close' if nml else 'close'
     ma_data = pd.concat([history[tick][column], self[column]])

     middle = pd.rolling_mean(ma_data, window=window)[own_stamps]
     if mi_only:
         return middle

     sigma = pd.rolling_std(ma_data, window=window)
     bo = dict()
     bo['mi'] = middle
     bo['up'] = middle + k * sigma[own_stamps]
     bo['lo'] = middle - k * sigma[own_stamps]
     bo['ba'] = (ma_data[own_stamps] - middle) / (k * sigma[own_stamps])
     return bo
    def marketsim(cash, orders_file, data_item, dataobj):
        """
        Replay an orders file and return the portfolio value for every day.

        Each CSV row must have six fields: year, month, day, symbol, action,
        share count. An action of exactly 'Sell' negates the share count.

        @param cash: starting cash handed to pf.Portfolio
        @param orders_file: path of the orders CSV
        @param data_item: data item passed to dataobj.get_data
        @param dataobj: data-access object providing
                        get_data(timestamps, symbols, data_item)
        @return: list of (year, month, day, value) tuples, one per NYSE
                 trading day from the first to the last order date
        """
        # Read orders, grouped by trade date.
        orders = defaultdict(list)
        symbols = set([])
        with open(orders_file, "rU") as f_orders:
            for year, month, day, sym, action, num in csv.reader(f_orders):
                trade_day = dt.date(int(year), int(month), int(day))
                orders[trade_day].append((sym, action, int(num)))
                symbols.add(sym)

        days = sorted(orders)
        day, end = days[0], days[-1]

        # End the range one day after the last order. Adding a timedelta
        # (rather than day + 1) stays valid on the last day of a month.
        timestamps = du.getNYSEdays(
            dt.datetime(day.year, day.month, day.day),
            dt.datetime(end.year, end.month, end.day) + dt.timedelta(days=1),
            dt.timedelta(hours=16))

        close = dataobj.get_data(timestamps, symbols, data_item)

        values = []
        portfolio = pf.Portfolio(cash)
        for i, t in enumerate(timestamps):
            # .get avoids inserting empty entries for days without orders.
            for sym, action, num in orders.get(dt.date(t.year, t.month, t.day), ()):
                if action == 'Sell':
                    num *= -1
                portfolio.update(sym, num, close[sym][i])

            # Record the portfolio value for every day, not only the last.
            values.append((t.year, t.month, t.day, portfolio.value(close, i)))

        return values
def simulate(startDate, endDate, symbolsEq, allocationEq) :
    """
    Compute summary statistics for a fixed-allocation portfolio.

    Close prices are normalised by their first row, weighted by
    allocationEq and summed per day to give the portfolio value series.

    @param startDate: start of the date range
    @param endDate: end of the date range
    @param symbolsEq: symbols in the portfolio
    @param allocationEq: one weight per symbol, in the same order as symbolsEq
    @return: tuple (std dev of daily returns, average daily return,
             Sharpe ratio, last value of the normalised portfolio series)
    """
    dt_timeofday = dt.timedelta(hours=16)
    ldt_timestamps = du.getNYSEdays(startDate, endDate, dt_timeofday)
    c_dataobj = da.DataAccess('Yahoo')
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = c_dataobj.get_data(ldt_timestamps, symbolsEq, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))
    na_price = d_data['close'].values
    na_normalized_price = na_price / na_price[0, :]

    na_normalizedPriceAllocation = na_normalized_price * allocationEq
    na_sumRows = na_normalizedPriceAllocation.sum(axis=1)

    # returnize0 rewrites the array it is given, so hand it a copy.
    dailyReturn = na_sumRows.copy()
    tsu.returnize0(dailyReturn)
    avgDailyReturn = np.average(dailyReturn)
    dailyReturnStdDev = np.std(dailyReturn)

    # Annualised with 252 trading days.
    sharpeRatio = np.sqrt(252) * avgDailyReturn / dailyReturnStdDev
    cumulativeReturn = na_sumRows[-1]

    return dailyReturnStdDev, avgDailyReturn, sharpeRatio, cumulativeReturn
Esempio n. 10
0
    def __init__(self,start,end,symbols):
        """
        Load normalised close prices for the portfolio and for "$SPX".

        @param start: start of the date range
        @param end: end of the date range
        @param symbols: portfolio symbols

        Sets self.timestamps (NYSE days at 16:00), self.close_data_array
        (portfolio close prices divided by their first row) and
        self.spx_returns ("$SPX" close prices divided by their first row).
        """
        self.start = start
        self.end = end
        self.symbols = symbols

        timeofday = dt.timedelta(hours=16)
        timestamps = du.getNYSEdays(start, end, timeofday)

        keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
        # get_data returns one frame per key, in key order; look the close
        # position up by name instead of hard-coding an index.
        close_pos = keys.index('close')

        # query the portfolio close data
        dataobj = da.DataAccess('Yahoo')
        close_data = dataobj.get_data(timestamps, symbols, keys)[close_pos]

        # convert to an array and normalise by the first day's prices
        close_data_array = close_data.values
        close_data_array = close_data_array / close_data_array[0, :]

        # reference close data for "$SPX", normalised the same way
        ref_data = dataobj.get_data(timestamps, ["$SPX"], keys)[close_pos]
        ref_data_array = ref_data.values
        ref_data_array = ref_data_array / ref_data_array[0, :]

        self.timestamps = timestamps
        self.close_data_array = close_data_array
        self.spx_returns = ref_data_array
def main(argv):
    """
    Run a Bollinger-band event study and write the event profile.

    @param argv: [start date 'YYYY-MM-DD', end date 'YYYY-MM-DD',
                  symbol list name (e.g. sp5002012), output directory]
    """
    date_format = "%Y-%m-%d"
    start_date = dt.datetime.strptime(argv[0], date_format)
    end_date = dt.datetime.strptime(argv[1], date_format)
    symbol_list = argv[2]
    output_dir = argv[3]

    timestamps = du.getNYSEdays(start_date, end_date, dt.timedelta(hours = 16))
    dataobj = da.DataAccess('Yahoo')

    # SPY is added as the market symbol for the event profiler.
    symbols = dataobj.get_symbols_from_list(symbol_list)
    symbols.append('SPY')

    keys = ['close', 'actual_close']
    data_dict = dict(zip(keys, dataobj.get_data(timestamps, symbols, keys)))

    # Fill gaps: forward, then backward, then a constant 1.0.
    for key in keys:
        frame = data_dict[key].fillna(method = 'ffill')
        frame = frame.fillna(method = 'bfill')
        data_dict[key] = frame.fillna(1.0)

    bollinger = bollinger_bands(symbols, data_dict['close'], timestamps)
    events = find_events(symbols, bollinger, timestamps)

    report_path = file_name(output_dir, symbol_list, start_date, end_date)
    ep.eventprofiler(events, data_dict,
                     i_lookback = 20, i_lookforward = 20,
                     s_filename = report_path,
                     b_market_neutral = True, b_errorbars = True,
                     s_market_sym = 'SPY')
Esempio n. 12
0
def get_data(symbols, dt_start, dt_end, ):
    """
    Return the 'close' prices of symbols as an array.

    @param symbols: symbols to load
    @param dt_start: start of the date range
    @param dt_end: end of the date range
    @return: the .values array of the close-price data for the NYSE days
             in the range (stamped at 16:00)
    """
    trading_days = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))
    source = da.DataAccess('Yahoo')
    return source.get_data(trading_days, symbols, "close").values
Esempio n. 13
0
def simulate(startdate, enddate, ls_symbols, ls_alloc):
    """
    Compute summary statistics for a fixed-allocation portfolio.

    @param startdate: start of the date range
    @param enddate: end of the date range
    @param ls_symbols: symbols in the portfolio
    @param ls_alloc: one weight per symbol, in the same order as ls_symbols
    @return: tuple (volatility, average daily return, Sharpe ratio,
             last value of the normalised portfolio series)
    """
    closing_time = dt.timedelta(hours = 16)
    trading_days = du.getNYSEdays(startdate, enddate, closing_time)

    source = da.DataAccess('Yahoo', cachestalltime = 0)
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    frames = source.get_data(trading_days, ls_symbols, fields)
    by_field = dict(zip(fields, frames))

    # Fill gaps: forward, then backward, then a constant 1.0.
    for field in fields:
        frame = by_field[field].fillna(method='ffill')
        frame = frame.fillna(method='bfill')
        by_field[field] = frame.fillna(1.0)

    closes = by_field['close'].values
    normalized = closes / closes[0,:]
    portfolio = np.sum(ls_alloc * normalized, 1)

    # returnize0 rewrites the array it is given, so hand it a copy.
    returns = portfolio.copy()
    tsu.returnize0(returns)

    vol = np.std(returns)
    daily_ret = np.average(returns)
    sharpe_ratio = np.sqrt(252) * daily_ret / vol

    return vol, daily_ret, sharpe_ratio, portfolio[-1]
def read_csv(file_name):
    """
    Read an orders CSV and derive its trading days and symbols.

    The first three fields of each row are the date parts and the fourth is
    the symbol. The first and last rows supply the start and end dates, so
    the file is expected to be in date order.

    @param file_name: path of the orders CSV
    @return: (ldt_timestamps, ls_symbols) - NYSE days from the first to the
             last row's date (stamped at 16:00) and the unique symbols in
             order of first appearance
    """
    ls_date = []
    ls_symbols = []
    seen_symbols = set()
    with open(file_name,'rU') as csv_in:
        for row in csv.reader(csv_in,delimiter = ','):
            ls_date.append([row[0],row[1],row[2]])
            # De-duplicate while keeping a deterministic order.
            if row[3] not in seen_symbols:
                seen_symbols.add(row[3])
                ls_symbols.append(row[3])

    # initial and final date
    start, end = ls_date[0], ls_date[-1]
    dt_start, dt_end = str_dt(start), str_dt(end)

    # Create the list of trading dates; the end is offset by one day so the
    # last order's day is part of the range.
    dt_timeofday = dt.timedelta(hours=16)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end+dt.timedelta(days = 1), dt_timeofday)
    return ldt_timestamps, ls_symbols
Esempio n. 15
0
def get_close_data(start_date, end_date, symbols):
    """
    Returns the close prices of the symbols passed in
    @param start_date: start date to grab data from
    @param end_date: end date to grab data from
    @param symbols: symbols to get data for
    @return: array of close prices (gaps filled forward, backward, then
    with 1.0) and the list of trading-day timestamps
    """
    # Trading days in the range, stamped at the 16:00 close
    trading_days = du.getNYSEdays(start_date, end_date, dt.timedelta(hours=16))

    # Grab data from QSTK data. Yahoo as the data source
    source = da.DataAccess('Yahoo')
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    by_field = dict(zip(fields, source.get_data(trading_days, symbols, fields)))

    for field in fields:
        frame = by_field[field].fillna(method='ffill')
        frame = frame.fillna(method='bfill')
        by_field[field] = frame.fillna(1.0)

    return by_field['close'].values, trading_days
Esempio n. 16
0
def load(ls_symbols, ldt_timestamps):
    """
    Load all price fields for ls_symbols between the earliest and latest
    of the given timestamps.

    The ldt_timestamps list is sorted in place. Its first and last entries
    bound the NYSE-day range that is loaded; both are printed.

    @param ls_symbols: symbols to load
    @param ldt_timestamps: list of timestamps (sorted in place)
    @return: dict mapping each field name to its data, with gaps filled
             forward, backward, then with 1.0
    """
    source = da.DataAccess('Yahoo')

    ldt_timestamps.sort()
    print("---")
    print(ldt_timestamps[0])
    print(ldt_timestamps[-1])
    trading_days = du.getNYSEdays(ldt_timestamps[0], ldt_timestamps[-1],
                                  dt.timedelta(hours=16))

    print("---")

    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    frames = source.get_data(trading_days, ls_symbols, fields)
    d_data = dict(zip(fields, frames))

    for field in fields:
        frame = d_data[field].fillna(method='ffill')
        frame = frame.fillna(method='bfill')
        d_data[field] = frame.fillna(1.0)
    return d_data
Esempio n. 17
0
def simulate(startdate, enddate, symbols, percentage):
    """
    Compute summary statistics for a fixed-allocation portfolio.

    @param startdate: start of the date range
    @param enddate: end of the date range
    @param symbols: symbols in the portfolio
    @param percentage: one weight per symbol, in the same order as symbols
    @return: tuple (std dev of daily returns, average daily return,
             Sharpe ratio, cumulative return)
    """
    dt_timeofday = dt.timedelta(hours=16)
    ldt_timestamps = du.getNYSEdays(startdate, enddate, dt_timeofday)

    c_dataobj = da.DataAccess('Yahoo', cachestalltime=0)
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = c_dataobj.get_data(ldt_timestamps, symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    na_close = d_data['close'].values
    normalized_vals = na_close / na_close[0, :]
    indiv_daily_value = percentage * normalized_vals
    total_dv = indiv_daily_value.sum(axis=1)

    # Daily return is today's value over yesterday's, minus one; the first
    # day has no predecessor and is left at 0.
    dailyret = np.zeros(total_dv.shape[0])
    dailyret[1:] = (total_dv[1:] / total_dv[:-1]) - 1

    avg_ret = np.average(dailyret)
    std_ret = np.std(dailyret)
    # Annualised with 252 trading days.
    sharpe = (math.sqrt(252) * avg_ret) / std_ret
    return (std_ret, avg_ret, sharpe, (total_dv[-1] / total_dv[0]))
Esempio n. 18
0
def read_data(start_date, end_date, ls_symbols):
    ''' Read the close prices for the specified symbols.

    Returns a DataFrame of close prices (gaps filled forward, backward,
    then with 1.0) with one column per symbol, indexed like the loaded
    close data.
    '''
    # Closing prices are stamped at 16:00.
    trading_days = du.getNYSEdays(start_date, end_date, dt.timedelta(hours=16))

    print("Reading data")
    # Yahoo is the data source.
    source = da.DataAccess('Yahoo', cachestalltime = 0)

    fields = ['close']
    frames = source.get_data(trading_days, ls_symbols, fields)
    print("Done")

    by_field = dict(zip(fields, frames))

    # Fill the NaNs in the data.
    for field in fields:
        frame = by_field[field].fillna(method='ffill')
        frame = frame.fillna(method='bfill')
        by_field[field] = frame.fillna(1.0)

    # Rebuild a frame from the raw close values, labelled by symbol.
    closes = by_field['close']
    na_price_df = pd.DataFrame(closes.values, columns = ls_symbols, index = closes.index)
    return(na_price_df)
def bollinger_bands(symbol,dt_start, dt_end,rolling_period):
    """
    Compute rolling statistics and the Bollinger value of the
    'actual_close' series for the first entry of symbol.

    The end date is first moved to the next NYSE trading day.

    @param symbol: list of symbols; data is loaded for all of them but only
                   symbol[0] is analysed
    @param dt_start: start of the date range
    @param dt_end: end of the date range
    @param rolling_period: number of days in the rolling window
    @return: (bollinger_matrix, ldt_timestamps, actual_close) where
             bollinger_matrix has one row per timestamp and the columns
             price, rolling average, rolling std dev and Bollinger value
             ((price - rolling avg) / rolling std dev); the last three are
             NaN until a full window is available
    """
    # define dates, data source, retrieve actual close price
    dt_timeofday = dt.timedelta(hours=16)
    dt_end = du.getNextNNYSEdays(dt_end, 1, dt_timeofday)[0]
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
    c_dataobj = da.DataAccess('Yahoo', cachestalltime=0)
    ls_keys = ['actual_close','close']
    ldf_data = c_dataobj.get_data(ldt_timestamps, symbol, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    actual_close = d_data['actual_close']
    prices = actual_close[symbol[0]]

    # columns: price, rolling average, rolling stddev, bollinger_val
    bollinger_matrix = np.zeros((len(ldt_timestamps), 4))
    # A bounded deque drops the oldest price when a new one is appended.
    rolling_prices = deque(maxlen=rolling_period)

    for i, timestamp in enumerate(ldt_timestamps):
        bollinger_matrix[i, 0] = prices.loc[timestamp]
        rolling_prices.append(bollinger_matrix[i, 0])
        if len(rolling_prices) < rolling_period:
            # not enough history for a full window yet
            bollinger_matrix[i, 1:] = np.nan
        else:
            bollinger_matrix[i, 1] = np.average(rolling_prices)
            bollinger_matrix[i, 2] = np.std(rolling_prices)
            bollinger_matrix[i, 3] = (bollinger_matrix[i, 0] - bollinger_matrix[i, 1]) / bollinger_matrix[i, 2]
    return bollinger_matrix, ldt_timestamps,actual_close
    def _generate_data(self):
        """
        Build the fixture data: '$SPX' close prices, a monthly allocation
        table and the expected result.

        Sets self.df_close (close data from the 'Norgate' source for
        2008-12-01 through 2010-01-31), self.df_alloc (one row for the
        first day of each month of 2009 with -1 for '$SPX' and 0.0 for
        '_CASH') and self.i_open_result.
        """
        year = 2009
        l_symbols = ['$SPX']

        # Desired timestamps: trading days stamped at 16:00
        first_day = dt.datetime(year-1, 12, 1)
        last_day = dt.datetime(year+1, 1, 31)
        ldt_timestamps = du.getNYSEdays(first_day, last_day,
                                        dt.timedelta(hours = 16))

        dataobj = da.DataAccess('Norgate')
        self.df_close = dataobj.get_data(ldt_timestamps, l_symbols, "close",
                                         verbose=True)

        # January first, then one appended row per remaining month.
        df_alloc = pand.DataFrame(index=[dt.datetime(year, 1, 1)],
                                  data=[-1], columns=l_symbols)
        for month in range(2, 13):
            df_month = pand.DataFrame(index=[dt.datetime(year, month, 1)],
                                      data=[-1], columns=l_symbols)
            df_alloc = df_alloc.append(df_month)

        df_alloc['_CASH'] = 0.0
        self.df_alloc = df_alloc

        #Based on hand calculation using the transaction costs and slippage.
        self.i_open_result = 0.7541428779600005
Esempio n. 21
0
def portfolio_simulate(start_date, end_date, symbols, allocations):
    """ For the given portfolio and dates, compute the Sharpe ratio, the std
     deviation of daily returns (volatility), the average daily return and
     the cumulative return. The four inputs are echoed back at the front of
     the returned tuple. """

    # NYSE closing-time timestamps for the range
    timestamps = date_util.getNYSEdays(start_date, end_date,
                                       dt.timedelta(hours=16))

    # Close prices: get_data returns one frame per requested key
    stock_dao = data_access.DataAccess('Yahoo')
    closes = stock_dao.get_data(timestamps, symbols, ['close'])[0]

    # Normalise by the first day's prices, weight, and total per day
    first_closes = closes.values[0,:]
    weighted = (closes / first_closes) * allocations
    portfolio_values = weighted.sum(axis=1)

    # The cumulative return must be read before returnize0 is called on
    # the series.
    cumulative_return = portfolio_values[-1]
    daily_returns = tsu.returnize0(portfolio_values)
    ave_daily_return = daily_returns.mean()
    std_deviation = daily_returns.std()
    sharpe_ratio = tsu.get_sharpe_ratio(daily_returns, 0.0)[0]

    return (start_date, end_date, symbols, allocations,
            sharpe_ratio, std_deviation, ave_daily_return, cumulative_return)
Esempio n. 22
0
def getData(dt_start,dt_end,ls_name):
    """
    Load all price fields for a named symbol list plus 'SPY'.

    @param dt_start: start of the date range
    @param dt_end: end of the date range
    @param ls_name: name of the symbol list (e.g. 'sp5002012')
    @return: (d_data, ls_symbols) - dict mapping each field name to its
             data with gaps filled forward, backward, then with 1.0, and
             the list of symbols that was loaded
    """
    trading_days = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))

    source = da.DataAccess('Yahoo')

    ls_symbols = source.get_symbols_from_list(ls_name)
    ls_symbols.append('SPY')

    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    d_data = dict(zip(fields, source.get_data(trading_days, ls_symbols, fields)))

    # remove nan
    for field in fields:
        frame = d_data[field].fillna(method = 'ffill')
        frame = frame.fillna(method = 'bfill')
        d_data[field] = frame.fillna(1.0)

    return d_data,ls_symbols
Esempio n. 23
0
def simulate(startdate, enddate, equities, allocations):
    """
    Compute summary statistics for a fixed-allocation portfolio.

    @param startdate: start of the date range
    @param enddate: end of the date range
    @param equities: symbols in the portfolio
    @param allocations: one weight per symbol, in the same order as equities
    @return: tuple (std dev of daily returns, average daily return,
             Sharpe ratio, last value of the normalised portfolio series)
    """
    # Trading days stamped at the 16:00 close
    trading_days = du.getNYSEdays(startdate, enddate, dt.timedelta(hours = 16))

    # Data access
    source = da.DataAccess('Yahoo')
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    by_field = dict(zip(fields, source.get_data(trading_days, equities, fields)))

    # Close prices normalised by the first day
    closes = by_field['close'].values
    normalized = closes / closes[0, :]

    # Weights as a column vector so the dot product gives one value per day
    weights = np.array(allocations).reshape(len(allocations), 1)
    portfolio = np.dot(normalized, weights)

    # Total return: last entry of the per-day sums
    cum_ret = np.sum(portfolio.copy(), axis = 1)[-1]

    # Daily returns are computed in place on a copy
    returns = portfolio.copy()
    tsu.returnize0(returns)
    avg_daily_ret = np.mean(returns)
    std_dev = np.std(returns)
    sharpe = np.sqrt(252) * avg_daily_ret / std_dev

    return std_dev, avg_daily_ret, sharpe, cum_ret
def calculate_returns():
    """
    Fill the global values_matrix with price * holding for every symbol on
    every trading day.

    Reads the globals uniqueDates, uniqueSymbols, d_data and holding_matrix,
    and rebinds the global values_matrix. Takes no arguments and returns
    nothing.
    """
    global values_matrix

    # Starts at index 1 rather than 0 - presumably because perform_step2
    # prepends a 0 placeholder to uniqueDates; confirm it has run first.
    a = uniqueDates[1]
    # One day past the last date so the last day falls inside the range.
    b = uniqueDates[-1] + dt.timedelta(days=1)
    ldt_timestamps = du.getNYSEdays(a, b, dt.timedelta(hours=16))

    print "calc", ldt_timestamps[0]
    print "calc", ldt_timestamps[-1]
    #dateList = []
    #for x in range (0, numdays):
    #    dateList.append(a + dt.timedelta(days = x))

    print "------", uniqueSymbols
    # Start from an all-zero table: one row per trading day, one column per symbol.
    values_matrix = pd.DataFrame(index=ldt_timestamps, columns=uniqueSymbols)
    values_matrix = values_matrix.fillna(0)

    for date in ldt_timestamps:
        #print d_data['close'].loc[date+dt.timedelta(hours=16)]
        #print holding_matrix.loc[date]
        #print date
        #print d_data['close'].loc[date]
        # a is rebound here: the close prices of all symbols on this day.
        a = d_data['close'].loc[date]
        # bisect returns the insertion point to the right of date, so
        # index-1 is the last entry of uniqueDates that is <= date
        # (assumes uniqueDates is sorted - TODO confirm).
        index = bisect.bisect(uniqueDates,date)
        # print date
        # print uniqueDates[index-1]
        # b is rebound here: the holdings row for that most recent date.
        b = holding_matrix.loc[uniqueDates[index-1]]
        #print a
        #print b
        # Element-wise price * holding for each symbol.
        c = a.mul(b)
        #print c
        values_matrix.loc[date]=c
def perform_step2():
    """
    Load price data for the traded symbols into the global d_data.

    Sorts the global uniqueDates, loads every price field for the symbols
    in the global uniqueSymbols (plus '_CASH') over the NYSE days from the
    first date to one day after the last, fills gaps (forward, backward,
    then 1.0) and finally prepends a 0 placeholder to uniqueDates.
    """
    global uniqueDates
    global d_data

    uniqueDates.sort()

    dt_start = uniqueDates[0]
    dt_end = uniqueDates[-1] + dt.timedelta(days=1)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))

    dataobj = da.DataAccess('Yahoo')
    # ls_symbols is the same list object as uniqueSymbols, so '_CASH' is
    # added to the global list too (calculate_returns builds its columns
    # from uniqueSymbols). Guarded so a repeated call does not add it twice.
    ls_symbols = uniqueSymbols
    if '_CASH' not in ls_symbols:
        ls_symbols.append('_CASH')

    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)

    d_data = dict(zip(ls_keys, ldf_data))

    for s_key in ls_keys:
        d_data[s_key] = d_data[s_key].fillna(method='ffill')
        d_data[s_key] = d_data[s_key].fillna(method='bfill')
        d_data[s_key] = d_data[s_key].fillna(1.0)

    d_data['_CASH'] = 1.0
    # Placeholder entry ahead of the first real date.
    uniqueDates = [0] + uniqueDates
def getPrices(startDate, endDate, symbols, fields, fillna=True, isSymbolsList=False, includeLastDay=True):
    """
     Reads stock prices from Yahoo.
     With includeLastDay set, the end of the range is moved one day later
     so the prices returned INCLUDE the endDate.
     @param isSymbolsList: whether symbols names a symbol list (e.g. sp5002012)
                           rather than stock symbols. If true, symbols must be
                           a single name (a string or a one-element sequence).
     @param fillna: if true, the data is passed through fillNA before returning
     @return the prices read for the NYSE days in the range (stamped at 16:00)
    """
    symbolsIsName = isinstance(symbols, str)
    assert not isSymbolsList or symbolsIsName or len(symbols) == 1, \
        'When isSymbolsList is true, symbols can only contain one symbol.'

    lastDay = endDate + timedelta(days=1) if includeLastDay else endDate

    dataReader = DataAccess('Yahoo')
    timeStamps = getNYSEdays(startDate, lastDay, timedelta(hours=16))

    if isSymbolsList:
        listName = symbols if symbolsIsName else symbols[0]
        symbols = dataReader.get_symbols_from_list(listName)

    data = dataReader.get_data(timeStamps, symbols, fields)

    return fillNA(data) if fillna else data
def fetchNYSEData(dt_start, dt_end, ls_symbols):
    """
    Load close prices for ls_symbols over the NYSE days in a date range.

    @param dt_start: start of the date range
    @param dt_end: end of the date range
    @param ls_symbols: symbols to load
    @return: (na_price, ldt_timestamps) - array of close prices with gaps
             filled forward, backward, then with 1.0, and the list of
             trading-day timestamps (stamped at 16:00)
    """
    # Closing time is 1600 hrs
    trading_days = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))

    # Yahoo is the data source; read every field in one go.
    source = da.DataAccess('Yahoo', cachestalltime=0)
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    by_field = dict(zip(fields, source.get_data(trading_days, ls_symbols, fields)))

    # Fill the NaNs in the data.
    for field in fields:
        frame = by_field[field].fillna(method='ffill')
        frame = frame.fillna(method='bfill')
        by_field[field] = frame.fillna(1.0)

    # Close prices for all the days, as a numpy ndarray.
    return by_field['close'].values, trading_days
Esempio n. 28
0
def main():
    """Run the 2008 and 2012 price-drop studies over 2008-01-01..2009-12-31.

    The symbol lists 'sp5002008' and 'sp5002012' are read through the
    module-level ``dataObj``; 'SPY' is appended to each list before it is
    handed to ``create_study`` together with the thresholds to study.
    """
    dt_start = dt.datetime(2008, 1, 1)
    dt_end = dt.datetime(2009, 12, 31)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))

    global dataObj

    ls_symbols_2012 = dataObj.get_symbols_from_list('sp5002012')
    ls_symbols_2012.append('SPY')

    ls_symbols_2008 = dataObj.get_symbols_from_list('sp5002008')
    ls_symbols_2008.append('SPY')

    # A wider sweep ([5.0, 6.0, 7.0, 8.0, 9.0, 10.0] for both lists) was used
    # previously; only the thresholds below are studied now.
    lf_priceDrop2008 = [7.0, 8.0, 10.0]
    lf_priceDrop2012 = [6.0, 7.0, 9.0, 10.0]

    try:
        # The studies run sequentially (an earlier variant started one thread
        # per study).
        create_study(ls_symbols_2008, ldt_timestamps, '2008StudyPriceDrop',
                     lf_priceDrop2008)
        create_study(ls_symbols_2012, ldt_timestamps, '2012StudyPriceDrop',
                     lf_priceDrop2012)
    except Exception as exc:
        # Still best-effort, but report what actually failed and let
        # KeyboardInterrupt/SystemExit propagate.
        print("Error: unable to create study: %s" % exc)
Esempio n. 29
0
def simulate(startdate, enddate, ls_symbols, allocation):
    """Compute summary statistics of a fixed-allocation portfolio.

    Args:
        startdate: Start of the simulated range.
        enddate: End of the simulated range.
        ls_symbols: Symbols held in the portfolio.
        allocation: Per-symbol weights, multiplied against the normalized
            close prices (one weight per symbol column).

    Returns:
        ``(std, avg_daily_return, sharpe_ratio, cul_return)`` where the Sharpe
        ratio is annualized with sqrt(252) and ``cul_return`` is the last
        portfolio value divided by the first.
    """
    # We need closing prices so the timestamp should be hours=16.
    dt_timeofday = dt.timedelta(hours=16)

    # Use the caller's range; the previous code referenced the undefined
    # names dt_start/dt_end and ignored the parameters.
    ldt_timestamps = du.getNYSEdays(startdate, enddate, dt_timeofday)

    # Creating an object of the dataaccess class with Yahoo as the source.
    c_dataobj = da.DataAccess('Yahoo', cachestalltime=0)
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']

    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # Normalize every symbol to its first close, weight it, and sum across
    # symbols to get the daily portfolio value. No NaN filling is done here.
    na_price = d_data['close'].values / d_data['close'].values[0]
    na_price *= allocation
    na_price = na_price.sum(axis=1)

    # returnize0 is given a copy so na_price keeps the portfolio values.
    r = tsu.returnize0(na_price.copy())
    avg_daily_return = np.average(r)
    std = np.std(r)

    sharpe_ratio = avg_daily_return / std * math.sqrt(252)

    cul_return = na_price[-1] / na_price[0]
    return std, avg_daily_return, sharpe_ratio, cul_return
Esempio n. 30
0
def run(dt_start, dt_end, ls_symbols, alloc):
    """Print and return statistics of an allocated portfolio.

    Close prices are normalized to their first row, converted to daily
    returns with ``tsu.returnize0``, weighted by ``alloc`` and summed per day.

    Returns:
        ``(std_dev, avg_daily_ret, sharpe, cum_ret)``; the last two come from
        ``calc_sharpe_ratio`` and ``calc_cum_return``.
    """
    trading_days = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))
    fields = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    source = da.DataAccess('Yahoo')
    frames = source.get_data(trading_days, ls_symbols, fields)
    close = dict(zip(fields, frames))['close'].values

    # Daily returns of each normalized price series, scaled by its weight.
    normalized = close / close[0, :]
    weighted_returns = tsu.returnize0(normalized.copy()) * alloc
    print('Allocated-Adjusted Daily Returns')
    print(weighted_returns)

    portfolio_returns = np.sum(weighted_returns, 1)
    print('Total Daily Returns: ')
    print(portfolio_returns)

    std_dev = np.std(portfolio_returns)
    print('Standard Deviation (Vol): ' + str(std_dev))

    avg_daily_ret = np.average(portfolio_returns)
    print('Average Daily Return: ' + str(avg_daily_ret))

    sharpe = calc_sharpe_ratio(avg_daily_ret, std_dev)
    print('Sharpe: ' + str(sharpe))

    cum_ret = calc_cum_return(portfolio_returns)
    print('Cumulative Return: ' + str(cum_ret))

    return std_dev, avg_daily_ret, sharpe, cum_ret
Esempio n. 31
0
def simulate(start_date, end_date, ls_symbols):
    """Brute-force the allocation with the highest Sharpe ratio.

    Every allocation whose weights are multiples of 10% and sum to 100% is
    evaluated on the normalized, forward/backward-filled close prices.

    Args:
        start_date: Start of the range to evaluate.
        end_date: End of the range to evaluate.
        ls_symbols: Symbols to allocate between.

    Returns:
        The best allocation as a list of weights, one per symbol.

    Raises:
        ValueError: If no allocation produces a finite Sharpe ratio.
    """
    print("Start Date: %s" % start_date)
    print("End Date: %s" % end_date)
    print("Symbols: %s" % ls_symbols)

    dt_timeofday = dt.timedelta(hours=16)
    # Use the parameters; the previous code referenced the undefined names
    # dt_start/dt_end.
    ldt_timestamps = du.getNYSEdays(start_date, end_date, dt_timeofday)

    # get data
    c_dataobj = da.DataAccess('Yahoo')
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # fill forward, then backward
    df_rets = d_data['close'].copy()
    df_rets = df_rets.fillna(method='ffill')
    df_rets = df_rets.fillna(method='bfill')

    # normalized prices, i.e. cumulative return of each symbol
    na_price = df_rets.values
    na_normalized_price = na_price / na_price[0, :]

    # Enumerate allocations in integer tenths so the "sums to 100%" test is
    # exact; comparing float sums with == 1.0 silently drops valid portfolios
    # such as (0.1, 0.2, 0.7).
    legal_ports = [
        [tenth / 10.0 for tenth in tenths]
        for tenths in itertools.product(range(11), repeat=len(ls_symbols))
        if sum(tenths) == 10
    ]

    # Track the statistics of the best allocation so the report below
    # describes it rather than whichever combination was evaluated last.
    best = None
    for comb in legal_ports:
        na_port_rets = np.sum(na_normalized_price * comb, axis=1)
        port_daily_rets = tsu.returnize0(na_port_rets.copy())

        port_mean = np.mean(port_daily_rets)
        port_std = np.std(port_daily_rets)
        sharpe_port = np.sqrt(252) * (port_mean / port_std)
        if not np.isfinite(sharpe_port):
            # e.g. zero volatility; cannot be ranked
            continue
        if best is None or sharpe_port > best[0]:
            best = (sharpe_port, comb, port_std, port_mean, na_port_rets[-1])

    if best is None:
        raise ValueError("no allocation produced a finite Sharpe ratio")

    highest_sharpe, opt_alloc, best_std, best_mean, best_cum_ret = best

    print("Volatility (stdev of daily returns): %f" % best_std)
    print("Sharpe Ratio: %f" % highest_sharpe)
    print("Optimal Alloc: %s" % opt_alloc)
    print("Average daily return: %f" % best_mean)
    print("Cumulative Return: %f" % best_cum_ret)

    return opt_alloc
Esempio n. 32
0
orders_csv_pd = orders_csv_pd.sort(['Date'])
orders_csv_pd = orders_csv_pd.reset_index(drop=True)
orders_csv_np = orders_csv_pd.values[:, 1:]
syms = set(orders_csv_np[:, 1])

for i in range(len(orders_csv_pd['Date'])):
    orders_csv_pd['Date'][i] = parser.parse(orders_csv_pd['Date'][i])

orders_csv_pd['Date'] = pd.DatetimeIndex(orders_csv_pd['Date'])

start_date = orders_csv_pd['Date'].min() - dt.timedelta(hours=16)
end_date = orders_csv_pd['Date'].max()
keys = ['actual_close', 'close']
database = da.DataAccess('Yahoo')
closetime = dt.timedelta(hours=16)
opentimes = du.getNYSEdays(start_date, end_date, closetime)
prices = database.get_data(opentimes, syms, keys)
prices = dict(zip(keys, prices))
prices_close = prices['close']
prices_close = prices_close.fillna(method='ffill')
prices_close = prices_close.fillna(method='bfill')

cash = 50000
casharray = copy.deepcopy(prices_close) * 0
casharray = casharray[list(syms)[0]]
own = copy.deepcopy(prices_close) * 0
valuearray = casharray.copy()

for row in range(len(orders_csv_pd)):
    if orders_csv_pd.ix[row]['Order'] == 'Buy':
        own[orders_csv_pd.ix[row]['Sym']][
Esempio n. 33
0
@author: Glacier
'''
import QSTK.qstkutil.qsdateutil as du
import QSTK.qstkutil.DataAccess as da

import datetime as dt
import matplotlib.pyplot as plt
import pandas
import numpy as np

# Read one symbol from the 'ML4Trading' data source and print its first
# returned frame as a numpy array.
dataobj = da.DataAccess('ML4Trading')
ls_symbols = dataobj.get_all_symbols()
# print("All symbols: ", ls_symbols)

symbols_toread = ['ML4T-000']

# Alternative kept from the original: a single hand-picked timestamp instead
# of a date range.
# ldt_timestamps = [dt.datetime(2012, 9, 12, 16)]

# Plain decimal literals: the previous `01` is an octal literal on Python 2
# and a syntax error on Python 3.
dtstart = dt.datetime(2012, 8, 1)
dtend = dt.datetime(2012, 9, 13)
ldttimestamps = du.getNYSEdays(dtstart, dtend, dt.timedelta(hours=16))

lsKeys = ['open', 'high', 'low', 'close', 'volume']
# Original author's note: "square bracket will be wrong".
ldfdata = dataobj.get_data(ldttimestamps, symbols_toread, lsKeys)
# get_data returns one frame per key; index 0 corresponds to lsKeys[0].
ldfdata = np.array(ldfdata[0])
print(ldfdata)
                               delimiter=',',
                               skipinitialspace=True)
     for row in order_reader:
         date = dt.datetime(int(row[0]), int(row[1]), int(row[2]), 16)
         o = Order(action=row[4], date=date, tick=row[3], shares=row[5])
         order_list.append(o)
 # order_list needs to be sorted. Otherwise the algorithm won't work.
 date_list = [x.date for x in order_list]
 date_list.sort()
 dt_start = date_list[0]
 dt_end = date_list[-1]
 tick_set = sets.Set([x.tick for x in order_list])
 ls_symbols = ['$SPX']
 while (tick_set):
     ls_symbols.append(tick_set.pop())
 ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
 all_stocks = get_tickdata(ls_symbols=ls_symbols,
                           ldt_timestamps=ldt_timestamps)
 pf = Portfolio(equities=all_stocks,
                cash=cash,
                dates=ldt_timestamps,
                order_list=order_list)
 pf.sim()
 equity_col = ['buy', 'sell', 'close']
 pf.csvwriter(csv_file=value_file, d=',', cash=False)
 print "Details of the Performance of the portfolio :"
 print "Data Range :", ldt_timestamps[0], "to", ldt_timestamps[-1]
 print "Sharpe Ratio of Fund :", pf.sharpe()
 print "Sortino Ratio of Fund :", pf.sortino()
 print "Sharpe Ratio of $SPX :", pf.equities['$SPX'].sharpe()
 print "Total Return of Fund :", pf.totalrtn()
Esempio n. 35
0
def calculateBBands(startdate="January 1,2010", enddate="December 31, 2010", ls_symbols=None, lookBack=20, plot=False):
    """Compute Bollinger values of the close price for the given symbols.

    The Bollinger value is ``(price - rolling_mean) / rolling_std`` over a
    ``lookBack``-day window; the plotted bands sit one rolling standard
    deviation above and below the rolling mean.

    Args:
        startdate: Start date as a string understood by ``dtParser.parse``.
        enddate: End date as a string understood by ``dtParser.parse``.
        ls_symbols: Symbols to load; defaults to ``["GOOG"]``.
        lookBack: Rolling window length.
        plot: When true, the bands are drawn and saved to 'bbands.pdf'.

    Returns:
        A DataFrame of Bollinger values indexed by trading timestamp, with
        one column per symbol.
    """
    # None stands in for the former mutable list default.
    if ls_symbols is None:
        ls_symbols = ["GOOG"]

    dt_start = dtParser.parse(startdate)
    dt_end = dtParser.parse(enddate)

    # We need closing prices so the timestamp should be hours=16.
    dt_timeofday = dt.timedelta(hours=16)

    # Get a list of trading days between the start and the end.
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
    ldt_timestamps.sort()

    c_dataobj = da.DataAccess('Yahoo')

    # Read every field in one go; get_data returns one frame per key.
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # Fill NaNs forward, then backward, then with 1.0.
    for s_key in ls_keys:
        d_data[s_key] = d_data[s_key].fillna(method='ffill')
        d_data[s_key] = d_data[s_key].fillna(method='bfill')
        d_data[s_key] = d_data[s_key].fillna(1.0)

    na_price = d_data['close'].values

    rolling_mean = pd.rolling_mean(na_price, lookBack)
    rolling_std = pd.rolling_std(na_price, lookBack)
    up_band = rolling_mean + rolling_std
    low_band = rolling_mean - rolling_std

    boll_val = (na_price - rolling_mean) / rolling_std

    dt_boll_val = pd.DataFrame(boll_val)
    dt_boll_val.index = ldt_timestamps
    dt_boll_val.columns = ls_symbols

    print(dt_boll_val.tail(5))

    if plot:
        # Draw on a cleared figure 1. The previous code first created an
        # extra figure with plt.subplots() whose handles were never used.
        plt.figure(1)
        plt.clf()

        # Upper panel: prices between the bands.
        plt.subplot(211)
        plt.plot(ldt_timestamps, up_band)
        plt.plot(ldt_timestamps, na_price)
        plt.plot(ldt_timestamps, low_band)
        plt.axhline(y=0, color='r')
        plt.legend(['up', 'Prices', 'low'], loc=4)
        plt.ylabel('Rolling Mean')
        plt.xlabel('Dates')

        # Lower panel: rolling mean between the bands.
        plt.subplot(212)
        plt.plot(ldt_timestamps, up_band)
        plt.plot(ldt_timestamps, rolling_mean)
        plt.plot(ldt_timestamps, low_band)
        plt.axhline(y=0, color='r')
        plt.legend(['up', 'rolling Mean', 'low'], loc=4)
        plt.ylabel('BBands')
        plt.xlabel('Dates')
        plt.savefig('bbands.pdf', format='pdf')

    return dt_boll_val
Esempio n. 36
0
import pandas as pd
import time, sys
import numpy as np
import argparse

def datetimeNumpy(array):
    """Build a 16:00 datetime from a row whose first three fields are
    year, month and day (numbers or numeric strings such as '2011.0')."""
    year = int(float(array[0]))
    month = int(float(array[1]))
    day = int(float(array[2]))
    return dt.datetime(year, month, day, 16)

# Script entry point: compare one symbol's close price with a custom series
# taken from a CSV file. Usage: <script> <inputFile> <symbol>
if __name__ == '__main__':
    inputFile =  sys.argv[1]
    symbol = sys.argv[2]
    # Rows are loaded as strings. Columns 0-2 are parsed as year, month, day
    # by datetimeNumpy; column 3 is used below as the 'Custom' series.
    # NOTE(review): the original comment listed the columns as
    # "Year, month, Symbol, Order, amount" - confirm the actual layout.
    value = np.loadtxt(inputFile, delimiter=',', dtype='str' )
    dates = np.apply_along_axis( datetimeNumpy, axis=1, arr=value )
    # Trading days between the earliest and latest date in the file.
    start, end = min(dates), max(dates)
    ldt=du.getNYSEdays(start, end, dt.timedelta(hours=16))
    c_dataobj = da.DataAccess('Yahoo')
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = c_dataobj.get_data(ldt, [symbol], ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))
    # Fill NaNs forward, then backward, then with 1.0.
    for s_key in ls_keys:
        d_data[s_key] = d_data[s_key].fillna(method='ffill')
        d_data[s_key] = d_data[s_key].fillna(method='bfill')
        d_data[s_key] = d_data[s_key].fillna(1.0)
    na_price = d_data['close']
    # NOTE(review): this presumably requires one input row per trading day in
    # ldt, otherwise the column lengths will not match - confirm.
    na_price['Custom'] = value[:,3]
    na_price=na_price.values.astype(float)
    # Normalize both columns to their first row.
    na_normalized_price = na_price / na_price[0, :]

    # Plotting the prices with x-axis=timestamps
    plt.clf()
def main():
    """Plot cumulative returns of the portfolio in 'tutorial3portfolio.csv'.

    Symbols unknown to the Yahoo data source are dropped, three years of
    close prices ending 2011-01-01 are loaded, and the cumulative return of
    each component and of the weighted portfolio is saved to 'tutorial3.pdf'.
    """
    # Load (symbol, allocation) records, skipping the header row.
    portfolio = np.loadtxt('tutorial3portfolio.csv',
                           dtype='S5,f4',
                           delimiter=',',
                           comments="#",
                           skiprows=1)
    print(portfolio)

    # Order the records by symbol name.
    portfolio = sorted(portfolio, key=lambda record: record[0])
    print(portfolio)

    # Split the records into parallel symbol / allocation lists.
    ls_port_syms = [record[0] for record in portfolio]
    lf_port_alloc = [record[1] for record in portfolio]

    c_dataobj = da.DataAccess('Yahoo')
    known_syms = c_dataobj.get_all_symbols()
    # Symbols present in the portfolio but unknown to the data source.
    ls_bad_syms = list(set(ls_port_syms) - set(known_syms))

    if ls_bad_syms:
        print("Portfolio contains bad symbols : ", ls_bad_syms)

    # Drop each bad symbol together with its allocation.
    for bad_sym in ls_bad_syms:
        position = ls_port_syms.index(bad_sym)
        del ls_port_syms[position]
        del lf_port_alloc[position]

    # Three years (1095 days) of history ending 2011-01-01, stamped at 16:00.
    dt_end = dt.datetime(2011, 1, 1)
    dt_start = dt_end - dt.timedelta(days=1095)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))

    # Read every field in one call; get_data returns one frame per key.
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_port_syms, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # Work on a filled copy of the close prices.
    df_rets = d_data['close'].copy()
    for how in ('ffill', 'bfill'):
        df_rets = df_rets.fillna(method=how)
    df_rets = df_rets.fillna(1.0)

    # returnize0 works in place on the ndarray (not on dataframes).
    na_rets = df_rets.values
    tsu.returnize0(na_rets)

    # Weighted daily portfolio return and the cumulative products.
    na_portrets = np.sum(na_rets * lf_port_alloc, axis=1)
    na_port_total = np.cumprod(na_portrets + 1)
    na_component_total = np.cumprod(na_rets + 1, axis=0)

    # Components are drawn faintly, the portfolio on top of them.
    plt.clf()
    fig = plt.figure()
    fig.add_subplot(111)
    plt.plot(ldt_timestamps, na_component_total, alpha=0.4)
    plt.plot(ldt_timestamps, na_port_total)
    plt.legend(ls_port_syms + ['Portfolio'])
    plt.ylabel('Cumulative Returns')
    plt.xlabel('Date')
    fig.autofmt_xdate(rotation=45)
    plt.savefig('tutorial3.pdf', format='pdf')
Esempio n. 38
0
def main():
    """Simulate a Bollinger-band event strategy and compare it with SPY.

    Close and volume data for the symbols in 'sp5002012.txt' plus 'SPY' are
    loaded for 2008-01-01..2009-12-31. An event fires for a symbol on day i
    when its 20-day Bollinger value is <= -2.0, was >= -2.0 the day before,
    and SPY's Bollinger value is >= 1.0. Each event adds 10000/close shares
    to ``df_events`` for up to five days. Cash, position values and totals
    are written to CSV files under ``c:/``; summary statistics are printed
    and the normalized fund and market series are plotted.
    """
    dt_start = dt.datetime(2008, 1, 1)
    dt_end = dt.datetime(2009, 12, 31)

    sym_list = 'sp5002012.txt'
    market_sym = 'SPY'

    starting_cash = 100000
    bol_period = 20

    print "Setting Up ..."
    # Obtaining data from Yahoo
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))
    dataobj = da.DataAccess('Yahoo')
    ls_symbols = load_symlists(sym_list)
    ls_symbols.append(market_sym)
    """
	key values. Creating a dictionary.
	"""
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))
    """
	fill out N/A values
	"""
    for s_key in ls_keys:
        d_data[s_key] = d_data[s_key].fillna(method='ffill')
        d_data[s_key] = d_data[s_key].fillna(method='bfill')
        d_data[s_key] = d_data[s_key].fillna(1.0)
    """
	df_close contains only a close column.
	"""
    df_close = d_data['close']
    df_volume = d_data['volume']

    print "Finding Events ..."
    ''' 
	Finding the event dataframe 
	'''
    # The market series is SPY (later labelled "$SPX" in the printed report).
    ts_market = df_close['SPY']

    # Creating an empty dataframe
    df_events = copy.deepcopy(df_close) * 0

    # Time stamps for the event range
    ldt_timestamps = df_close.index

    rolling_mean = pd.rolling_mean(df_close, window=bol_period)
    rolling_std = pd.rolling_std(df_close, window=bol_period)

    # The volume statistics are computed but not used further in this function.
    rolling_mean_vol = pd.rolling_mean(df_volume, window=bol_period)
    rolling_std_vol = pd.rolling_std(df_volume, window=bol_period)
    '''
	finding_events starts here
	'''

    # Bollinger value: distance from the rolling mean in rolling std units.
    bol_clo = (df_close - rolling_mean) / rolling_std

    for s_sym in ls_symbols:
        # Stop 5 days before the end so i + delay below stays in range.
        for i in range(1, len(ldt_timestamps) - 5):
            bol_tod = bol_clo[s_sym].loc[ldt_timestamps[i]]
            bol_yes = bol_clo[s_sym].loc[ldt_timestamps[i - 1]]
            bol_tod_mark = bol_clo["SPY"].loc[ldt_timestamps[i]]

            if (bol_tod <= -2.0 and bol_yes >= -2.0 and bol_tod_mark >= 1.0):
                # Record 10000/close shares for up to 5 days, stopping after
                # the first day whose close exceeds the event-day close.
                for delay in range(5):
                    df_events[s_sym].loc[ldt_timestamps[i + delay]] += (
                        10000.00 / df_close[s_sym].loc[ldt_timestamps[i]])
                    if df_close[s_sym].loc[ldt_timestamps[
                            i +
                            delay]] > df_close[s_sym].loc[ldt_timestamps[i]]:
                        break

    print "Starting Simulation ..."

    # Symbols with at least one positive entry in df_events.
    ls_symbols_red = []

    for sym in ls_symbols:
        for i in range(len(ldt_timestamps)):
            if df_events[sym].loc[ldt_timestamps[i]] > 0:
                ls_symbols_red.append(sym)
                break
    '''
	value and cash are zero arrays
	'''
    # df_orders = copy.deepcopy(df_events)
    print "ls_symbols_red", ls_symbols_red
    df_orders = df_events[ls_symbols_red]
    value = copy.deepcopy(df_events) * 0
    cash = copy.deepcopy(value[market_sym])
    '''
	Update value
	'''
    print "Updating Value and Cash Array..."
    for s_sym in ls_symbols_red:
        for i in range(len(ldt_timestamps)):
            ind_time = ldt_timestamps[i]
            if i == 0:
                # First day: a positive holding is a fresh purchase.
                if df_orders[s_sym].loc[ind_time] > 0:
                    sym_value = df_orders[s_sym].loc[ind_time] * df_close[
                        s_sym].loc[ind_time]
                    value[s_sym].loc[ind_time] = sym_value
                    cash[ind_time] -= sym_value
            else:
                ind_time_yest = ldt_timestamps[i - 1]
                # New position: held today, nothing held yesterday.
                if df_orders[s_sym].loc[ind_time] > 0 and df_orders[s_sym].loc[
                        ind_time_yest] == 0:
                    sym_value = df_orders[s_sym].loc[ind_time] * df_close[
                        s_sym].loc[ind_time]
                    value[s_sym].loc[ind_time] = sym_value
                    cash[ind_time] -= sym_value
                # Existing position: revalue it at today's close and book the
                # change in share count against cash at yesterday's close.
                elif df_orders[s_sym].loc[ind_time_yest] > 0:
                    sym_value = df_orders[s_sym].loc[ind_time] * df_close[
                        s_sym].loc[ind_time]
                    value[s_sym].loc[ind_time] = sym_value
                    cash[ind_time] -= (df_orders[s_sym].loc[ind_time] -
                                       df_orders[s_sym].loc[ind_time_yest]
                                       ) * df_close[s_sym].loc[ind_time_yest]
    '''
	Update cash
	'''
    print "Modifying Cash Array..."
    # Turn the per-day cash changes into a running balance.
    cash[ldt_timestamps[0]] += starting_cash
    for i in range(1, len(ldt_timestamps)):
        ind_prev = cash[ldt_timestamps[i - 1]]
        ind_curr = cash[ldt_timestamps[i]]
        cash[ldt_timestamps[i]] = ind_curr + ind_prev

    # Output paths are hard-coded to the root of drive C:.
    cash.to_csv("c:/cash.csv", sep=",", mode="w")
    value.to_csv("c:/value.csv", sep=",", mode="w")

    print "Updating Total..."
    # From here on `cash` holds cash plus position values (the fund total).
    for i in range(len(ldt_timestamps)):
        sym_sum = 0
        for s_sym in ls_symbols_red:
            sym_sum += value[s_sym].ix[ldt_timestamps[i]]
        cash[ldt_timestamps[i]] += sym_sum

    # cash_raw is not used again in this function.
    cash_raw = copy.deepcopy(cash)

    cash.to_csv("c:/total.csv", sep=",", mode="w")
    ts_market.to_csv("c:/ts_market.csv", sep=",", mode="w")

    # Normalize both series to their first value.
    cash /= cash[0]
    ts_market /= ts_market[0]

    print "Calculating Total Return..."
    tot_ret_fund = cash[-1]
    tot_ret_mark = ts_market[-1]

    print "Calculating Volatility..."
    '''
	Create new array for fund and market
	'''
    # Daily returns; element 0 stays 0 for both series.
    daily_ret_fund = np.zeros((len(ldt_timestamps), 1))
    daily_ret_mark = copy.deepcopy(daily_ret_fund)

    for i in range(1, len(ldt_timestamps)):
        daily_ret_fund[
            i] = cash[ldt_timestamps[i]] / cash[ldt_timestamps[i - 1]] - 1
        daily_ret_mark[i] = ts_market[ldt_timestamps[i]] / ts_market[
            ldt_timestamps[i - 1]] - 1

    vol_fund = np.std(daily_ret_fund)
    vol_mark = np.std(daily_ret_mark)

    print "Calculating Average Daily Return..."
    avg_ret_fund = np.average(daily_ret_fund)
    avg_ret_mark = np.average(daily_ret_mark)

    print "Calculating Sharpe Ratio..."
    # Annualized with sqrt(252).
    sharpe_fund = np.sqrt(252) * avg_ret_fund / vol_fund
    sharpe_mark = np.sqrt(252) * avg_ret_mark / vol_mark

    # The "$SPX" labels below report the SPY series loaded above.
    print "Start Date:", dt_start
    print "End Date  :", dt_end
    print " "
    print "Sharpe Ratio of Fund: ", sharpe_fund
    print "Sharpe Ratio of $SPX: ", sharpe_mark
    print " "
    print "Total Return of Fund: ", tot_ret_fund
    print "Total Return of $SPX: ", tot_ret_mark
    print " "
    print "Standard Deviation of Fund: ", vol_fund
    print "Standard Deviation of $SPX: ", vol_mark
    print " "
    print "Average Daily Return of Fund: ", avg_ret_fund
    print "Average Daily Return of $SPX: ", avg_ret_mark

    # Fund in red, market in blue.
    plt.plot(cash.index, cash, 'r', ts_market.index, ts_market, 'b')
    plt.show()
Esempio n. 39
0
def main():
    """Plot prices, normalized prices and daily returns for five symbols.

    Close prices for 2010-01-01..2010-01-15 are read from Yahoo, filled, and
    rendered into 'adjustedclose.pdf', 'normalized.pdf', 'rets.pdf',
    'scatterSPXvXOM.pdf' and 'scatterSPXvGLD.pdf'.
    """
    ls_symbols = ["AAPL", "GLD", "GOOG", "$SPX", "XOM"]
    # Column positions of the symbols compared in the return plots.
    col_gld = ls_symbols.index("GLD")
    col_spx = ls_symbols.index("$SPX")
    col_xom = ls_symbols.index("XOM")

    dt_start = dt.datetime(2010, 1, 1)
    dt_end = dt.datetime(2010, 1, 15)

    # Closing prices are wanted, so every timestamp is stamped at 16:00.
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))

    c_dataobj = da.DataAccess('Yahoo')

    # Read every field in one call; get_data returns one frame per key.
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)

    # Fill NaNs forward, then backward, then with 1.0.
    d_data = {}
    for s_key, frame in zip(ls_keys, ldf_data):
        frame = frame.fillna(method='ffill')
        frame = frame.fillna(method='bfill')
        d_data[s_key] = frame.fillna(1.0)

    na_price = d_data['close'].values

    # Raw close prices over time.
    plt.clf()
    plt.plot(ldt_timestamps, na_price)
    plt.legend(ls_symbols)
    plt.ylabel('Adjusted Close')
    plt.xlabel('Date')
    plt.savefig('adjustedclose.pdf', format='pdf')

    # Prices scaled to start at 1, showing relative returns.
    na_normalized_price = na_price / na_price[0, :]
    plt.clf()
    plt.plot(ldt_timestamps, na_normalized_price)
    plt.legend(ls_symbols)
    plt.ylabel('Normalized Close')
    plt.xlabel('Date')
    plt.savefig('normalized.pdf', format='pdf')

    # returnize0 works in place on an ndarray, so hand it a copy.
    na_rets = na_normalized_price.copy()
    tsu.returnize0(na_rets)

    # Daily returns of $SPX and XOM over (at most) the first 50 days.
    first_50 = slice(0, 50)
    plt.clf()
    plt.plot(ldt_timestamps[first_50], na_rets[first_50, col_spx])
    plt.plot(ldt_timestamps[first_50], na_rets[first_50, col_xom])
    plt.axhline(y=0, color='r')
    plt.legend(['$SPX', 'XOM'])
    plt.ylabel('Daily Returns')
    plt.xlabel('Date')
    plt.savefig('rets.pdf', format='pdf')

    # Scatter of daily returns: XOM against $SPX.
    plt.clf()
    plt.scatter(na_rets[:, col_spx], na_rets[:, col_xom], c='blue')
    plt.ylabel('XOM')
    plt.xlabel('$SPX')
    plt.savefig('scatterSPXvXOM.pdf', format='pdf')

    # Scatter of daily returns: GLD against $SPX.
    plt.clf()
    plt.scatter(na_rets[:, col_spx], na_rets[:, col_gld], c='blue')
    plt.ylabel('GLD')
    plt.xlabel('$SPX')
    plt.savefig('scatterSPXvGLD.pdf', format='pdf')
def main(argv):
    """Replay an orders CSV and write the daily portfolio value to a CSV.

    Each order row is read as ``year, month, day, symbol, action, shares``;
    any action other than "Buy" is treated as a sale. The simulation starts
    with 1,000,000 in cash and values positions at the 'close' price.

    Args:
        argv: ``[orders_file, values_file]``.
    """
    orders_file = argv[0]
    values_file = argv[1]

    symbols = []
    dates = []
    order_file = []

    # `with` closes the orders file; it used to be left open.
    with open(orders_file, 'rU') as orders_handle:
        for row in csv.reader(orders_handle, delimiter=","):
            order_file.append(row)
            symbols.append(row[3])
            # A real list, so the indexing below does not depend on map()
            # returning one.
            dates.append([int(field) for field in row[:3]])

    # Cash is tracked as a pseudo-symbol column next to the real symbols.
    symbols.append("_CASH")
    uniq_sym = sorted(set(symbols))

    # NOTE(review): the date range comes from the first and last rows, so the
    # orders file is presumably expected to be sorted by date - confirm.
    dt_start = dt.datetime(dates[0][0], dates[0][1], dates[0][2])
    dt_end = dt.datetime(dates[-1][0], dates[-1][1], dates[-1][2])
    dt_end_read = dt_end + dt.timedelta(days=1)

    data_obj = da.DataAccess('Yahoo')
    ls_keys = ['close', 'actual_close']
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end_read,
                                    dt.timedelta(hours=16))

    ldf_data = data_obj.get_data(ldt_timestamps, uniq_sym, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # Holdings per trading day; only days with orders are written explicitly
    # and the gaps are padded forward afterwards.
    trades_data = pd.DataFrame(index=list(ldt_timestamps),
                               columns=list(uniq_sym))
    curr_stocks = dict()

    for sym in uniq_sym:
        curr_stocks[sym] = 0
        trades_data[sym][ldt_timestamps[0]] = 0

    curr_cash = 1000000
    trades_data["_CASH"][ldt_timestamps[0]] = curr_cash

    for index, row in enumerate(order_file):
        curr_date = dt.datetime(dates[index][0], dates[index][1],
                                dates[index][2], 16)
        sym = row[3]
        # NOTE(review): int() truncates the price to whole units; kept as-is
        # because it determines the written values - confirm it is intended.
        stock_value = int(d_data['close'][sym][curr_date])
        quantity = int(row[5])

        # Buying spends cash and adds shares; anything else is a sale.
        direction = 1 if row[4] == "Buy" else -1
        curr_cash -= direction * stock_value * quantity
        curr_stocks[sym] += direction * quantity
        trades_data["_CASH"][curr_date] = curr_cash
        trades_data[sym][curr_date] = curr_stocks[sym]

    trades_data = trades_data.fillna(method="pad")

    # `with` guarantees the values file is flushed and closed.
    with open(values_file, 'wb') as values_handle:
        writer = csv.writer(values_handle, delimiter=',')

        for curr_date in trades_data.index:
            value_of_portfolio = 0

            for sym in uniq_sym:
                if sym == "_CASH":
                    value_of_portfolio += trades_data[sym][curr_date]
                else:
                    value_of_portfolio += trades_data[sym][curr_date] * int(
                        d_data['close'][sym][curr_date])

            writer.writerow([curr_date, value_of_portfolio])
Esempio n. 41
0
    values_fund, stddev_fund, avgret_fund, sharpe_fund, return_fund = calculate_for(
        fund_values)

    print "The final value of the portfolio using the sample file is -- %s" % last_line
    print ""
    print "Details of the Performance of the portfolio"
    print ""
    print "Data Range :  %s  to  %s" % (start_date, end_date)
    print ""
    print "Sharpe Ratio of Fund : %f" % sharpe_fund
    print "Sharpe Ratio of %s : %f" % (comparison[0], sharpe_comp)
    print ""
    print "Total Return of Fund : %f" % return_fund
    print "Total Return of %s : %f" % (comparison[0], return_comp)
    print ""
    print "Standard Deviation of Fund : %f" % stddev_fund
    print "Standard Deviation of %s : %f" % (comparison[0], stddev_comp)
    print ""
    print "Average Daily Return of Fund : %f" % avgret_fund
    print "Average Daily Return of %s : %f" % (comparison[0], avgret_comp)

    ldt_timestamps = du.getNYSEdays(start_date, end_date + dt.timedelta(1),
                                    dt.timedelta(hours=16))

    plt.plot(ldt_timestamps, values_fund, label='Portfolio')
    plt.plot(ldt_timestamps, values_comp, label=comparison[0])
    plt.legend()
    plt.ylabel('Returns')
    plt.xlabel('Date')
    plt.savefig('homework3.pdf', format='pdf')
Esempio n. 42
0
	
	f = open('2008Dow30.txt')
	lsSymTrain = f.read().splitlines() + ['$SPX']
	f.close()
	
	f = open('2010Dow30.txt')
	lsSymTest = f.read().splitlines() + ['$SPX']
	f.close()
	
	lsSym = list(set(lsSymTrain).union(set(lsSymTest)))
	
	dtStart = dt.datetime(2008,0o1,0o1)
	dtEnd = dt.datetime(2010,12,31)
	
	norObj = da.DataAccess('Norgate')	  
	ldtTimestamps = du.getNYSEdays( dtStart, dtEnd, dt.timedelta(hours=16) )	
	
	lsKeys = ['open', 'high', 'low', 'close', 'volume']
	
	ldfData = norObj.get_data( ldtTimestamps, lsSym, lsKeys ) #this line is important even though the ret value is not used
	
	for temp in ldfData:
		temp.fillna(method="ffill").fillna(method="bfill")
	
	ldfDataTrain = norObj.get_data( ldtTimestamps, lsSymTrain, lsKeys )
	ldfDataTest = norObj.get_data( ldtTimestamps, lsSymTest, lsKeys)
	
	for temp in ldfDataTrain:
		temp.fillna(method="ffill").fillna(method="bfill")
		
	for temp in ldfDataTest:
Esempio n. 43
0
import QSTK.qstkutil.qsdateutil as du
import QSTK.qstkutil.tsutil as tsu
import QSTK.qstkutil.DataAccess as da
import datetime as dt
import matplotlib.pyplot as plt
import pandas
from pylab import *

#
# Prepare to read the data: four symbols over the NYSE trading days of 2010,
# each day stamped at 16:00.
#
symbols = ["AAPL","GOOG","IBM","MSFT"]
startday = dt.datetime(2010,1,1)
endday = dt.datetime(2010,12,31)
timeofday=dt.timedelta(hours=16)
timestamps = du.getNYSEdays(startday,endday,timeofday)

# One get_data call per field; each call is passed a single key string.
dataobj = da.DataAccess('Yahoo')
voldata = dataobj.get_data(timestamps, symbols, "volume")
adjcloses = dataobj.get_data(timestamps, symbols, "close")
actualclose = dataobj.get_data(timestamps, symbols, "actual_close")

# Only a backward fill is applied to the close prices.
#adjcloses = adjcloses.fillna()
adjcloses = adjcloses.fillna(method='backfill')

# 20-day rolling statistics (a full window is required), plus an empty frame
# shaped timestamps x symbols for the Bollinger values.
rolling_mean = pandas.rolling_mean(adjcloses,20,min_periods=20)
rolling_std = pandas.rolling_std(adjcloses,20,min_periods=20)
bollinger_val = pandas.DataFrame(index=timestamps,columns=symbols)

for i in range(len(rolling_std[symbols[0]])):
    if rolling_std[symbols[0]][i] > 0:
Esempio n. 44
0
def yahoo_read_data(comparison):
    """Return the Yahoo 'close' values for the ``comparison`` symbols.

    The range runs from the module-level ``start_date`` to one day past the
    module-level ``end_date``, with timestamps at 16:00.
    """
    one_day = dt.timedelta(1)
    close_time = dt.timedelta(hours=16)
    trading_days = du.getNYSEdays(start_date, end_date + one_day, close_time)
    source = da.DataAccess('Yahoo')
    # Only one key is requested, so the first frame is the 'close' frame.
    frames = source.get_data(trading_days, comparison, ["close"])
    return frames[0].values
Esempio n. 45
0
def get_nyse_days_of_market_open_between(start_of_period, end_of_period):
    """Return ``du.getNYSEdays`` for the period, stamped at 16:00."""
    return du.getNYSEdays(start_of_period, end_of_period,
                          dt.timedelta(hours=16))
Esempio n. 46
0
def log500(sLog):
    '''
    @summary: Pulls close and volume data for a fixed symbol list (plus $SPX)
              from the Norgate source and passes it, together with the
              feature functions from getFeatureFuncs(), to applyFeatures().
    @param sLog: Forwarded unchanged to applyFeatures() as its sLog argument.
    @return: Nothing
    '''

    # Fixed symbol universe plus the $SPX index, kept in sorted order.
    lsUniverse = sorted([
        'A', 'AA', 'AAPL', 'ABC', 'ABT', 'ACE', 'ACN', 'ADBE', 'ADI', 'ADM',
        'ADP', 'ADSK', 'AEE', 'AEP', 'AES', 'AET', 'AFL', 'AGN', 'AIG', 'AIV',
        'AIZ', 'AKAM', 'AKS', 'ALL', 'ALTR', 'AMAT', 'AMD', 'AMGN', 'AMP',
        'AMT', 'AMZN', 'AN', 'ANF', 'ANR', 'AON', 'APA', 'APC', 'APD', 'APH',
        'APOL', 'ARG', 'ATI', 'AVB', 'AVP', 'AVY', 'AXP', 'AZO', 'BA', 'BAC',
        'BAX', 'BBBY', 'BBT', 'BBY', 'BCR', 'BDX', 'BEN', 'BF.B', 'BHI', 'BIG',
        'BIIB', 'BK', 'BLK', 'BLL', 'BMC', 'BMS', 'BMY', 'BRCM', 'BRK.B',
        'BSX', 'BTU', 'BXP', 'C', 'CA', 'CAG', 'CAH', 'CAM', 'CAT', 'CB',
        'CBG', 'CBS', 'CCE', 'CCL', 'CEG', 'CELG', 'CERN', 'CF', 'CFN', 'CHK',
        'CHRW', 'CI', 'CINF', 'CL', 'CLF', 'CLX', 'CMA', 'CMCSA', 'CME', 'CMG',
        'CMI', 'CMS', 'CNP', 'CNX', 'COF', 'COG', 'COH', 'COL', 'COP', 'COST',
        'COV', 'CPB', 'CPWR', 'CRM', 'CSC', 'CSCO', 'CSX', 'CTAS', 'CTL',
        'CTSH', 'CTXS', 'CVC', 'CVH', 'CVS', 'CVX', 'D', 'DD', 'DE', 'DELL',
        'DF', 'DFS', 'DGX', 'DHI', 'DHR', 'DIS', 'DISCA', 'DNB', 'DNR', 'DO',
        'DOV', 'DOW', 'DPS', 'DRI', 'DTE', 'DTV', 'DUK', 'DV', 'DVA', 'DVN',
        'EBAY', 'ECL', 'ED', 'EFX', 'EIX', 'EL', 'EMC', 'EMN', 'EMR', 'EOG',
        'EP', 'EQR', 'EQT', 'ERTS', 'ESRX', 'ETFC', 'ETN', 'ETR', 'EW', 'EXC',
        'EXPD', 'EXPE', 'F', 'FAST', 'FCX', 'FDO', 'FDX', 'FE', 'FFIV', 'FHN',
        'FII', 'FIS', 'FISV', 'FITB', 'FLIR', 'FLR', 'FLS', 'FMC', 'FO', 'FRX',
        'FSLR', 'FTI', 'FTR', 'GAS', 'GCI', 'GD', 'GE', 'GILD', 'GIS', 'GLW',
        'GME', 'GNW', 'GOOG', 'GPC', 'GPS', 'GR', 'GS', 'GT', 'GWW', 'HAL',
        'HAR', 'HAS', 'HBAN', 'HCBK', 'HCN', 'HCP', 'HD', 'HES', 'HIG', 'HNZ',
        'HOG', 'HON', 'HOT', 'HP', 'HPQ', 'HRB', 'HRL', 'HRS', 'HSP', 'HST',
        'HSY', 'HUM', 'IBM', 'ICE', 'IFF', 'IGT', 'INTC', 'INTU', 'IP', 'IPG',
        'IR', 'IRM', 'ISRG', 'ITT', 'ITW', 'IVZ', 'JBL', 'JCI', 'JCP', 'JDSU',
        'JEC', 'JNJ', 'JNPR', 'JNS', 'JOYG', 'JPM', 'JWN', 'K', 'KEY', 'KFT',
        'KIM', 'KLAC', 'KMB', 'KMX', 'KO', 'KR', 'KSS', 'L', 'LEG', 'LEN',
        'LH', 'LIFE', 'LLL', 'LLTC', 'LLY', 'LM', 'LMT', 'LNC', 'LO', 'LOW',
        'LSI', 'LTD', 'LUK', 'LUV', 'LXK', 'M', 'MA', 'MAR', 'MAS', 'MAT',
        'MCD', 'MCHP', 'MCK', 'MCO', 'MDT', 'MET', 'MHP', 'MHS', 'MJN', 'MKC',
        'MMC', 'MMI', 'MMM', 'MO', 'MOLX', 'MON', 'MOS', 'MPC', 'MRK', 'MRO',
        'MS', 'MSFT', 'MSI', 'MTB', 'MU', 'MUR', 'MWV', 'MWW', 'MYL', 'NBL',
        'NBR', 'NDAQ', 'NE', 'NEE', 'NEM', 'NFLX', 'NFX', 'NI', 'NKE', 'NOC',
        'NOV', 'NRG', 'NSC', 'NTAP', 'NTRS', 'NU', 'NUE', 'NVDA', 'NVLS',
        'NWL', 'NWSA', 'NYX', 'OI', 'OKE', 'OMC', 'ORCL', 'ORLY', 'OXY',
        'PAYX', 'PBCT', 'PBI', 'PCAR', 'PCG', 'PCL', 'PCLN', 'PCP', 'PCS',
        'PDCO', 'PEG', 'PEP', 'PFE', 'PFG', 'PG', 'PGN', 'PGR', 'PH', 'PHM',
        'PKI', 'PLD', 'PLL', 'PM', 'PNC', 'PNW', 'POM', 'PPG', 'PPL', 'PRU',
        'PSA', 'PWR', 'PX', 'PXD', 'QCOM', 'QEP', 'R', 'RAI', 'RDC', 'RF',
        'RHI', 'RHT', 'RL', 'ROK', 'ROP', 'ROST', 'RRC', 'RRD', 'RSG', 'RTN',
        'S', 'SAI', 'SBUX', 'SCG', 'SCHW', 'SE', 'SEE', 'SHLD', 'SHW', 'SIAL',
        'SJM', 'SLB', 'SLE', 'SLM', 'SNA', 'SNDK', 'SNI', 'SO', 'SPG', 'SPLS',
        'SRCL', 'SRE', 'STI', 'STJ', 'STT', 'STZ', 'SUN', 'SVU', 'SWK', 'SWN',
        'SWY', 'SYK', 'SYMC', 'SYY', 'T', 'TAP', 'TDC', 'TE', 'TEG', 'TEL',
        'TER', 'TGT', 'THC', 'TIE', 'TIF', 'TJX', 'TLAB', 'TMK', 'TMO', 'TROW',
        'TRV', 'TSN', 'TSO', 'TSS', 'TWC', 'TWX', 'TXN', 'TXT', 'TYC', 'UNH',
        'UNM', 'UNP', 'UPS', 'URBN', 'USB', 'UTX', 'V', 'VAR', 'VFC', 'VIA.B',
        'VLO', 'VMC', 'VNO', 'VRSN', 'VTR', 'VZ', 'WAG', 'WAT', 'WDC', 'WEC',
        'WFC', 'WFM', 'WFR', 'WHR', 'WIN', 'WLP', 'WM', 'WMB', 'WMT', 'WPI',
        'WPO', 'WU', 'WY', 'WYN', 'WYNN', 'X', 'XEL', 'XL', 'XLNX', 'XOM',
        'XRAY', 'XRX', 'YHOO', 'YUM', 'ZION', 'ZMH'
    ] + ['$SPX'])

    # Max lookback is 6 months: the window ends today at hour 16.
    dtWindowEnd = dt.datetime.now().replace(hour=16, minute=0, second=0,
                                            microsecond=0)
    dtWindowStart = dtWindowEnd - relativedelta(months=6)

    # Pull in current data.
    norObj = da.DataAccess('Norgate')

    # Two extra months on each side, for moving averages and future returns.
    dtPadding = relativedelta(months=2)
    ldtTimestamps = du.getNYSEdays(dtWindowStart - dtPadding,
                                   dtWindowEnd + dtPadding,
                                   dt.timedelta(hours=16))

    dfPrice = norObj.get_data(ldtTimestamps, lsUniverse, 'close')
    dfVolume = norObj.get_data(ldtTimestamps, lsUniverse, 'volume')

    # Feature functions from qstkfeat.features. NOTE: the last function is
    # the classification. The returned names are not needed here.
    lfcFeatures, ldArgs, _lsNames = getFeatureFuncs()

    # One DataFrame per feature is generated by applyFeatures, each with the
    # same index/column structure as the price data.
    applyFeatures(dfPrice, dfVolume, lfcFeatures, ldArgs, sLog=sLog)
Esempio n. 47
0
# Parse the comma-separated orders file into a structured array with the
# fields year, month, day, ticker, action and number.
data = np.loadtxt(
    inp,
    delimiter=',',
    dtype={
        'names': ('year', 'month', 'day', 'ticker', 'action', 'number'),
        'formats': ('I2', 'I1', 'I1', 'S6', 'S4', 'I2'),
    })
sym, vol, act = data['ticker'], data['number'], data['action']

# Every order is stamped at hour 16 of its calendar day.
hour = 16
day = [
    dt.datetime(data['year'][i], data['month'][i], data['day'][i], hour)
    for i in range(len(data))
]

# Distinct tickers; the order is whatever the set yields.
ls_symbols = list(set(sym))
#ls_symbols.append('_CASH')

# Trading days between the first and the last listed order, then their
# "close" prices from the Yahoo source.
dt_timeofday = dt.timedelta(hours=hour)
ldt_timestamps = du.getNYSEdays(day[0], day[-1], dt_timeofday)
c_dataobj = da.DataAccess('Yahoo')
df_close = c_dataobj.get_data(ldt_timestamps, ls_symbols, "close")


exist={}
for sn in range(len(data)):
    exist[day[sn]] = exist.get(day[sn], 0) + 0
    dt_action=day[sn]
    if sn>1:
        na_old = df_alloc.xs(day[sn-1]).values
        print sn, na_old
        if exist[dt_action]<1:
            exist[dt_action] = exist.get(dt_action, 0) + 1
            na_vals=na_old
            for stk in range(len(ls_symbols)):
Esempio n. 48
0
def main():
    '''Plot the efficient frontier of a small portfolio and its follow-up year.

    The frontier is computed on the 365 days before dt_end (training period)
    and again on the 365 days after it (test period). The training-period
    portfolios are then re-evaluated on the test-period returns. The chart is
    saved to tutorial8.pdf.

    Legend labels are derived from the actual period dates so they cannot
    drift away from the data being plotted.
    '''

    # Portfolio tickers.
    ls_symbols = ['TEF.MC', 'IBCX.MU', 'IEGA.L', 'IDYV.L', 'IWDA.L']

    # Creating an object of the dataaccess class with Yahoo as the source.
    c_dataobj = da.DataAccess('Yahoo')

    ls_all_syms = c_dataobj.get_all_symbols()
    # Bad symbols are symbols present in portfolio but not in all syms
    ls_bad_syms = list(set(ls_symbols) - set(ls_all_syms))
    for s_sym in ls_bad_syms:
        ls_symbols.remove(s_sym)

    # Start and End date of the charts
    dt_end = dt.datetime(2013, 1, 1)
    dt_start = dt_end - dt.timedelta(days=365)
    dt_test = dt_end + dt.timedelta(days=365)

    # We need closing prices so the timestamp should be hours=16.
    dt_timeofday = dt.timedelta(hours=16)

    # Get a list of trading days between the start and the end.
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)
    ldt_timestamps_test = du.getNYSEdays(dt_end, dt_test, dt_timeofday)

    # Reading just the close prices
    df_close = c_dataobj.get_data(ldt_timestamps, ls_symbols, "close")
    df_close_test = c_dataobj.get_data(ldt_timestamps_test, ls_symbols,
                                       "close")

    # Filling the data for missing NAN values
    df_close = df_close.fillna(method='ffill')
    df_close = df_close.fillna(method='bfill')
    df_close_test = df_close_test.fillna(method='ffill')
    df_close_test = df_close_test.fillna(method='bfill')

    # Copying the data values to a numpy array to get returns
    na_data = df_close.values.copy()
    na_data_test = df_close_test.values.copy()

    # Getting the daily returns (returnize0 works in place)
    tsu.returnize0(na_data)
    tsu.returnize0(na_data_test)

    # Calculating the frontier.
    (lf_returns, lf_std, lna_portfolios, na_avgrets,
     na_std) = getFrontier(na_data)
    (lf_returns_test, lf_std_test, unused, unused,
     unused) = getFrontier(na_data_test)

    # Plotting the efficient frontier
    plt.clf()
    plt.plot(lf_std, lf_returns, 'b')
    plt.plot(lf_std_test, lf_returns_test, 'r')

    # Plot where the efficient frontier would be the following year
    lf_ret_port_test = []
    lf_std_port_test = []
    for na_portfolio in lna_portfolios:
        na_port_rets = np.dot(na_data_test, na_portfolio)
        lf_std_port_test.append(np.std(na_port_rets))
        lf_ret_port_test.append(np.average(na_port_rets))

    plt.plot(lf_std_port_test, lf_ret_port_test, 'k')

    # Plot individual stock risk/return as green +
    for i, f_ret in enumerate(na_avgrets):
        plt.plot(na_std[i], f_ret, 'g+')

    # Labels and Axis. The years come from the period boundaries: the
    # training period starts in dt_start.year, the test period in dt_end.year.
    i_train_year = dt_start.year
    i_test_year = dt_end.year
    plt.legend([
        '%d Frontier' % i_train_year, '%d Frontier' % i_test_year,
        "Performance of '%02d Frontier in %d" % (i_train_year % 100,
                                                 i_test_year)
    ],
               loc='lower right')
    plt.title('Efficient Frontier')
    plt.ylabel('Expected Return')
    plt.xlabel('StDev')
    plt.savefig('tutorial8.pdf', format='pdf')
def analyzeTrades(tradeFile, benchmark):
    '''Replay the orders in tradeFile against closing prices and compare the
    resulting portfolio with a benchmark.

    Starting cash is 1000000. Each order is executed at the 'close' price of
    its symbol on the order date (timestamped at hour 16). Any order whose
    type does not contain "buy" (case-insensitive) is treated as a sell.
    Portfolio statistics and the benchmark statistics are printed, and a
    chart of both normalized series is saved to tradeFile + '_marketSim.pdf'.

    @param tradeFile: Passed to readCSV(). Each returned row is read as
        a[0], a[1], a[2] = year, month, day; a[3] = symbol; a[4] = order type;
        a[5] = number of shares. Also used as the prefix of the PDF name.
    @param benchmark: Passed to getYahooData() as the symbol argument for the
        benchmark series.
    @return: Nothing.

    Sample input files and reference results:

    #orders.csv
    2011,1,10,AAPL,Buy,1500,
    2011,1,13,AAPL,Sell,1500,
    2011,1,13,IBM,Buy,4000,
    2011,1,26,GOOG,Buy,1000,
    2011,2,2,XOM,Sell,4000,
    2011,2,10,XOM,Buy,4000,
    2011,3,3,GOOG,Sell,1000,
    2011,3,3,IBM,Sell,2200,
    2011,5,3,IBM,Buy,1500,
    2011,6,3,IBM,Sell,3300,
    2011,6,10,AAPL,Buy,1200,
    2011,8,1,GOOG,Buy,55,
    2011,8,1,GOOG,Sell,55,
    2011,12,20,AAPL,Sell,1200,

    #The final value of the portfolio using the sample file is -- 2011,12,20,1133860
    #Details of the Performance of the portfolio :
    #Data Range :  2011-01-10 16:00:00  to  2011-12-20 16:00:00
    #Sharpe Ratio of Fund : 1.21540462111
    #Sharpe Ratio of $SPX : 0.0183391412227
    #Total Return of Fund : 1.13386
    #Total Return of $SPX : 0.97759401457
    #Standard Deviation of Fund :   0.00717514512699
    #Standard Deviation of $SPX :   0.0149090969828
    #Average Daily Return of Fund : 0.000549352749569
    #Average Daily Return of $SPX : 1.72238432443e-05
    
    #The other sample file is orders2.csv that you can use to test your code, and compare with others.
    2011,1,14,AAPL,Buy,1500,
    2011,1,19,AAPL,Sell,1500,
    2011,1,19,IBM,Buy,4000,
    2011,1,31,GOOG,Buy,1000,
    2011,2,4,XOM,Sell,4000,
    2011,2,11,XOM,Buy,4000,
    2011,3,2,GOOG,Sell,1000,
    2011,3,2,IBM,Sell,2200,
    2011,5,23,IBM,Buy,1500,
    2011,6,2,IBM,Sell,3300,
    2011,6,10,AAPL,Buy,1200,
    2011,8,9,GOOG,Buy,55,
    2011,8,11,GOOG,Sell,55,
    2011,12,14,AAPL,Sell,1200,
    #The final value of the portfolio using the sample file is -- 2011,12,14, 1078753
    #Data Range :  2011-01-14 16:00:00  to  2011-12-14 16:00:00
    #Sharpe Ratio of Fund : 0.788988545538
    #Sharpe Ratio of $SPX :-0.177204632551
    #Total Return of Fund : 1.078753
    #Total Return of $SPX : 0.937041848381
    #Standard Deviation of Fund :   0.00708034656073
    #Standard Deviation of $SPX :   0.0149914504972
    #Average Daily Return of Fund : 0.000351904599618
    #Average Daily Return of $SPX :-0.000167347202139    
'''
    # Load the orders and derive the distinct symbols (column 3) and the
    # date range they span.
    np_transactions = readCSV(tradeFile)
    ls_symbols = sorted(set(np_transactions[:, 3]))
    ls_dates = list()
    for a in np_transactions:
        ls_dates.append(dt.datetime(int(a[0]), int(a[1]), int(a[2])))
    dt_min = min(ls_dates)
    # One extra day is added after the last order date.
    dt_max = max(ls_dates) + dt.timedelta(1)

    #notrans = len(np_transactions)
    #df_transactions = pd.DataFrame(np.random.randn(notrans,4),ls_dates,columns=['sym', 'trans', 'lot'])
    #df_transactions.describe()
    #print df_transactions

    # Price data for the traded symbols and for the benchmark; both results
    # are indexed by key below ('close').
    ldf_data = getYahooData(dt_min, dt_max, ls_symbols)
    ldf_benchmark = getYahooData(dt_min, dt_max, benchmark)
    ldt_timestamps = du.getNYSEdays(dt_min, dt_max, dt.timedelta(hours=16))

    ldf_close = ldf_data['close']
    ldf_benchmark_close = copy.deepcopy(ldf_benchmark['close'])

    # df_alloc: shares held per symbol per day, starts as a copy of the close
    # prices and is zeroed below.
    df_alloc = copy.deepcopy(ldf_data['close'])
    # df_cash: one value per day; the row-sum is only used to obtain a
    # series of the right shape, which is then overwritten with the
    # starting cash.
    df_cash = np.sum(ldf_close, axis=1)
    df_cash.fill(1000000)

    for keys in df_alloc:
        df_alloc[keys] = 0

    # We need closing prices so the timestamp should be hour_s=16.
    # NOTE(review): dt_cob is not referenced again in this function.
    dt_cob = dt.timedelta(hours=16)

    # Store the hour-16 timestamp in field 6 of every order row.
    # NOTE(review): assumes readCSV() returns rows with at least 7 fields
    # that accept a datetime value -- confirm against readCSV.
    for a in np_transactions:
        trans_dt = dt.datetime(int(a[0]), int(a[1]), int(a[2]), 16)
        a[6] = trans_dt

    # Apply the orders in file order.
    for a in np_transactions:
        trans_dt = dt.datetime(int(a[0]), int(a[1]), int(a[2]), 16)
        lot = int(a[5])
        sym = a[3]
        transType = str(a[4]).lower()
        price = ldf_close.ix[trans_dt, sym]
        costOfTransaction = lot * price
        cashBefore = df_cash.ix[trans_dt]
        if (transType.find('buy') > -1):
            #df_alloc[a[3]][trans_dt] = a[5]
            # Holdings change from the trade date onward; cash is adjusted
            # on the trade date and propagated forward below.
            df_alloc.ix[trans_dt:, sym] += lot
            df_cash.ix[trans_dt] -= costOfTransaction
        else:
            #df_alloc[a[3]][trans_dt] = -lot
            # Everything that is not a buy is booked as a sell.
            df_alloc.ix[trans_dt:, sym] -= lot
            df_cash.ix[trans_dt:] += costOfTransaction
        cashAfter = df_cash.ix[trans_dt]
        # Carry the post-trade cash balance forward to all later days.
        df_cash.ix[trans_dt:] = cashAfter
        print trans_dt, transType, lot, sym, price, costOfTransaction, cashBefore, cashAfter

    # Market value of the holdings per symbol per day.
    df_portfoliovalue = df_alloc * ldf_close

    # Total value per day = holdings value + cash.
    df_netliquidation = np.sum(df_portfoliovalue, axis=1)
    #print df_cash.ix[dt.datetime(2011,12,19,16) :]
    #print df_netliquidation.ix[dt_max - dt.timedelta(2) :]
    df_netliquidation += df_cash
    #print df_netliquidation.ix[max(ldt_timestamps) :]
    #print dt.datetime(2011,11,9,16)
    #print df_netliquidation.ix[dt.datetime(2011,11,9,16)]
    #print dt.datetime(2011,3,28,16)
    #print df_netliquidation.ix[dt.datetime(2011,3,28,16)]

    # Statistics for the fund and for the benchmark; the second argument [1]
    # is passed through as-is (presumably the allocation -- verify against
    # getPortfolioStats).
    cum_ret, vol, daily_ret, sharpe, na_normalized_price = getPortfolioStats(
        df_netliquidation, [1])
    cum_ret1, vol1, daily_ret1, sharpe1, na_normalized_price1 = getPortfolioStats(
        ldf_benchmark_close, [1])

    print "Final Value of the portfolio      = ", df_netliquidation.ix[max(
        ldt_timestamps)]
    print "Sharpe Ratio of Fund              = ", sharpe
    print "Sharpe Ratio of benchmark         = ", sharpe1
    print "Total Return of Fund              = ", cum_ret
    print "Total Return of benchmark         = ", cum_ret1
    print "Standard Deviation of Fund        = ", vol
    print "Standard Deviation of benchmark   = ", vol1
    print "Average Daily Return of Fund      = ", daily_ret
    print "Average Daily Return of benchmark = ", daily_ret1

    # Plotting the prices with x-axis=timestamps
    plt.clf()
    plt.plot(ldt_timestamps, na_normalized_price, label='portfolio')
    plt.plot(ldt_timestamps, na_normalized_price1, label='benchmark')
    plt.legend()
    plt.ylabel('Returns')
    plt.xlabel('Date')
    # The chart is written next to the input name: <tradeFile>_marketSim.pdf
    fileName = tradeFile + '_marketSim.pdf'
    plt.savefig(fileName, format='pdf')
Esempio n. 50
0
def main():
    """Run the market simulator on ../data/orders.csv.

    Orders are read with _csv_read_trades(), replayed day by day against the
    'close' prices from the Yahoo source, and the total portfolio value
    (cash + holdings) for every trading day is written to ../data/values.csv
    as rows of year, month, day, value.

    Starting cash is 1000000. An order whose type is exactly "Buy" adds
    shares and spends cash; every other order type is booked as a sell.

    The output file is managed by a ``with`` block so it is flushed and
    closed even if processing fails part-way through.
    """
    initial_cash = 1000000
    orders_file = "../data/orders.csv"
    values_file = "../data/values.csv"
    netCash = initial_cash

    print("***************Market Simulator*********************************")
    dates, symbols, orders, volume, tradeCount, symbolList, dateList = _csv_read_trades(
        orders_file)
    startdate = dateList[0]
    enddate = dateList[-1]
    dt_timeofday = dt.timedelta(hours=16)

    print("Fetching Data...")
    ldt_timestamps = du.getNYSEdays(startdate, enddate, dt_timeofday)
    c_dataobj = da.DataAccess('Yahoo')  #, cachestalltime=0)
    ls_keys = ['close']
    ldf_data = c_dataobj.get_data(ldt_timestamps, symbolList, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))
    # Fill gaps forward, then backward, then with 1.0 for all-missing series.
    for s_key in ls_keys:
        d_data[s_key] = d_data[s_key].fillna(method='ffill')
        d_data[s_key] = d_data[s_key].fillna(method='bfill')
        d_data[s_key] = d_data[s_key].fillna(1.0)
    na_price = d_data['close'].values

    # Shares currently held, one slot per entry of symbolList.
    ownedStocks = np.zeros(na_price.shape[1])

    print("Processing Orders...")
    # 'wb' is the mode the Python 2 csv module expects for writer targets.
    with open(values_file, 'wb') as resultFile:
        writer = csv.writer(resultFile, dialect='excel')
        for date_index, ldt_ts in enumerate(ldt_timestamps):
            na_today = na_price[date_index]

            # Execute every order dated on this trading day.
            for order_count, order_date in enumerate(dates):
                if ldt_ts != order_date:
                    continue
                for symbol_index, s_sym in enumerate(symbolList):
                    if s_sym != symbols[order_count]:
                        continue
                    cash = na_today[symbol_index] * volume[order_count]
                    if orders[order_count] == "Buy":
                        ownedStocks[symbol_index] += volume[order_count]
                        netCash = netCash - cash
                    else:
                        ownedStocks[symbol_index] -= volume[order_count]
                        netCash = netCash + cash

            # Mark the holdings to today's prices.
            owned_value = sum(
                volume_own * f_price
                for volume_own, f_price in zip(ownedStocks, na_today))
            total_value = owned_value + netCash
            writer.writerow([
                str(ldt_ts.year),
                str(ldt_ts.month),
                str(ldt_ts.day),
                str(int(total_value))
            ])

    print("Generating Output file...")
    print("Done.")
    print("****************************************************************")
Esempio n. 51
0
def main(t_val):
    """Write an orders file for the "actual close drops below t_val" event.

    For every symbol of the 'sp5002012' list plus SPY, over the NYSE trading
    days from 2008-01-01 to 2009-12-31, an event occurs on day i when the
    actual close was >= t_val on day i-1 and is < t_val on day i. Each event
    produces a Buy of 100 shares on day i and a Sell of 100 shares five
    trading days later (clamped to the last available day).

    The orders are written to 'orders_<t_val>.csv' without header or index,
    with the columns year, month, day, equity, order, shares.

    t_val -- threshold price; converted with float() for the comparison and
             inserted verbatim into the output file name.
    """
    dt_start = dt.datetime(2008, 1, 1)
    dt_end = dt.datetime(2009, 12, 31)
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))
    drops_below = float(t_val)
    dataobj = da.DataAccess('Yahoo')
    ls_symbols = dataobj.get_symbols_from_list('sp5002012')
    ls_symbols.append('SPY')

    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # Fill gaps forward, then backward, then with 1.0 for all-missing series.
    for s_key in ls_keys:
        d_data[s_key] = d_data[s_key].fillna(method='ffill')
        d_data[s_key] = d_data[s_key].fillna(method='bfill')
        d_data[s_key] = d_data[s_key].fillna(1.0)

    df_close = d_data['actual_close']

    # Time stamps for the event range
    ldt_timestamps = df_close.index
    i_last_day = len(ldt_timestamps) - 1

    df_columns = ['year', 'month', 'day', 'equity', 'order', 'shares']

    def _order_row(dt_day, s_equity, s_order):
        # One output record in the column layout of df_columns.
        return {
            'year': dt_day.year,
            'month': dt_day.month,
            'day': dt_day.day,
            'equity': s_equity,
            'order': s_order,
            'shares': 100
        }

    # Rows are collected in a list and turned into a frame once, instead of
    # growing a DataFrame row by row (which copies the frame on every append).
    l_rows = []
    for s_sym in ls_symbols:
        # Positional access is equivalent to label lookup here because the
        # timestamps are the frame's own index.
        na_prices = df_close[s_sym].values
        for i in range(1, len(ldt_timestamps)):
            f_symprice_today = na_prices[i]
            f_symprice_yest = na_prices[i - 1]

            if f_symprice_yest >= drops_below and f_symprice_today < drops_below:
                l_rows.append(_order_row(ldt_timestamps[i], s_sym, 'Buy'))
                # Sell five trading days later, or on the last day available.
                sell_day = min(i + 5, i_last_day)
                l_rows.append(
                    _order_row(ldt_timestamps[sell_day], s_sym, 'Sell'))

    df = pd.DataFrame(l_rows, columns=df_columns)
    df.to_csv('orders_%s.csv' % (t_val),
              header=None,
              index=None,
              columns=df_columns)
Esempio n. 52
0
def getNyseDaysOfMarketOpenBetween(startOfPeriod, endOfPeriod):
    """Return du.getNYSEdays() from startOfPeriod to one day past endOfPeriod.

    The timestamps are requested at hour 16.
    """
    oneDay = dt.timedelta(days=1)
    return du.getNYSEdays(startOfPeriod, endOfPeriod + oneDay,
                          dt.timedelta(hours=16))
Esempio n. 53
0
import QSTK.qstkutil.DataAccess as da
import datetime as dt
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import QSTK.qstkstudy.EventProfiler as ep
import copy

# Symbol universe: the 'sp5002012' list from the Yahoo source plus SPY.
yahoodatabase = da.DataAccess('Yahoo')
syms = yahoodatabase.get_symbols_from_list('sp5002012')
syms.append('SPY')
keys = ['actual_close', 'close']
# Plain decimal literals: a zero-padded literal such as 01 is parsed as octal
# by Python 2 and is a SyntaxError on Python 3.
start_date = dt.datetime(2008, 1, 1)
end_date = dt.datetime(2009, 12, 31)
delta_time = dt.timedelta(hours=16)
opentimes = du.getNYSEdays(start_date, end_date, delta_time)
# get_data returns one frame per requested key, in the order of `keys`.
prices = yahoodatabase.get_data(opentimes, syms, keys)
prices_dic = dict(zip(keys, prices))

# Fill gaps forward, then backward, then with 1.0 for all-missing series.
for key in keys:
    prices_dic[key] = prices_dic[key].fillna(method='ffill')
    prices_dic[key] = prices_dic[key].fillna(method='bfill')
    prices_dic[key] = prices_dic[key].fillna(1.0)

prices_actclose_all = prices_dic['actual_close']
prices_actclose_SPY = prices_actclose_all['SPY']

# Event matrix with the same shape and labels as the prices, all NaN.
events = copy.deepcopy(prices_actclose_all)
events = events * np.NAN

for sym in syms:
Esempio n. 54
0
# Date range and raw symbol/date lists taken from the orders frame.
dt_start, dt_end = min(df_orders.index), max(df_orders.index)
ls_dt_all_from_orders = df_orders.index.tolist()
sym_all_from_orders = df_orders['sym'].tolist()
# De-duplicated symbols (set order) and de-duplicated, ascending dates.
ls_sym_unique = list(set(sym_all_from_orders))
ls_dt_unique = sorted(set(ls_dt_all_from_orders))

## STEP 2 -- put this into a function!
# Read the price data from Yahoo.
dataobj = da.DataAccess('Yahoo')
dt_start_read = dt_start
# The end date needs to be offset by one day.
dt_end_read = dt_end + dt.timedelta(days=1)
ldt_timestamps = du.getNYSEdays(dt_start_read, dt_end_read,
                                dt.timedelta(hours=16))
ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
ldf_data = dataobj.get_data(ldt_timestamps, ls_sym_unique, ls_keys)
# Data for the symbols we are interested in, keyed by field name.
d_data = dict(zip(ls_keys, ldf_data))
# Remove the NaNs from the price data: forward fill, backward fill, then 1.0.
for s_key in ls_keys:
    d_data[s_key] = (d_data[s_key]
                     .fillna(method='ffill')
                     .fillna(method='bfill')
                     .fillna(1.0))

## STEP 3
#dataframe for SHARES of each symbol that you are CURRENTLY HOLDING- make sure they are floating point numbers!
df_trade_matrix = np.zeros((len(ldt_timestamps), len(ls_sym_unique)))
df_trade_matrix = pd.DataFrame(df_trade_matrix,
                               index=ldt_timestamps,
Esempio n. 55
0
def tutorial01():
    """Fetch 2006-2010 prices for five symbols and save five PDF charts.

    Charts written to the working directory: adjustedclose.pdf,
    normalizedclose.pdf, dailyreturns.pdf, correlationscatter.pdf and
    cumulativereturns.pdf.

    The correlation scatter looks its two columns up by symbol name, so the
    plotted series always match the "$SPX" / "XOM" axis labels.
    """
    #  Define the company list to get the stock prices.
    ls_symbols = ["AAPL", "GLD", "GOOG", "$SPX", "XOM"]
    #  Define the date range of stock data.
    dt_start = dt.datetime(2006, 1, 1)
    dt_end = dt.datetime(2010, 12, 31)
    #  Define the time of date as 4:00 PM, when it is the close of the day.
    dt_timeofday = dt.timedelta(hours=16)
    #  Create a timestamp list object for QSTK.
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt_timeofday)

    #  Define the data repository as Yahoo Finance.
    c_dataobj = da.DataAccess("Yahoo")
    #  Define data keys.
    ls_keys = ["open", "high", "low", "close", "volume", "actual_close"]
    #  Retrieve data.
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    #  Map each key to its frame.
    d_data = dict(zip(ls_keys, ldf_data))

    #  Choose a set of data to plot.
    na_price = d_data["close"].values
    #  Clear the figure canvas.
    plt.clf()
    #  Plot the data.
    plt.plot(ldt_timestamps, na_price)
    #  Arrange the appearance of the figure.
    plt.legend(ls_symbols)
    plt.ylabel("Adjusted Close")
    plt.xlabel("Date")
    #  Save the figure in PDF format.
    plt.savefig("adjustedclose.pdf", format="pdf")

    #  Normalize the price data so that the first row is 1.0.
    na_normalized_price = na_price / na_price[0, :]
    #  Draw another figure and save it.
    plt.clf()
    plt.plot(ldt_timestamps, na_normalized_price)
    plt.legend(ls_symbols)
    plt.ylabel("Normalized Close")
    plt.xlabel("Date")
    plt.savefig("normalizedclose.pdf", format="pdf")

    #  Calculate daily returns (returnize0 works in place on the copy).
    na_rets = na_normalized_price.copy()
    tsu.returnize0(na_rets)
    #  Draw another figure and save it.
    plt.clf()
    plt.plot(ldt_timestamps, na_rets)
    plt.legend(ls_symbols)
    plt.ylabel("Daily Returns")
    plt.xlabel("Date")
    plt.savefig("dailyreturns.pdf", format="pdf")

    #  Check the correlation between '$SPX' and 'XOM' using scatter plots.
    #  Columns are resolved by name; a hard-coded column 1 would be GLD.
    i_spx = ls_symbols.index("$SPX")
    i_xom = ls_symbols.index("XOM")
    plt.clf()
    plt.scatter(na_rets[:, i_spx], na_rets[:, i_xom], c='blue')
    plt.xlabel("$SPX")
    plt.ylabel("XOM")
    plt.savefig("correlationscatter.pdf", format="pdf")

    #  Calculate cumulative returns: start at 1.0 and compound day by day.
    daily_cum_ret = np.empty(na_rets.shape)
    daily_cum_ret[0, :] = 1.0
    for t in range(1, na_rets.shape[0]):
        daily_cum_ret[t] = daily_cum_ret[t - 1] * (1.0 + na_rets[t, :])
    plt.clf()
    plt.plot(ldt_timestamps, daily_cum_ret)
    plt.legend(ls_symbols)
    plt.ylabel("Cumulative Returns")
    plt.xlabel("Date")
    plt.savefig("cumulativereturns.pdf", format="pdf")
Esempio n. 56
0
def main():
    '''Plot the 2009 efficient frontier against the 2010 one.

    The frontier is computed from the 365 days before 2010-01-01 and from the
    365 days after it; the earlier portfolios are then re-evaluated on the
    later returns. The chart is saved to tutorial8.pdf.
    '''

    # S&P 100
    candidates = [
        'AAPL', 'ABT', 'ACN', 'AEP', 'ALL', 'AMGN', 'AMZN', 'APC', 'AXP', 'BA',
        'BAC', 'BAX', 'BHI', 'BK', 'BMY', 'BRK.B', 'CAT', 'C', 'CL', 'CMCSA',
        'COF', 'COP', 'COST', 'CPB', 'CSCO', 'CVS', 'CVX', 'DD', 'DELL', 'DIS',
        'DOW', 'DVN', 'EBAY', 'EMC', 'EXC', 'F', 'FCX', 'FDX', 'GD', 'GE',
        'GILD', 'GOOG', 'GS', 'HAL', 'HD', 'HNZ', 'HON', 'HPQ', 'IBM', 'INTC',
        'JNJ', 'JPM', 'KFT', 'KO', 'LLY', 'LMT', 'LOW', 'MA', 'MCD', 'MDT',
        'MET', 'MMM', 'MO', 'MON', 'MRK', 'MS', 'MSFT', 'NKE', 'NOV', 'NSC',
        'NWSA', 'NYX', 'ORCL', 'OXY', 'PEP', 'PFE', 'PG', 'PM', 'QCOM', 'RF',
        'RTN', 'SBUX', 'SLB', 'HSH', 'SO', 'SPG', 'T', 'TGT', 'TWX', 'TXN',
        'UNH', 'UPS', 'USB', 'UTX', 'VZ', 'WAG', 'WFC', 'WMB', 'WMT', 'XOM'
    ]

    # Yahoo-backed data access object.
    source = da.DataAccess('Yahoo')

    # Drop every ticker the data source does not list.
    known_symbols = source.get_all_symbols()
    for missing in list(set(candidates) - set(known_symbols)):
        candidates.remove(missing)

    # The training year ends, and the test year starts, on this day.
    split_day = dt.datetime(2010, 1, 1)
    first_day = split_day - dt.timedelta(days=365)
    last_day = split_day + dt.timedelta(days=365)

    # Closing prices are wanted, hence hour 16.
    closing_time = dt.timedelta(hours=16)

    # Trading days of both periods.
    train_days = du.getNYSEdays(first_day, split_day, closing_time)
    test_days = du.getNYSEdays(split_day, last_day, closing_time)

    # Close prices only.
    train_close = source.get_data(train_days, candidates, "close")
    test_close = source.get_data(test_days, candidates, "close")

    # Patch missing values: forward first, then backward.
    train_close = train_close.fillna(method='ffill').fillna(method='bfill')
    test_close = test_close.fillna(method='ffill').fillna(method='bfill')

    # Work on array copies; returnize0 converts them to daily returns in
    # place.
    train_rets = train_close.values.copy()
    test_rets = test_close.values.copy()
    tsu.returnize0(train_rets)
    tsu.returnize0(test_rets)

    # Frontier of each period; only the first two results of the test
    # period are used.
    (front_rets, front_std, portfolios, asset_rets,
     asset_std) = getFrontier(train_rets)
    (front_rets_test, front_std_test, _ignored, _ignored,
     _ignored) = getFrontier(test_rets)

    # Both frontiers.
    plt.clf()
    plt.plot(front_std, front_rets, 'b')
    plt.plot(front_std_test, front_rets_test, 'r')

    # How the training-period portfolios did in the following year.
    realized_avg = []
    realized_std = []
    for weights in portfolios:
        realized = np.dot(test_rets, weights)
        realized_std.append(np.std(realized))
        realized_avg.append(np.average(realized))

    plt.plot(realized_std, realized_avg, 'k')

    # Individual stock risk/return as green +
    for pos, avg_ret in enumerate(asset_rets):
        plt.plot(asset_std[pos], avg_ret, 'g+')

    # # Plot some arrows showing transition of efficient frontier
    # for i in range(0, 101, 10):
    #     plt.arrow(lf_std[i], lf_returns[i], lf_std_port_test[i] - lf_std[i],
    #                 lf_ret_port_test[i] - lf_returns[i], color='k')

    # Labels and axes.
    legend_labels = [
        '2009 Frontier',
        '2010 Frontier',
        "Performance of '09 Frontier in 2010",
    ]
    plt.legend(legend_labels, loc='lower right')
    plt.title('Efficient Frontier For S&P 100 ')
    plt.ylabel('Expected Return')
    plt.xlabel('StDev')
    plt.savefig('tutorial8.pdf', format='pdf')
Esempio n. 57
0
# De-duplicate the symbols, order the trades, and de-duplicate/order the
# dates.
symbol_list = list(set(symbol_list))
date_trade_list = sorted(date_trade_list)
date_list = sorted(set(date_list))
# Read one day past the date of the last trade.
dt_end_read = date_trade_list[-1][0] + dt.timedelta(days=1)

dataobj = da.DataAccess('Yahoo')
# Aliases used further down for the same symbols and date bounds.
symbols = ls_symbols = symbol_list
startdate = dt_start = date_trade_list[0][0]
enddate = dt_end = dt_end_read

dt_timeofday = dt.timedelta(hours=16)
ldt_timestamps = du.getNYSEdays(startdate, enddate, dt_timeofday)
ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
ldf_data = dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
d_data = dict(zip(ls_keys, ldf_data))
# Fill gaps forward, then backward, then with 1.0 for all-missing series.
for s_key in ls_keys:
    d_data[s_key] = (d_data[s_key]
                     .fillna(method='ffill')
                     .fillna(method='bfill')
                     .fillna(1.0))

df_close = d_data['close']
# Two independent all-zero frames, one row per trading day and one column
# per symbol.
df = pd.DataFrame(
    np.zeros((len(ldt_timestamps), len(symbols))),
    index=ldt_timestamps,
    columns=symbols,
)
df_price = pd.DataFrame(
    np.zeros((len(ldt_timestamps), len(symbols))),
    index=ldt_timestamps,
    columns=symbols,
)
Esempio n. 58
0
def simulate(start_date, end_date, symbols, allocations):
    """Compute daily-return statistics for a fixed-allocation portfolio.

    Parameters:
        start_date, end_date: bounds passed to ``du.getNYSEdays`` to obtain
            the list of timestamps to load.
        symbols: equity symbols whose 'close' series are loaded.
        allocations: one weight per symbol, in the same order as ``symbols``.

    Returns:
        Tuple ``(std_daily_ret, average_daily_ret, sharpe_ratio, cum_return)``.

    NOTE(review): the numeric definitions below are kept exactly as they
    were; only crash-prone scaffolding was changed.  Two of them look
    suspicious and should be confirmed against the intended spec:
      * ``sharpe_ratio`` scales by ``numdays`` rather than a square root.
      * ``cum_return`` is the plain sum of the daily figures.
    """
    # We need closing prices so the timestamp should be hours=16.
    dt_timeofday = dt.timedelta(hours=16)

    # Get a list of trading days between the start and the end.
    ldt_timestamps = du.getNYSEdays(start_date, end_date, dt_timeofday)

    # Creating an object of the dataaccess class with Yahoo as the source.
    c_dataobj = da.DataAccess('Yahoo', cachestalltime=0)

    # Read every field in one go; d_data maps field name -> data.
    keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = c_dataobj.get_data(ldt_timestamps, symbols, keys)
    d_data = dict(zip(keys, ldf_data))

    # Close prices as an ndarray, normalized so every column starts at 1.
    na_price = d_data['close'].values
    na_normalized_price = na_price / na_price[0, :]

    # returnize0 works in place on an ndarray, so operate on a copy.
    # NOTE(review): the original author tagged this call "CHANGE THIS!!";
    # the intended replacement is not visible here.
    na_rets = na_normalized_price.copy()
    tsu.returnize0(na_rets)
    na_rets = na_rets + 1

    numdays = na_rets.shape[0]
    weights = np.asarray(allocations, dtype=float)

    # Day 0 has no previous day and stays at 0.  A fresh zero array is used
    # instead of copying column 1 of na_rets, which raised IndexError for a
    # single-symbol portfolio.
    total_daily_ret = np.zeros(numdays)

    # For each later day: allocation-weighted change versus the previous
    # row, divided by the allocation-weighted previous row.
    na_previous = na_rets[:-1, :]
    weighted_change = np.dot(na_rets[1:, :] - na_previous, weights)
    weighted_previous = np.dot(na_previous, weights)
    total_daily_ret[1:] = weighted_change / weighted_previous

    average_daily_ret = np.mean(total_daily_ret)
    std_daily_ret = np.std(total_daily_ret)
    cum_return = sum(total_daily_ret)
    sharpe_ratio = numdays * average_daily_ret / std_daily_ret  #sharpe ratio

    return std_daily_ret, average_daily_ret, sharpe_ratio, cum_return
    # Sort the traded dates by date order
    traded_dates.sort(key=lambda trade: trade[0])
    traded_dates = np.array(traded_dates)

    # Add a day to the datetime
    period_end = period_end + dt.timedelta(days=1)

    # print traded_dates
    print "Trading symbols:" + str(symbols)
    print "From:" + str(period_start)
    print "To:" + str(period_end)

    # 3 - Read in data
    #     - Read in adjusted close

    ldt_timestamps = du.getNYSEdays(period_start, period_end,
                                    dt.timedelta(hours=16))
    dataobj = da.DataAccess('Yahoo')
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = dataobj.get_data(ldt_timestamps, symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # Read in adjusted closing prices for the equities.
    adjusted_close_price = d_data['close'].values

    timestamps = np.array(ldt_timestamps)
    timestamps = timestamps.reshape((timestamps.size, 1))
    adjusted_close_price = np.concatenate((adjusted_close_price, timestamps),
                                          axis=1)

    # 4 - Scan trades to update cash
    #     - BUY is a cash reduction
Esempio n. 60
0
def analysis(mkt, dt_date, plot_chart=False):
    """Write a CSV of technical indicators for every symbol as of dt_date.

    For each symbol in the module-level ``ls_symbols`` one row is written to
    ``analysisPath + 'analysis-<YYYYMMDD>.csv'`` containing the close, the
    50- and 200-period rolling means and their ratio, the 20-period
    Bollinger bands and normalized Bollinger value, and MACD (12/26 EMA)
    with its 9-span EMA and their difference.  The rolling and EMA
    indicators are computed from 'actual_close'; the reported close comes
    from 'close'.

    Parameters:
        mkt: market identifier; forwarded to the ``qdu`` date helpers and
            prefixed to 'Yahoo' to select the data source.
        dt_date: end timestamp; every indicator is read at this index
            label, so it must be present in the loaded data.
        plot_chart: when true, also save ``<symbol>.pdf`` (price plus
            Bollinger value) for symbols whose latest Bollinger value is
            <= -0.75.  Defaults to False, matching the previous hard-coded
            setting.

    Relies on the module-level names ``ls_symbols`` and ``analysisPath``.
    """
    # We need closing prices so the timestamp should be hours=16.
    dt_timeofday = dt.timedelta(hours=16)

    # The window ends at dt_date and starts at the -199 offset from it.
    # NOTE(review): presumably this yields 200 sessions so the 200-period
    # mean has a value on the last row -- confirm against getNYSEoffset.
    dt_end = dt_date
    dt_start = qdu.getNYSEoffset(dt_end, -199, mkt)
    ldt_timestamps = qdu.getNYSEdays(dt_start, dt_end, dt_timeofday, mkt)

    # Creating an object of the dataaccess class with Yahoo as the source.
    c_dataobj = da.DataAccess(mkt + 'Yahoo')

    # Read every field in one go; d_data maps field name -> data.
    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    df_close = d_data['close']
    df_actual_close = d_data['actual_close']

    # SMA
    df_sma50 = pd.rolling_mean(df_actual_close, 50)
    df_sma200 = pd.rolling_mean(df_actual_close, 200)

    # Bollinger Bands
    df_mean = pd.rolling_mean(df_actual_close, 20)
    df_std = pd.rolling_std(df_actual_close, 20)
    upper_bband = df_mean + (2 * df_std)
    lower_bband = df_mean - (2 * df_std)
    df_bollinger = (df_actual_close - df_mean) / (2 * df_std)

    # MACD
    emaslow = pd.ewma(df_actual_close, span=26)
    emafast = pd.ewma(df_actual_close, span=12)
    macd = emafast - emaslow
    ema9 = pd.ewma(macd, span=9)

    str_date = dt.datetime.strftime(dt_end, "%Y%m%d")

    # The context manager closes the file even if a lookup below raises.
    with open(analysisPath + 'analysis-' + str_date + '.csv', 'w') as f:
        f.write(
            'symbol,close,sma50,sma200,sma50/sma200,upper_bband,lower_bband,bollinger,macd,ema9,macd-ema9\n'
        )

        for ls_symbol in ls_symbols:
            sma50 = df_sma50[ls_symbol][dt_end]
            sma200 = df_sma200[ls_symbol][dt_end]
            macd_now = macd[ls_symbol][dt_end]
            ema9_now = ema9[ls_symbol][dt_end]

            # Column order must match the header written above.
            row_values = [
                df_close[ls_symbol][dt_end],
                sma50,
                sma200,
                sma50 / sma200,
                upper_bband[ls_symbol][dt_end],
                lower_bband[ls_symbol][dt_end],
                df_bollinger[ls_symbol][dt_end],
                macd_now,
                ema9_now,
                macd_now - ema9_now,
            ]
            f.write(ls_symbol + "," +
                    ",".join("{}".format(value) for value in row_values) +
                    '\n')

            # Compare the latest value as a scalar rather than relying on
            # the truth value of a one-element Series.
            if plot_chart and df_bollinger[ls_symbol].iloc[-1] <= -0.75:
                plt.clf()

                # Top panel: price.
                plt.subplot(211)
                plt.plot(ldt_timestamps, df_close[ls_symbol], label=ls_symbol)
                plt.legend()
                plt.ylabel('Price')
                plt.xlabel('Date')
                plt.xticks(size='xx-small')
                plt.xlim(ldt_timestamps[0], ldt_timestamps[-1])

                # Bottom panel: Bollinger value with +/-1 reference lines.
                plt.subplot(212)
                plt.plot(ldt_timestamps,
                         df_bollinger[ls_symbol],
                         label=ls_symbol + '-Bollinger')
                plt.axhline(1.0, color='r')
                plt.axhline(-1.0, color='r')
                plt.legend()
                plt.ylabel('Bollinger')
                plt.xlabel('Date')
                plt.xticks(size='xx-small')
                plt.xlim(ldt_timestamps[0], ldt_timestamps[-1])
                plt.savefig(ls_symbol + '.pdf', format='pdf')