def get_clean_prices(symbols=None, dataobj=dataobj, start=None, end=None, market_sym='$SPX', reset_cache=True):
    """Fetch price tables for `symbols` plus a market symbol and fill in missing values.

    Arguments:
      symbols: Ticker symbols, in any form accepted by `normalize_symbols`.
      dataobj: Data source exposing `get_data(timestamps, symbols, keys)`.
        Defaults to the module-level `dataobj`.
      start: First date of the period; 2008-01-01 is used when falsy.
      end: Last date of the period; 2009-12-31 is used when falsy.
      market_sym (str): Market/benchmark symbol added to the symbol list
        (only once, even if the caller already included it).
      reset_cache (bool): Not used by this function; kept so existing
        callers that pass it keep working.

    Returns:
      dict: Maps each of 'open', 'high', 'low', 'close', 'volume' and
        'actual_close' to the corresponding table returned by
        `dataobj.get_data`, with missing values forward-filled, then
        back-filled, then replaced by 1.0.
    """
    start = util.normalize_date(start or datetime.date(2008, 1, 1))
    end = util.normalize_date(end or datetime.date(2009, 12, 31))

    # Work on a private copy: extending the list returned by normalize_symbols
    # in place could leak the market symbol into a list owned by the caller.
    symbols = list(normalize_symbols(symbols))
    if market_sym not in symbols:
        # Avoid requesting the same column twice from the data source.
        symbols.append(market_sym)

    print("Calculating timestamps for {0} SP500 symbols".format(len(symbols)))
    # presumably one timestamp per NYSE trading day at 16:00 -- TODO confirm
    ldt_timestamps = du.getNYSEdays(start, end, datetime.timedelta(hours=16))

    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    print("Retrieving data for {0} SP500 symbols between {1} and {2}.".format(len(symbols), start, end))
    ldf_data = dataobj.get_data(ldt_timestamps, symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    for s_key in ls_keys:
        print('cleaning nans from the column {0}'.format(repr(s_key)))
        # Carry the last known value forward, then fill leading gaps from the
        # first known value, and finally fall back to 1.0 for columns that
        # have no data at all.
        filled = d_data[s_key].fillna(method='ffill')
        filled = filled.fillna(method='bfill')
        d_data[s_key] = filled.fillna(1.0)
    return d_data
def price_dataframe(symbols='sp5002012',
                    start=datetime.datetime(2008, 1, 1),
                    end=datetime.datetime(2009, 12, 31),
                    price_type='actual_close',
                    cleaner=clean_dataframe,
                    ):
    """Retrieve the prices of a list of equities as a DataFrame (columns = symbols)

    Arguments:
      symbols (list of str): Ticker symbols like "GOOG", "AAPL", etc
        e.g. ["AAPL", " slv ", "GLD", "GOOG", "$SPX", "XOM", "msft"]
      start (datetime): The date at the start of the period being analyzed.
        2008-01-01 is used when falsy.
      end (datetime): The date at the end of the period being analyzed.
        2009-12-31 is used when falsy. Yahoo data stops at 2013/1/1
      price_type (str or list of str): Price key(s) requested from the data
        source; a single string is wrapped in a one-element list.
      cleaner (callable): Applied to the result before it is returned.

    Returns:
      Whatever `cleaner` returns. It receives the single cleaned table when
      exactly one was retrieved, otherwise the whole (possibly empty)
      collection of tables.
    """
    requested_types = [price_type] if isinstance(price_type, basestring) else price_type

    first_day = util.normalize_date(start or datetime.date(2008, 1, 1))
    last_day = util.normalize_date(end or datetime.date(2009, 12, 31))
    tickers = normalize_symbols(symbols)

    # presumably one timestamp per NYSE trading day at 16:00 -- TODO confirm
    timestamps = du.getNYSEdays(first_day, last_day, datetime.timedelta(hours=16))
    raw_tables = dataobj.get_data(timestamps, tickers, requested_types)
    tables = clean_dataframes(raw_tables)

    # Unwrap only when there is exactly one table; an empty or multi-table
    # result is handed to the cleaner as-is.
    if tables and len(tables) <= 1:
        return cleaner(tables[0])
    return cleaner(tables)
def get_clean_prices(symbols=None, dataobj=dataobj, start=None, end=None, market_sym='$SPX', reset_cache=True):
    """Fetch price tables for `symbols` plus a market symbol and fill in missing values.

    Arguments:
      symbols: Ticker symbols, in any form accepted by `normalize_symbols`.
      dataobj: Data source exposing `get_data(timestamps, symbols, keys)`.
        Defaults to the module-level `dataobj`.
      start: First date of the period; 2008-01-01 is used when falsy.
      end: Last date of the period; 2009-12-31 is used when falsy.
      market_sym (str): Market/benchmark symbol added to the symbol list
        (only once, even if the caller already included it).
      reset_cache (bool): Not used by this function; kept so existing
        callers that pass it keep working.

    Returns:
      dict: Maps each of 'open', 'high', 'low', 'close', 'volume' and
        'actual_close' to the corresponding table returned by
        `dataobj.get_data`, with missing values forward-filled, then
        back-filled, then replaced by 1.0.
    """
    start = util.normalize_date(start or datetime.date(2008, 1, 1))
    end = util.normalize_date(end or datetime.date(2009, 12, 31))

    # Work on a private copy: extending the list returned by normalize_symbols
    # in place could leak the market symbol into a list owned by the caller.
    symbols = list(normalize_symbols(symbols))
    if market_sym not in symbols:
        # Avoid requesting the same column twice from the data source.
        symbols.append(market_sym)

    print("Calculating timestamps for {0} SP500 symbols".format(len(symbols)))
    # presumably one timestamp per NYSE trading day at 16:00 -- TODO confirm
    ldt_timestamps = du.getNYSEdays(start, end, datetime.timedelta(hours=16))

    ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
    print("Retrieving data for {0} SP500 symbols between {1} and {2}.".format(len(symbols), start, end))
    ldf_data = dataobj.get_data(ldt_timestamps, symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    for s_key in ls_keys:
        print('cleaning nans from the column {0}'.format(repr(s_key)))
        # Carry the last known value forward, then fill leading gaps from the
        # first known value, and finally fall back to 1.0 for columns that
        # have no data at all.
        filled = d_data[s_key].fillna(method='ffill')
        filled = filled.fillna(method='bfill')
        d_data[s_key] = filled.fillna(1.0)
    return d_data
def symbols_bollinger(symbols='sp5002012',
                      start=datetime.datetime(2008, 1, 1),
                      end=datetime.datetime(2009, 12, 31),
                      price_type='adjusted_close',
                      cleaner=clean_dataframe,
                      window=20,
                      sigma=1.):
    """Calculate the Bollinger indicator for a list or set of symbols

    The symbols are normalized, their prices are loaded with
    `price_dataframe`, and the resulting table is passed to
    `frame_bollinger` with plotting disabled.

    Arguments:
      symbols: Ticker symbols, in any form accepted by `normalize_symbols`.
      start, end: Bounds of the period, forwarded to `price_dataframe`.
      price_type (str): Price key forwarded to `price_dataframe`.
        NOTE(review): confirm the data source accepts 'adjusted_close'.
      cleaner (callable): Forwarded to `price_dataframe`.
      window, sigma: Forwarded unchanged to `frame_bollinger`.

    Returns:
      The result of `frame_bollinger` for the price table.

    Example:
    >>> symbols_bollinger(["AAPL", "GOOG", "IBM", "MSFT"], '10-12-01', '10-12-30')[-5:]  # doctest: +NORMALIZE_WHITESPACE
                             GOOG      AAPL       IBM      MSFT
    2010-12-23 16:00:00  1.298178  1.185009  1.177220  1.237684
    2010-12-27 16:00:00  1.073603  1.371298  0.590403  0.932911
    2010-12-28 16:00:00  0.745548  1.436278  0.863406  0.812844
    2010-12-29 16:00:00  0.874885  1.464894  2.096242  0.752602
    2010-12-30 16:00:00  0.634661  0.793493  1.959324  0.498395
    """
    tickers = normalize_symbols(symbols)
    price_table = price_dataframe(
        tickers,
        start=start,
        end=end,
        price_type=price_type,
        cleaner=cleaner,
    )
    bands = frame_bollinger(price_table, window=window, sigma=sigma, plot=False)
    return bands
def price_dataframe(symbols='sp5002012',
                    start=datetime.datetime(2008, 1, 1),
                    end=datetime.datetime(2009, 12, 31),
                    price_type='actual_close',
                    cleaner=clean_dataframe,
                    ):
    """Retrieve the prices of a list of equities as a DataFrame (columns = symbols)

    Arguments:
      symbols (list of str): Ticker symbols like "GOOG", "AAPL", etc
        e.g. ["AAPL", " slv ", "GLD", "GOOG", "$SPX", "XOM", "msft"]
      start (datetime): The date at the start of the period being analyzed.
        2008-01-01 is used when falsy.
      end (datetime): The date at the end of the period being analyzed.
        2009-12-31 is used when falsy. Yahoo data stops at 2013/1/1
      price_type (str or list of str): Price key(s) requested from the data
        source; a single string is wrapped in a one-element list.
      cleaner (callable): Applied to the result before it is returned.

    Returns:
      Whatever `cleaner` returns. It receives the single cleaned table when
      exactly one was retrieved, otherwise the whole (possibly empty)
      collection of tables.
    """
    requested_types = [price_type] if isinstance(price_type, basestring) else price_type

    first_day = util.normalize_date(start or datetime.date(2008, 1, 1))
    last_day = util.normalize_date(end or datetime.date(2009, 12, 31))
    tickers = normalize_symbols(symbols)

    # presumably one timestamp per NYSE trading day at 16:00 -- TODO confirm
    timestamps = du.getNYSEdays(first_day, last_day, datetime.timedelta(hours=16))
    raw_tables = dataobj.get_data(timestamps, tickers, requested_types)
    tables = clean_dataframes(raw_tables)

    # Unwrap only when there is exactly one table; an empty or multi-table
    # result is handed to the cleaner as-is.
    if tables and len(tables) <= 1:
        return cleaner(tables[0])
    return cleaner(tables)
def symbol_bollinger(symbol='GOOG',
                     start=datetime.datetime(2008, 1, 1),
                     end=datetime.datetime(2009, 12, 31),
                     price_type='close',
                     cleaner=clean_dataframe,
                     window=20,
                     sigma=1.):
    """Calculate the Bollinger indicator value for a single symbol

    The symbol is normalized, prices are loaded with `price_dataframe`, and
    the column of the first normalized symbol is passed to
    `series_bollinger` with plotting disabled.

    Arguments:
      symbol: Ticker symbol, in any form accepted by `normalize_symbols`.
      start, end: Bounds of the period, forwarded to `price_dataframe`.
      price_type (str): Price key forwarded to `price_dataframe`.
      cleaner (callable): Forwarded to `price_dataframe`.
      window, sigma: Forwarded unchanged to `series_bollinger`.

    Returns:
      The result of `series_bollinger` for the symbol's price column.

    >>> symbol_bollinger("goog", '2008-1-1', '2008-2-1')[-1]  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
    -1.8782...
    """
    tickers = normalize_symbols(symbol)
    price_table = price_dataframe(
        tickers,
        start=start,
        end=end,
        price_type=price_type,
        cleaner=cleaner,
    )
    # Only the first normalized ticker's column is analyzed.
    ticker_prices = price_table[tickers[0]]
    return series_bollinger(ticker_prices, window=window, sigma=sigma, plot=False)
def portfolio_prices(
    symbols=("AAPL", "GLD", "GOOG", "$SPX", "XOM", "msft"),
    start=datetime.datetime(2005, 1, 1),
    end=datetime.datetime(2011, 12, 31),  # data stops at 2013/1/1
    normalize=True,
    allocation=None,
    price_type='actual_close',
    ):
    """Calculate the allocation-weighted price series of a portfolio

    Arguments:
      symbols (list of str): Ticker symbols like "GOOG", "AAPL", etc
      start (datetime): The date at the start of the period being analyzed.
      end (datetime): The date at the end of the period being analyzed.
      normalize (bool): Whether to normalize prices to 1 at the start of the time series.
      allocation (list of float): The portion of the portfolio allocated to each equity.
        Defaults to equal weights. A list shorter than `symbols` is padded
        with `1 / len(symbols)` per missing entry; the weights are then
        rescaled so that they sum to 1.
      price_type (str): Price key requested from the data source.

    Returns:
      numpy array with one value per timestamp: the sum over symbols of
      (optionally normalized) price times allocation weight.

    Raises:
      ValueError: If `allocation` has more entries than `symbols`, or if the
        weights sum to zero.
    """
    symbols = normalize_symbols(symbols)
    start = util.normalize_date(start)
    end = util.normalize_date(end)

    n_symbols = len(symbols)
    if allocation is None:
        allocation = [1. / n_symbols] * n_symbols
    allocation = list(allocation)
    if len(allocation) > n_symbols:
        raise ValueError('allocation has {0} entries but there are only {1} symbols'.format(
            len(allocation), n_symbols))
    if len(allocation) < n_symbols:
        allocation += [1. / n_symbols] * (n_symbols - len(allocation))

    # The original read `allocation.sum`, which does not exist on a list.
    total = float(np.sum(allocation))
    if total == 0:
        raise ValueError('allocation weights must not sum to zero')
    allocation = np.array([float(a) / total for a in allocation])

    timestamps = du.getNYSEdays(start, end, datetime.timedelta(hours=16))
    ls_keys = [price_type]
    # NOTE(review): this uses `da` as the data source; confirm it is a data
    # access object with `get_data` rather than a module alias.
    ldf_data = da.get_data(timestamps, symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # Work on a float copy so the fetched table is not modified and the
    # division below never runs in place against its own first row.
    na_price = np.array(d_data[price_type].values, dtype=float)
    if normalize:
        na_price = na_price / na_price[0, :]
    return np.sum(na_price * allocation, axis=1)
def portfolio_prices(
    symbols=("AAPL", "GLD", "GOOG", "$SPX", "XOM", "msft"),
    start=datetime.datetime(2005, 1, 1),
    end=datetime.datetime(2011, 12, 31),  # data stops at 2013/1/1
    normalize=True,
    allocation=None,
    price_type='actual_close',
    ):
    """Calculate the allocation-weighted price series of a portfolio

    Arguments:
      symbols (list of str): Ticker symbols like "GOOG", "AAPL", etc
      start (datetime): The date at the start of the period being analyzed.
      end (datetime): The date at the end of the period being analyzed.
      normalize (bool): Whether to normalize prices to 1 at the start of the time series.
      allocation (list of float): The portion of the portfolio allocated to each equity.
        Defaults to equal weights. A list shorter than `symbols` is padded
        with `1 / len(symbols)` per missing entry; the weights are then
        rescaled so that they sum to 1.
      price_type (str): Price key requested from the data source.

    Returns:
      numpy array with one value per timestamp: the sum over symbols of
      (optionally normalized) price times allocation weight.

    Raises:
      ValueError: If `allocation` has more entries than `symbols`, or if the
        weights sum to zero.
    """
    symbols = normalize_symbols(symbols)
    start = util.normalize_date(start)
    end = util.normalize_date(end)

    n_symbols = len(symbols)
    if allocation is None:
        allocation = [1. / n_symbols] * n_symbols
    allocation = list(allocation)
    if len(allocation) > n_symbols:
        raise ValueError('allocation has {0} entries but there are only {1} symbols'.format(
            len(allocation), n_symbols))
    if len(allocation) < n_symbols:
        allocation += [1. / n_symbols] * (n_symbols - len(allocation))

    # The original read `allocation.sum`, which does not exist on a list.
    total = float(np.sum(allocation))
    if total == 0:
        raise ValueError('allocation weights must not sum to zero')
    allocation = np.array([float(a) / total for a in allocation])

    timestamps = du.getNYSEdays(start, end, datetime.timedelta(hours=16))
    ls_keys = [price_type]
    # NOTE(review): this uses `da` as the data source; confirm it is a data
    # access object with `get_data` rather than a module alias.
    ldf_data = da.get_data(timestamps, symbols, ls_keys)
    d_data = dict(zip(ls_keys, ldf_data))

    # Work on a float copy so the fetched table is not modified and the
    # division below never runs in place against its own first row.
    na_price = np.array(d_data[price_type].values, dtype=float)
    if normalize:
        na_price = na_price / na_price[0, :]
    return np.sum(na_price * allocation, axis=1)