def main(): print "Getting Symbols" # symbols = np.loadtxt('sp5002012.txt',dtype='S10',comments='#', skiprows=1) symbols = ['GOOG', 'CVX', 'KIM', 'SPY', 'DELL', 'CAT' ] startday = datetime.date(2012, 1 , 1) endday = datetime.date(2012, 12, 31) #Get index index = web.get_data_yahoo("SPY", startday, endday).index print "Getting Data" dataAll = {} for symbol in symbols: try: dataAll[symbol] = web.get_data_yahoo(symbol, startday, endday) if len(dataAll[symbol].index) != 250: print symbol, len(dataAll[symbol].index) except: print "Could not obtain data for: " + symbol print "Analyzing Signals" signals = [] for symbol in dataAll.keys(): generateSignals(symbol,dataAll, signals) signals2 = defaultdict(list) for timestamp, symbol, rating in signals: signals2[timestamp].extend([[symbol, rating]]) return (dataAll.keys(), signals2, dataAll, index)
def get_stocks(tickers, market, start_date, end_date, frequency):
    """Fetch resampled adjusted-close prices for *market* and *tickers*.

    Parameters
    ----------
    tickers : list of str
        Yahoo! symbols to download, one column each.
    market : str
        Benchmark id; 'SP500' maps to the VFINX fund, 'TA100'/'TA25' are
        fetched via the local get_index_price helper, anything else is
        used verbatim as a Yahoo! symbol.
    start_date, end_date : str
        Dates in 'dd/mm/yyyy' format.
    frequency : str
        'Weekly' or 'Monthly' resampling frequency.

    Returns
    -------
    (prices, changes) - the price DataFrame and its percentage changes
    with the first (all-NaN) row dropped.
    """
    # Resampling rule per requested frequency.
    FREQ_DICT = {
        'Weekly': 'W-FRI',
        'Monthly': 'M',
    }
    start_yahoo = datetime.datetime.strptime(start_date, '%d/%m/%Y')
    end_yahoo = datetime.datetime.strptime(end_date, '%d/%m/%Y')
    # Set market portfolio (benchmark column comes first).
    if (market != 'TA100') and (market != 'TA25'):
        if market == 'SP500':
            # VFINX serves as a tradable S&P 500 proxy.
            prices = DataFrame(
                web.get_data_yahoo('VFINX', start_yahoo, end_yahoo)['Adj Close']
                .resample(FREQ_DICT[frequency], how='last', fill_method='ffill'),
                columns=['SP500'])
        else:
            prices = DataFrame(
                web.get_data_yahoo(market, start_yahoo, end_yahoo)['Adj Close']
                .resample(FREQ_DICT[frequency], how='last', fill_method='ffill'),
                columns=[market])
    else:
        # Tel-Aviv indices come from a local helper, not Yahoo!.
        prices = get_index_price(index_id=market, start_date=start_date,
                                 end_date=end_date, frequency=frequency
                                 ).resample(FREQ_DICT[frequency], how='last')
    # One column per ticker (replaces the index-based while loop).
    for ticker in tickers:
        get_df_ticker = DataFrame(
            web.get_data_yahoo(ticker, start_yahoo, end_yahoo)['Adj Close']
            .resample(FREQ_DICT[frequency], how='last'),
            columns=[ticker])
        prices = pd.concat([prices, get_df_ticker], join='outer', axis=1)
    changes = prices.pct_change()
    return prices, changes[1:]
def test_get_data(self):
    """Exercise Yahoo! downloads: one symbol, a symbol list, and the Dow
    components (raw, adjusted, and as a return index)."""
    # single symbol
    # http://finance.yahoo.com/q/hp?s=GOOG&a=09&b=08&c=2010&d=09&e=10&f=2010&g=d
    goog = web.get_data_yahoo('GOOG')
    assert goog.Volume.ix['OCT-08-2010'] == 2859200

    symbols = ['AAPL', 'AMZN', 'GOOG']
    panel = web.get_data_yahoo(symbols, '2012')
    # first 2012 day on which AAPL closed above GOOG
    crossed = panel.Close.GOOG.index[panel.Close.AAPL > panel.Close.GOOG]
    assert crossed[0].dayofyear == 96

    dow = web.get_components_yahoo('^DJI')
    panel = web.get_data_yahoo(dow, 'JAN-01-12', 'JAN-31-12')
    want = [19.02, 28.23, 25.39]
    got = panel.Close.ix['01-18-12'][['GE', 'MSFT', 'INTC']].tolist()
    assert got == want

    panel = web.get_data_yahoo(dow, 'JAN-01-12', 'JAN-31-12',
                               adjust_price=True)
    want = [18.38, 27.45, 24.54]
    got = panel.Close.ix['01-18-12'][['GE', 'MSFT', 'INTC']].tolist()
    assert got == want

    panel = web.get_data_yahoo(dow, '2011', ret_index=True)
    want = pd.DataFrame([[1.01757469, 1.01130524, 1.02414183],
                         [1.00292912, 1.00770812, 1.01735194],
                         [1.00820152, 1.00462487, 1.01320257],
                         [1.08025776, 0.99845838, 1.00113165]])
    got = panel.Ret_Index.ix['01-18-11':'01-21-11'][['GE', 'INTC', 'MSFT']]
    assert_almost_equal(got.values, want.values)
def save_data(): start = '1/1/1990' # Get S&P 500 data from yahoo sp500 = get_data_yahoo('^GSPC', start=start)['Adj Close'] sp500.name = 'SP500' vix = get_data_yahoo('^VIX', start=start)['Adj Close'] vix.name = 'VIX' # Get ten year and 3 month t-bill rates ten_yr = DataReader('DGS10', 'fred', start=start) three_mon = DataReader('DGS3MO', 'fred', start=start) ten_yr = ten_yr.ix[ten_yr.DGS10.str.count(r'^\.') != 1].astype(float) three_mon = three_mon.ix[three_mon.DGS3MO.str.count(r'^\.') != 1].astype(float) data = ten_yr.join(three_mon) data = data.join(sp500) data = data.join(vix) # Drop non-like observations (obs on different days) data = data.dropna() data.save('SP_YC.db') data.to_csv('the_data.csv')
def get_px(rorStyle=0): getTicker = raw_input("Provide mutual fund tickers seperated by commas (no error catching here so be exact!): ").split(",") cleanTickers = [x.strip() for x in getTicker] print cleanTickers if rorStyle == 0: px = DataFrame({n: web.get_data_yahoo(n, start='1980-01-01')['Adj Close'].pct_change() for n in cleanTickers}).dropna() elif rorStyle == 1: px = np.log(DataFrame({n: web.get_data_yahoo(n, start='1980-01-01')['Adj Close'].pct_change() for n in cleanTickers}).dropna() + 1) return px
def get_bench(rorStyle=0): getTicker = raw_input("Provide mutual fund or index ticker to be used as benchmark (^gspc is sp500): ").split(",") cleanTicker = [x.strip() for x in getTicker] print "Benchmark is: ", cleanTicker if rorStyle == 0: ror = web.get_data_yahoo(cleanTicker, start='1980-01-01')['Adj Close'].pct_change().dropna() elif rorStyle == 1: px = web.get_data_yahoo(cleanTicker, start='1980-01-01')['Adj Close'] ror = np.log(px / px.shift(1)) return ror
def get_historical_data(ticker, start_date, end_date):
    """Fetch daily Yahoo! data for *ticker* between the given dates.

    Returns [timeseries, values, datetimes]:
      timeseries - day offsets 0..n-1
      values     - adjusted close prices, in date order
      datetimes  - naive datetime.datetime per trading day (time stripped)
    """
    daily_data = web.get_data_yahoo(ticker, start=start_date, end=end_date)
    num_days = len(daily_data)
    timeseries = range(0, num_days)
    # Iterating the Series directly replaces the per-index lookup
    # daily_data['Adj Close'][i], which mixes label and position access.
    values = list(daily_data['Adj Close'])
    # Keep only year/month/day from each index timestamp.
    datetimes = [datetime.datetime(tm.year, tm.month, tm.day)
                 for tm in daily_data.index.tolist()]
    return [timeseries, values, datetimes]
def longTerm(symbol): a=web.get_data_yahoo(symbol, (datetime.datetime.now() - datetime.timedelta(days = 100)), datetime.datetime.now())['Adj Close'] # here are the given number of M, alpha, and beta xa=[] for i in range(0,len(a)): xa.append(i) ya=a plt.figure(1) value = 11 ss = s2 (value,xa) mm = mx(value,xa,ya) s = ss[0][0] m = mm[0] #print the mean and variance b = len(ya) print b index = m - ya[b-1] if(index < -m*0.005): return "Sell" elif(index < m*0.005): return "Hold" else: return "Buy More"
def main():
    """Download 2000-2009 daily data for four large-cap tickers and build
    price, volume and return DataFrames."""
    # Was missing entirely: assigning all_data[ticker] without this
    # initialization raised NameError on the first iteration.
    all_data = {}
    for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']:
        # '1/3/200' was a typo for the year 2000.
        all_data[ticker] = web.get_data_yahoo(ticker, '1/3/2000', '12/31/2009')
    price = DataFrame({tic: data['Adj Close'] for tic, data in all_data.iteritems()})
    volume = DataFrame({tic: data['Volume'] for tic, data in all_data.iteritems()})
    # NOTE(review): this divides by the *current* price, i.e.
    # (p_t - p_{t-1}) / p_t, not the usual pct_change definition -
    # preserved as written; confirm the intent.
    returns = (price - price.shift(1)) / price
def finance():
    """Flask view: build a Bokeh figure of historical prices for the
    ticker posted in the 'chart' form field (default 'CRME').

    Data is fetched from Yahoo! starting 2010-03-01; the rendered line
    plot carries a hover tool exposing OHLCV plus adjusted close.
    Returns the configured Bokeh figure.
    """
    chart = 'CRME'
    if request.method == "POST":
        chart = request.form.get('chart')
    start = datetime.datetime(2010, 3, 1)
    # =========== Yahoo API ======================
    # df = web.get_data_yahoo( 'TSLA', start, end, interval='w' )
    df = web.get_data_yahoo(chart, start)
    # convert the dates for the hover tool
    dates = pd.Series(df.index)
    dates = dates.dt.strftime('%d-%m-%Y').tolist()
    # ========== Hover tools configuration ========
    # Make a list of strings for every value(the hover tool accepts only str)
    # open_p and close the _p is beqause open and close is funcs in python
    open_p = [str(i) for i in df.Open]
    high = [str(i) for i in df.High]
    low = [str(i) for i in df.Low]
    close_p = [str(i) for i in df.Close]
    vol = [str(i) for i in df.Volume]
    adj = [str(i) for i in df['Adj Close']]
    TOOLS = 'pan,wheel_zoom,box_zoom,hover,crosshair,resize,reset'
    # NOTE(review): source1 is only consumed by the commented-out circle
    # plot below; it duplicates source2.
    source1 = cds({"Date": dates, "Open": open_p, "High": high, "Low": low,
                   "Close": close_p, "Volume": vol, "Adj": adj})
    source2 = cds({"Date": dates, "Open": open_p, "High": high, "Low": low,
                   "Close": close_p, "Volume": vol, "Adj": adj})
    TOOLTIPS = [("Date", "@Date"), ("Open", "@Open"), ("High", "@High"),
                ("Low", "@Low"), ("Close", "@Close"), ("Volume", "@Volume"),
                ("Adj Close*", "@Adj")]
    # Make the figure configuration
    f = figure(height=270, x_axis_type="datetime", tools=TOOLS,
               responsive=True)
    # Add title and label
    f.title = 'Historical Prices for ' + chart + " from 1.03.2010 until yesterday"
    f.xaxis.axis_label = 'Date'
    f.yaxis.axis_label = 'Open Prices'
    # make line and circle plots
    f.line(df.index, df.Open, source=source2, color='blue')
    # f.circle(df.index, df.Open, source = source1, color = 'navy', size = 0.5, alpha = 0.8)
    # other hover tool conf
    p_hover = f.select(HoverTool)
    p_hover.tooltips = TOOLTIPS
    return f
def get_symbol_from_yahoo(symbol, start=None, end=None):
    """Return the UTC-localized daily adjusted-close return series for
    *symbol*.

    Parameters: symbol plus optional start/end accepted by
    web.get_data_yahoo. The first (NaN) return is dropped.
    """
    px = web.get_data_yahoo(symbol, start=start, end=end)
    # Call rename on the instance rather than through the unbound class
    # method pd.DataFrame.rename(px, ...).
    px = px.rename(columns={'Adj Close': 'adj_close'})
    px.columns.name = symbol
    rets = px.adj_close.pct_change().dropna()
    rets.index = rets.index.tz_localize("UTC")
    return rets
def vendor_query(vendor, symbol, from_date, to_date):
    """
    Make a web query to data vendor

    :param vendor: 'QUANDL', 'YAHOO' or 'CSV' (symbol is then a file path)
    :param symbol:
    :param from_date:
    :param to_date:
    :return: the vendor's DataFrame, or None when the query fails
    """
    try:
        if vendor == "QUANDL":
            web_qry_result = Quandl.get(symbol, trim_start=from_date, trim_end=to_date,
                                        authtoken=QUANDL_TOKEN, verbose=False)
        elif vendor == "YAHOO":
            web_qry_result = web.get_data_yahoo(symbol, start=from_date, end=to_date)
        elif vendor == 'CSV':
            raw_read = pd.read_csv(symbol, index_col='Date', parse_dates=True)
            web_qry_result = raw_read[from_date:to_date]
        else:
            # Previously an unknown vendor surfaced as an UnboundLocalError
            # swallowed by a bare except; report it explicitly instead.
            print('Unknown vendor: ' + str(vendor))
            return None
        return web_qry_result
    except Exception as exc:  # narrowed from a bare except (keeps best-effort contract)
        print('Error querying the vendor')
def load_yahoo_stock(sids, start=None, end=None, dvds=True):
    """Load Yahoo! price history (optionally joined with dividends) for
    one sid or an iterable of sids.

    Returns an Instruments collection for iterable input, otherwise a
    single Instrument wrapping InstrumentPrices. Dates default to the
    trailing one-year window ending now.
    """
    if hasattr(sids, '__iter__') and not isinstance(sids, basestring):
        # Fan out recursively, one Instrument per sid.
        return Instruments([load_yahoo_stock(sid, start=start, end=end, dvds=dvds) for sid in sids])
    else:
        sid = sids
        end = end and pd.to_datetime(end) or pd.datetime.now()
        start = start and pd.to_datetime(start) or end + pd.datetools.relativedelta(years=-1)
        data = get_data_yahoo(sid, start=start, end=end)
        # Normalize column names to lower case ('Adj Close' -> 'adj close').
        data = data.rename(columns=lambda c: c.lower())
        if dvds:
            d = get_dividends_yahoo(sid, start, end)
            d.columns = ['dvds']
            if not d.empty:
                # sanity check - not expected currently
                # missing = d.index.difference(data.index)
                missing = d.index - data.index
                if len(missing) > 0:
                    raise Exception('dividends occur on non-business day, not expecting this')
                # another sanity check to ensure yahoo rolls dividends up, in case a special occurs on same day
                if not d.index.is_unique:
                    d = d.groupby(lambda x: x).sum()
                data = data.join(d)
            else:
                # No dividends in the window: keep the column shape stable.
                data['dvds'] = np.nan
        pxs = InstrumentPrices(data)
        return Instrument(sid, pxs, multiplier=1.)
def load_indices(self, tickers, startdate, lags):
    """Build self.dataframe of lagged 1-day percentage changes for
    *tickers*, cached in DATA.csv.

    For each ticker a '<ticker>1change' column plus *lags* shifted
    copies ('<ticker>1lag', '<ticker>2lag', ...) are created; all
    tickers are merged on date and the result persisted to the cache.
    """
    self.tickers = tickers
    self.filename = "DATA.csv"
    self.startdate = startdate
    self.enddate = datetime.date.today().strftime("%Y%m%d")
    if os.path.isfile(self.filename):
        # Cached copy available - skip the downloads entirely.
        data = pan.DataFrame.from_csv(self.filename)
        self.dataframe = data
    else:
        for ticker in tickers:
            data = web.get_data_yahoo(ticker, self.startdate, self.enddate)
            index = ticker + '1change'
            data[index] = data['Adj Close'].pct_change(1)
            #remove unused columns and nan row
            data = data[[index]]
            data = data[1:]
            #filter out middle threshold noise
            #data = data[np.logical_or(data[index] >= threshold, data[index] <= -threshold)]
            #preprocess data
            # NOTE(review): preprocess is defined elsewhere in this module.
            data = data.apply(preprocess)
            #lag data
            for i in range(1, lags + 1):
                label = ticker + "%dlag" % i
                data[label] = data[index].shift(i)
            #remove rows used for change calculation
            data = data[lags + 1:]
            print data.head(10)
            # "%5EGSPC" is the URL-encoded ^GSPC (S&P 500); it seeds the merge.
            if ticker == "%5EGSPC":
                self.sp = data
            else:
                self.sp = merge(self.sp, data, left_index=True, right_index=True)
        self.dataframe = self.sp
        self.dataframe.to_csv(self.filename)
def select_stocks(self, buy_label, sell_label):
    """Rank self.tickers by total return over [self.start_date,
    self.end_date], print the ranking, and persist the bottom decile as
    the sell portfolio and the top decile as the buy portfolio.

    Re-raises any KeyError/IOError from the per-ticker download.
    """
    # Shift a weekend start back two calendar days. Using timedelta
    # fixes the ValueError the old `day - 2` constructor raised when the
    # start fell on the 1st or 2nd of a month.
    # TODO(review): both Saturday and Sunday shift by 2 days (Saturday
    # therefore lands on Thursday) - behavior preserved from the original.
    if self.start_date.weekday() == 5 or self.start_date.weekday() == 6:
        shifted = self.start_date - datetime.timedelta(days=2)
        self.start_date = datetime.datetime(shifted.year, shifted.month, shifted.day)
    print((self.start_date.date(), self.end_date.date()))
    for ticker in self.tickers:
        try:
            data = pid.get_data_yahoo(ticker, start=self.start_date, end=self.end_date)["Close"]
            dates = data.index
            last_price = data[len(dates) - 1]
            # Simple return from the start date to the last available close.
            end_term_ret = (last_price - data[self.start_date]) / data[self.start_date]
            print((ticker, end_term_ret))
            self.stocks.append((ticker, end_term_ret, last_price))
        except (KeyError, IOError) as e:
            raise e
    # Ascending by return: worst performers first.
    self.stocks.sort(key=lambda t: t[1])
    utils.print_stocks("Stocks sorted on return", self.stocks)
    decile = int(len(self.stocks) * 0.10)
    sell_stocks = self.stocks[:decile]
    buy_stocks = self.stocks[decile * 9:]
    buy_stocks.reverse()
    utils.print_stocks("Sell stocks", sell_stocks)
    utils.print_stocks("Buy stocks", buy_stocks)
    utils.save_portfolio(sell_label, sell_stocks)
    utils.save_portfolio(buy_label, buy_stocks)
def get_history(symbols, start, end, data_path, visible=False):
    """
    to get Yahoo data from saved csv files. If the file does not exist for the symbol,
    data is read from Yahoo finance and the csv saved.
    symbols: symbol list
    start, end : datetime start/end dates
    data_path : datapath for csv files - use double \\ and terminate path with \\
    """
    symbols_ls = list(symbols)
    for ticker in symbols:
        print (ticker, ' ')
        try:
            #see if csv data available
            data = pd.read_csv(data_path + ticker + '.csv', index_col='Date', parse_dates=True)
        except:
            #if no csv data, create an empty dataframe
            data = pd.DataFrame(data=None, index=[start])
        #check if there is data for the start-end data range
        if start.toordinal() < data.index[0].toordinal() \
                or end.toordinal() > data.index[-1].toordinal():
            if visible:
                print ('Refresh data.. ',)
            try:
                new_data = web.get_data_yahoo(ticker, start, end)
                if new_data.empty == False:
                    if data.empty == False:
                        try:
                            # merge cached and fresh rows, keeping the
                            # last value where both have the same date
                            ticker_data = data.append(new_data).groupby(level=0, by=['rownum']).last()
                        except:
                            print ('Merge failed.. ')
                    else:
                        ticker_data = new_data
                    try:
                        ticker_data.to_csv(data_path + ticker + '.csv')
                        if visible:
                            print (' UPDATED.. ')
                    except:
                        print ('Save failed.. ')
                else:
                    if visible:
                        print ('No new data.. ')
            except:
                print ('Download failed.. ')
                # remove symbol from list
                symbols_ls.remove(ticker)
        else:
            if visible:
                print ('OK.. ')
        pass
    # Reload every surviving symbol's csv into one Panel and slice the
    # requested window.
    pdata = pd.Panel(dict((symbols_ls[i],
                           pd.read_csv(data_path + symbols_ls[i] + '.csv',
                                       index_col='Date', parse_dates=True).sort(ascending=True))
                          for i in range(len(symbols_ls))))
    return pdata.ix[:, start:end, :]
def _wrapped_get_data_yahoo(symbol, start, end):
    '''Handle the various exceptions that downloading from yahoo raises.

    A "did not return a 200" IOError is translated into
    ExternalRequestFailed; any other IOError is re-raised (previously it
    was silently swallowed and the function returned None).
    '''
    try:
        return get_data_yahoo(symbols=symbol, start=start, end=end)
    except IOError as e:
        if re.match(r'after \d tries, Yahoo! did not return a 200 for url', e.message):
            raise ExternalRequestFailed(e.message)
        raise
def get_prices_from_yahoo(required_data, type_of_price='Adj Close'):
    '''Fill *required_data* (symbols as columns, dates as index) with
    prices downloaded from Yahoo! and return it (mutated in place).

    type_of_price selects which price item to use; it was previously
    accepted but ignored (Close was always used regardless).
    '''
    # Yahoo errors out if you only ask for recent dates.
    start, end = required_data.index[0], required_data.index[-1]
    symbols = list(required_data.columns)
    new_data = get_data_yahoo(symbols=symbols, start=start, end=end)
    required_data.update(new_data[type_of_price])
    return required_data
def get_daily_data_yahoo(tickers, start_date='1/1/2010', end_date='5/1/2015'):
    """Download daily Yahoo! data for each (name, symbol) pair.

    tickers: iterable of 2-tuples; element [0] keys the result dict and
    element [1] is the Yahoo! symbol. Failed downloads are reported and
    skipped.
    """
    daily_data = {}
    for ticker in tickers:
        try:
            daily_data[ticker[0]] = web.get_data_yahoo(ticker[1], start_date, end_date)
        except Exception:  # narrowed from a bare except
            print("cannot download %s." % ticker[1])
    return daily_data
def setup_class(self):
    """Bearcart test data and template setup"""
    import pandas.io.data as web
    quotes = {}
    for symbol in ["AAPL", "GOOG"]:
        quotes[symbol] = web.get_data_yahoo(symbol, "4/1/2013", "5/1/2013")
    adj_close = {sym: frame["Adj Close"] for sym, frame in quotes.iteritems()}
    self.price = pd.DataFrame(adj_close)
    self.templates = Environment(loader=FileSystemLoader("templates"))
def get_history(symbols, start, end, data_path):
    """Load cached Yahoo! csv data for *symbols*, refreshing any cache
    that ends more than ~3 days before *end*, and return a Panel sliced
    to [start, end]. Symbols whose download fails are dropped."""
    symbols_ls = list(symbols)
    for ticker in symbols:
        print ticker,
        try:
            # see if csv data available
            data = pd.read_csv(data_path + ticker + ".csv", index_col="Date", parse_dates=True)
        except:
            # if no csv data, create an empty dataframe
            data = pd.DataFrame(data=None, index=[start])
        # check if there is data for the start-end data range
        # (3-day grace period so weekends don't force a refresh)
        if data.index[-1].toordinal() < end.toordinal() - 3:
            print "Refresh data.. ",
            try:
                new_data = web.get_data_yahoo(ticker, start, end)
                if new_data.empty == False:
                    if data.empty == False:
                        try:
                            # merge cached and fresh rows, keeping the
                            # last value where both have the same date
                            ticker_data = data.append(new_data).groupby(level=0, by=["rownum"]).last()
                        except:
                            print "Merge failed.. "
                    else:
                        ticker_data = new_data
                    try:
                        ticker_data.to_csv(data_path + ticker + ".csv")
                        print " UPDATED.. "
                    except:
                        print "Save failed.. "
                else:
                    print "No new data.. "
            except:
                print "Download failed.. "
                # remove symbol from list
                symbols_ls.remove(ticker)
        else:
            print "OK.. "
        pass
    # Reload every surviving symbol's csv into one Panel and slice the
    # requested window.
    pdata = pd.Panel(
        dict(
            (
                symbols_ls[i],
                pd.read_csv(data_path + symbols_ls[i] + ".csv", index_col="Date", parse_dates=True).sort(
                    ascending=True
                ),
            )
            for i in range(len(symbols_ls))
        )
    )
    return pdata.ix[:, start:end, :]
def getStock(symbol): all_data = web.get_data_yahoo(symbol,'1/1/2012','12/20/2014') close_px = all_data['Adj Close'] mavg = pandas.rolling_mean(close_px, 50) #print mavg all_data['mvg50'] = pandas.Series(mavg,index=all_data.index) print all_data validma50(all_data) drawChart(all_data)
def test_get_date_ret_index(self):
    """ret_index=True must expose a Ret_Index item whose first valid
    INTC observation is exactly 1.0."""
    pan = web.get_data_yahoo(["GE", "INTC", "IBM"], "1977", "1987", ret_index=True)
    # assertTrue replaces the deprecated assert_ alias.
    self.assertTrue(hasattr(pan, "Ret_Index"))
    if hasattr(pan, "Ret_Index") and hasattr(pan.Ret_Index, "INTC"):
        tstamp = pan.Ret_Index.INTC.first_valid_index()
        result = pan.Ret_Index.ix[tstamp]["INTC"]
        self.assertEqual(result, 1.0)
    # sanity checking
    assert np.issubdtype(pan.values.dtype, np.floating)
def test_get_data_interval(self):
    """Each supported interval code yields the expected row count for
    XOM over calendar year 2013; an unknown code raises ValueError."""
    # daily, weekly, monthly and dividend intervals
    expected_rows = [('d', 252), ('w', 53), ('m', 12), ('v', 4)]
    for code, rows in expected_rows:
        data = web.get_data_yahoo('XOM', '2013-01-01', '2013-12-31', interval=code)
        self.assertEqual(len(data), rows)
    # test fail on invalid interval
    self.assertRaises(ValueError, web.get_data_yahoo, 'XOM', interval='NOT VALID')
def test_get_data(self): import numpy as np #single symbol #http://finance.yahoo.com/q/hp?s=GOOG&a=09&b=08&c=2010&d=09&e=10&f=2010&g=d df = web.get_data_yahoo('GOOG') assert df.Volume.ix['OCT-08-2010'] == 2859200 sl = ['AAPL', 'AMZN', 'GOOG'] pan = web.get_data_yahoo(sl, '2012') ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG] assert ts[0].dayofyear == 96 #dfi = web.get_components_yahoo('^DJI') #pan = web.get_data_yahoo(dfi, 'JAN-01-12', 'JAN-31-12') pan = web.get_data_yahoo(['GE', 'MSFT', 'INTC'], 'JAN-01-12', 'JAN-31-12') expected = [19.02, 28.23, 25.39] result = pan.Close.ix['01-18-12'][['GE', 'MSFT', 'INTC']].tolist() assert result == expected # sanity checking t= np.array(result) assert np.issubdtype(t.dtype, np.floating) assert t.shape == (3,) expected = [[ 18.99, 28.4 , 25.18], [ 18.58, 28.31, 25.13], [ 19.03, 28.16, 25.52], [ 18.81, 28.82, 25.87]] result = pan.Open.ix['Jan-15-12':'Jan-20-12'][['GE', 'MSFT', 'INTC']].values assert (result == expected).all() #Check ret_index pan = web.get_data_yahoo(['GE', 'INTC', 'IBM'], '1977', '1987', ret_index=True) tstamp = pan.Ret_Index.INTC.first_valid_index() result = pan.Ret_Index.ix[tstamp]['INTC'] expected = 1.0 assert result == expected # sanity checking t= np.array(pan) assert np.issubdtype(t.dtype, np.floating)
def get_px(stock, start, end):
    '''
    Takes a stock ticker, start and end date and will return time delimited
    price data.

    Parameters:
    stock- Stock listing, e.g MSFT
    start- Start date for data collection
    end -End date for data collection
    '''
    frame = web.get_data_yahoo(stock, start, end)
    return frame['Adj Close']
def test_get_date_ret_index(self):
    """ret_index=True must add a Ret_Index item whose first valid INTC
    entry equals exactly 1.0."""
    panel = web.get_data_yahoo(['GE', 'INTC', 'IBM'], '1977', '1987',
                               ret_index=True)
    has_ret = hasattr(panel, 'Ret_Index')
    self.assertTrue(has_ret)
    if has_ret and hasattr(panel.Ret_Index, 'INTC'):
        first = panel.Ret_Index.INTC.first_valid_index()
        self.assertEqual(panel.Ret_Index.ix[first]['INTC'], 1.0)
    # every panel entry should be floating point
    self.assertTrue(np.issubdtype(panel.values.dtype, np.floating))
def _remote_fetch(self, start, end):
    """Download [start, end] price data for self.symbol from Yahoo!.

    The requested range must not exceed self.api_limit days.
    NOTE(review): validating with assert means the checks vanish under
    python -O; kept as-is to preserve the AssertionError callers may
    rely on, but consider raising ValueError instead.
    """
    assert (end - start).days <= self.api_limit, (
        'Range is greater than the Yahoo finanace api '
        'limit of %s.' % self.api_limit)

    def _yahoo_date(date):
        # Format as dd/mm/yyyy. NOTE(review): this ordering is ambiguous
        # for days <= 12 - confirm get_data_yahoo parses it as intended.
        assert isinstance(date, datetime)
        return date.strftime('%d/%m/%Y')

    return get_data_yahoo(self.symbol,
                          start=_yahoo_date(start),
                          end=_yahoo_date(end))
def test_get_data_multiple_symbols_two_dates(self):
    """A three-symbol January 2012 download yields one close per symbol
    on 01-18 and a 4x3 block of opens over Jan 15-20."""
    panel = web.get_data_yahoo(["GE", "MSFT", "INTC"], "JAN-01-12", "JAN-31-12")
    closes = panel.Close.ix["01-18-12"]
    self.assertEqual(len(closes), 3)
    # sanity checking
    assert np.issubdtype(closes.dtype, np.floating)

    want = np.array([[18.99, 28.4, 25.18],
                     [18.58, 28.31, 25.13],
                     [19.03, 28.16, 25.52],
                     [18.81, 28.82, 25.87]])
    opens = panel.Open.ix["Jan-15-12":"Jan-20-12"]
    self.assertEqual(want.shape, opens.shape)
def setup_class(self):
    '''Bearcart test data and template setup'''
    import pandas.io.data as web
    raw = {}
    for sym in ['AAPL', 'GOOG']:
        raw[sym] = web.get_data_yahoo(sym, '4/1/2013', '5/1/2013')
    self.price = pd.DataFrame({tic: frame['Adj Close']
                               for tic, frame in raw.iteritems()})
    self.templates = Environment(loader=FileSystemLoader('templates'))
import pandas as pd
import matplotlib.pyplot as plt
from pandas.io.data import get_data_yahoo

##### download data from Yahoo #####

# Download the S&P 500
SP500 = get_data_yahoo('^GSPC', start='1950-01-03')

##### plot the data #####

# plot the S&P 500 index
ax1 = SP500['Close'].plot()

# add labels, axes, title, etc
ax1.set_ylabel('Close')
# log scale renders the long-run growth rate as a readable slope
ax1.set_yscale('log')
ax1.set_title('Historical S&P 500 Index', weight='bold')

# load the NBER recession dates
NBER_Dates = pd.read_csv('NBER Dates.txt')

# for loop generates recession bands!
for i in range(NBER_Dates.shape[0]):
    ax1.axvspan(NBER_Dates['Peak'][i], NBER_Dates['Trough'][i],
                facecolor='grey', alpha=0.5)

# save the figure and display
plt.savefig('2012-12-21-SP500.png')
plt.show()
covariance:
    -measure of the strength of the correlation between two or more sets of random variates
    -positive, negative, zero
    -http://mathworld.wolfram.com/Covariance.html
    -http://stats.stackexchange.com/questions/18058/how-would-you-explain-covariance-to-someone-who-understands-only-the-mean
    -http://math.tutorvista.com/statistics/covariance.html
"""
import numpy as np
import pandas as pd
import pandas.io.data as web

# Five years of daily data per ticker.
alldata = {}
for ticker in ["AAPL", "IBM", "MSFT", "GOOG"]:
    alldata[ticker] = web.get_data_yahoo(ticker, "1/1/2010", "1/1/2015")

alldata["AAPL"].keys()
# Index([u'Open', u'High', u'Low', u'Close', u'Volume', u'Adj Close'], dtype='object')

price = pd.DataFrame(
    {tic: data["Adj Close"] for tic, data in alldata.iteritems()})  # [1258 rows x 3 columns]
volume = pd.DataFrame(
    {tic: data["Volume"] for tic, data in alldata.iteritems()})  # [1258 rows x 3 columns]

# compute changes in price
returns = price.pct_change()
returns.tail()
''' AAPL GOOG IBM MSFT Date
import datetime as dt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pandas.io.data as web

# step 1: Range Selection
st = dt.datetime(2000, 12, 1)
en = dt.datetime(2015, 1, 1)

sp500_tickers_lil = ['AA', 'AAPL', 'ABC', 'ABT', 'ADBE', 'ADI', 'ADM', 'ADP', 'ADSK', 'AEE']

# The full ticker list.  Fixes applied: the duplicated comma after 'BEN'
# was a syntax error, and 'SEE' had been split in two by a line break.
sp500_tickers_joey = [
    'AA','AAPL','ABC','ABT','ADBE','ADI','ADM','ADP','ADSK','AEE','AEP','AES','AET','AFL','AGN','AIG','AIV','AIZ',
    'AKAM','ALL','AMAT','AMGN','AMP','AMT','AMZN','AN','ANTM','AON','APA','APC','APD','AVB','AVY','AXP','AZO','BA',
    'BAC','BAX','BBBY','BBT','BBY','BCR','BDX','BEN','BHI','BIIB','BK','BLL','BMY','BRCM','BSX','BXP','C','CA',
    'CAG','CAH','CAT','CB','CBG','CBS','CCE','CCL','CELG','CHK','CHRW','CI','CINF','CL','CLX','CMA','CMCSA','CME',
    'CMI','CMS','CNP','CNX','COF','COH','COL','COP','COST','CPB','CSCO','CSX','CTAS','CTL','CTSH','CTXS','CVS',
    'CVX','D','DD','DE','DFS','DGX','DHI','DHR','DIS','DOV','DOW','DRI','DTE','DUK','DVN','EA','EBAY','ECL','ED',
    'EFX','EIX','EL','EMC','EMN','EMR','EOG','EQR','ESRX','ETFC','ETN','ETR','EXC','EXPD','EXPE','F','FCX','FDX',
    'FE','FIS','FISV','FITB','FLR','FOXA','FTR','GAS','GD','GE','GILD','GIS','GLW','GME','GOOGL','GPC','GPS','GS',
    'GT','GWW','HAL','HAR','HAS','HBAN','HD','HES','HIG','HOG','HON','HOT','HPQ','HRB','HST','HSY','HUM','IBM',
    'ICE','IFF','INTC','INTU','IP','IPG','ITW','JCI','JEC','JNJ','JNPR','JPM','JWN','K','KEY','KIM','KLAC','KMB',
    'KO','KR','KSS','L','LB','LEG','LEN','LH','LLL','LLTC','LLY','LM','LMT','LNC','LOW','LUK','LUV','M','MAR',
    'MAS','MAT','MCD','MCHP','MCK','MCO','MDLZ','MDT','MET','MHFI','MKC','MMC','MMM','MO','MON','MRK','MRO','MS',
    'MSFT','MSI','MTB','MU','MUR','MYL','NBL','NEE','NEM','NI','NKE','NOC','NOV','NSC','NTAP','NTRS','NUE','NVDA',
    'NWL','OMC','ORCL','OXY','PAYX','PBI','PCAR','PCG','PCL','PCP','PDCO','PEG','PEP','PFE','PFG','PG','PGR','PH',
    'PHM','PKI','PLD','PNC','PNW','POM','PPG','PPL','PRU','PSA','PX','QCOM','R','RAI','RF','RHI','RL','ROK','RRC',
    'RTN','SBUX','SCHW','SE','SEE','SHW','SLB','SNA','SNDK','SO','SPG','SPLS','SRE','STI','STJ','STT','STZ','SWK',
    'SYK','SYMC','SYY','T','TAP','TDC','TE','TGT','THC','TIF','TJX','TMK','TMO','TROW','TRV','TSN','TSO','TSS',
    'TWX','TXN','TXT','UNH','UNM','UNP','UPS','USB','UTX','VAR','VFC','VIAB','VLO','VMC','VNO','VRSN','VZ','WAT',
    'WBA','WFC','WFM','WHR','WM','WMB','WMT','WU','WY','WYN','XEL','XL','XLNX','XOM','XRX','YHOO','YUM','ZBH','ZION']

# Benchmark: monthly SPY returns over the same window.
spy = web.get_data_yahoo('SPY', start=st, end=en)['Adj Close']
spy_data = spy.resample("M", how='last').pct_change()

regr_coefs = []
dataset = sp500_tickers_joey
#dataset = ['AA']
for ticker in dataset:
    try:
        data = web.get_data_yahoo(ticker, start=st, end=en).resample("M", how='last')['Adj Close']
    except Exception:  # narrowed from a bare except; skip tickers that fail to download
        print(ticker, "error")
        continue
    data_pct = data.pct_change().dropna()
    #returns = pd.DataFrame(index = data.index,columns = [1,2,3,4,5,6,7,8,9,10,11,12,'spy'])
import pandas as pd
import matplotlib.pyplot as plt
from pandas.io.data import get_data_yahoo, get_data_fred

##### download data #####

# Download the S&P 500
SP500 = get_data_yahoo('^GSPC', start='1950-01-03', end='2012-11-30')

# Download the CPI data
CPIAUCSL = get_data_fred('CPIAUCSL', start='1950-01-01')

##### resample S&P 500 data #####

# Need S&P 500 data to be monthly...note I am taking monthly averages
monthly_avg_SP500 = SP500.resample('MS', how='mean')

# Add the CPI data as a column to the monthly DataFrame
monthly_avg_SP500['CPIAUCSL'] = CPIAUCSL

##### Convert nominal values to real values #####

# express all prices in terms of the price level in Nov. 2012...
# (deflator > 1 for earlier dates, scaling old prices up to 2012 dollars)
monthly_avg_SP500['Price Deflator'] = (
    monthly_avg_SP500['CPIAUCSL']['2012-11-01'] /
    monthly_avg_SP500['CPIAUCSL'])
monthly_avg_SP500['Close (Real)'] = monthly_avg_SP500[
    'Close'] * monthly_avg_SP500['Price Deflator']

##### Nominal S&P 500 #####
# IPython log file
# NOTE(review): this was a saved interactive session.  The lines below
# are the cleaned, syntactically valid subset of what was typed: the
# typo'd call "get_data_yahoo('SPY'['Adj Close']", the stray "asdfas",
# and the bare history underscores (_, __, _i15, ...) have been removed,
# and only the final binding of each variable is kept.

plt.figure()
plot(np.random.randn(100).cumsum())

from pandas.io.data import get_data_yahoo
spy_close = get_data_yahoo('SPY')['Adj Close']
spy_close
spy_close.plot()

a = 1
b = 2
c = 4
def test_get_data_single_symbol(self):
    """Smoke test: downloading a single symbol must not raise."""
    #single symbol
    #http://finance.yahoo.com/q/hp?s=GOOG&a=09&b=08&c=2010&d=09&e=10&f=2010&g=d
    web.get_data_yahoo('GOOG')
import numpy as np
import pandas as pd
import pandas.io.data as web

#goog=web.DataReader('GOOG',data_source='google',start='1/21/2013',end='4/6/2015')
# '000009.sz' is a Shenzhen-listed ticker (Yahoo!'s .sz suffix).
goog = web.get_data_yahoo('000009.sz', start='1/21/2013', end='4/4/2015')
print goog.head()
def test_get_data_multiple_symbols(self):
    """Smoke test: a list of symbols must download without raising."""
    symbols = ['AAPL', 'AMZN', 'GOOG']
    web.get_data_yahoo(symbols, '2012')
# -*- coding: utf-8 -*- """ Vincent Area Examples """ #Build an Area Chart from scratch from vincent import * import pandas.io.data as web all_data = {} for ticker in ['AAPL', 'GOOG', 'IBM', 'YHOO', 'MSFT']: all_data[ticker] = web.get_data_yahoo(ticker, '1/1/2010', '1/1/2013') price = pd.DataFrame( {tic: data['Adj Close'] for tic, data in all_data.items()}) vis = Visualization(width=500, height=300) vis.padding = {'top': 10, 'left': 50, 'bottom': 50, 'right': 100} vis.scales['x'] = Scale(name='x', type='time', range='width', domain=DataRef(data='table', field="data.idx")) vis.scales['y'] = Scale(name='y', range='height', type='linear', nice=True, domain=DataRef(data='table', field="data.val")) vis.scales['color'] = Scale(name='color',
import pandas.io.data as web
import pandas as pd
import numpy as np
import talib as ta
import matplotlib.pyplot as plt
from matplotlib.dates import date2num
from matplotlib.finance import candlestick

# Download sample data
spy = web.get_data_yahoo('SPY', '2010-01-01')

# Data for matplotlib finance plot
# String keys '0'..'4' sort so the array columns come out in the order
# date, Open, Close, High, Low.
spy_ochl = np.array(pd.DataFrame({'0': date2num(spy.index),
                                  '1': spy.Open,
                                  '2': spy.Close,
                                  '3': spy.High,
                                  '4': spy.Low}))

# Technical Analysis
SMA_FAST = 34
SMA_SLOW = 144
RSI_PERIOD = 14

# Fast/slow simple moving averages plus RSI, aligned on spy's index.
analysis = pd.DataFrame(index=spy.index)
analysis['sma_f'] = pd.rolling_mean(spy.Close, SMA_FAST)
analysis['sma_s'] = pd.rolling_mean(spy.Close, SMA_SLOW)
analysis['rsi'] = ta.RSI(spy.Close, RSI_PERIOD)

# Record signals (open position after crossover)
# -*- coding: utf-8 -*-
import numpy as np
import pandas.io.data as web
from pandas import DataFrame

print('相关性与协方差')  # "correlation and covariance"
# Covariance: https://zh.wikipedia.org/wiki/%E5%8D%8F%E6%96%B9%E5%B7%AE
all_data = {}
for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']:
    # The original range ran backwards (start '4/1/2016', end '7/15/2015'),
    # so every download was empty; the endpoints are swapped here.
    all_data[ticker] = web.get_data_yahoo(ticker, '7/15/2015', '4/1/2016')
price = DataFrame({tic: data['Adj Close'] for tic, data in all_data.items()})
volume = DataFrame({tic: data['Volume'] for tic, data in all_data.items()})
returns = price.pct_change()
print(returns.tail())
print(returns.MSFT.corr(returns.IBM))
print(returns.corr())  # correlation matrix; the diagonal is always 1
print(returns.cov())   # covariance matrix
print(returns.corrwith(returns.IBM))
# Correlate each return series with the matching volume series.
# `returns` has no 'volume' column, so the original
# returns.corrwith(returns.volume) raised AttributeError.
print(returns.corrwith(volume))
# NOTE(review): index_2, cat_2 and the random/pd imports come from an
# earlier, unseen part of this script.
multi_iter2 = {'index': index_2}
for cat in cat_2:
    multi_iter2[cat] = [random.randint(10, 100) for x in index_2]

# Produce counts for four farms, also exposed as a DataFrame below.
farm_1 = {'apples': 10, 'berries': 32, 'squash': 21, 'melons': 13, 'corn': 18}
farm_2 = {'apples': 15, 'berries': 43, 'squash': 17, 'melons': 10, 'corn': 22}
farm_3 = {'apples': 6, 'berries': 24, 'squash': 22, 'melons': 16, 'corn': 30}
farm_4 = {'apples': 12, 'berries': 30, 'squash': 15, 'melons': 9, 'corn': 15}

farm_data = [farm_1, farm_2, farm_3, farm_4]
farm_index = ['Farm 1', 'Farm 2', 'Farm 3', 'Farm 4']
df_farm = pd.DataFrame(farm_data, index=farm_index)

#As DataFrames
index_3 = multi_iter2.pop('index')
df_1 = pd.DataFrame(multi_iter2, index=index_3)
df_1 = df_1.reindex(columns=sorted(df_1.columns))

cat_4 = ['Metric_' + str(x) for x in range(0, 10, 1)]
index_4 = ['Data 1', 'Data 2', 'Data 3', 'Data 4']
data_3 = {}
for cat in cat_4:
    data_3[cat] = [random.randint(10, 100) for x in index_4]
df_2 = pd.DataFrame(data_3, index=index_4)

# Real stock data: two years of adjusted closes per ticker.
import pandas.io.data as web
all_data = {}
for ticker in ['AAPL', 'IBM', 'YHOO', 'MSFT']:
    all_data[ticker] = web.get_data_yahoo(ticker, '1/1/2012', '1/1/2014')
price = pd.DataFrame({tic: data['Adj Close']
                      for tic, data in all_data.iteritems()})
import numpy as np
import pandas as pd
import pandas.io.data as web

# Roughly five years of daily GOOG data.
goog = web.get_data_yahoo('GOOG', '3/14/2009', '2/14/2014')
# Daily log returns.
goog['Log_Ret'] = np.log(goog['Close'] / goog['Close'].shift(1))
# Rolling volatility over a 252-trading-day window, annualized.
goog['Volatility'] = pd.rolling_std(goog['Log_Ret'], window=252) * np.sqrt(252)

#%matplotlib inline
#goog[['Close','Volatility']].plot(subplots=True,color='blue',figsize=(20,12))
print goog
# Pause until the user presses Enter (Python 2).
raw_input('hello')
print('Hello World')
import matplotlib.pyplot as plt
from matplotlib import rcParams
import pandas as pd
from pandas.io.data import get_data_yahoo

# Non-interactive mode: the figure is built but not shown automatically.
plt.ioff()

# First two colors of the active matplotlib color cycle.
c1, c2 = rcParams['axes.color_cycle'][:2]

stocks = ['LINE', 'LNCO']
prices = get_data_yahoo(stocks, start='1/1/2000')['Adj Close'].dropna()

# Date labels for the plot title.
day1 = str(prices.index.min().date())
day2 = str(prices.index.max().date())

fig = plt.figure()
ax = fig.add_subplot(111)
prices.plot(ax=ax)
# Shade the gap between the two series, colored by which one is on top.
ax.fill_between(prices.index, prices['LINE'], prices['LNCO'], alpha=0.5,
                where=prices['LINE'] >= prices['LNCO'], facecolor=c1)
ax.fill_between(prices.index, prices['LINE'], prices['LNCO'], alpha=0.5,
                where=prices['LINE'] < prices['LNCO'], facecolor=c2)
ax.set_title("LINE and LNCO: %s to %s" % (day1, day2))
import numpy as np
import pandas as ps
import pandas.io.data as pdweb
import datetime

# Three years of adjusted closes for a trio of oil majors.
oil_tickers = ['CVX', 'XOM', 'BP']
adj_close = pdweb.get_data_yahoo(oil_tickers,
                                 start=datetime.datetime(2010, 1, 1),
                                 end=datetime.datetime(2013, 1, 1))['Adj Close']
print(adj_close.head())
'''\ Rev: 1 Author: silentshadow Description: feed investment portfolio data into pandas Reference: http://www.ibm.com/developerworks/cloud/library/cl-datascienceincloud/ ''' import pandas.io.data as web import matplotlib.pyplot as plt from pandas import DataFrame data_feed = {} symbols = ['AAPL', 'FB', 'TSLA', 'GOOG'] for ticker in symbols: data_feed[ticker] = web.get_data_yahoo(ticker, '03/1/2016', '04/1/2016') price = DataFrame({tic: data['Adj Close'] for tic, data in data_feed.items()}) volume = DataFrame({tic: data['Volume'] for tic, data in data_feed.items()}) returns = price.pct_change() returns.sum().plot(kind='bar', title="% Return For Year") plt.show()
#! /usr/bin/env python
# -*- coding:utf-8 -*-
"""
@author : MG
@Time : 19-4-11 9:11 AM
@File : candle_demo2.py.py
@contact : [email protected]
@desc : Download stock data from Yahoo and plot it as a candlestick chart.
"""
#The following example, downloads stock data from Yahoo and plots it.
from pandas.io.data import get_data_yahoo
import matplotlib.pyplot as plt
from matplotlib.pyplot import subplots, draw
from matplotlib.dates import date2num
from matplotlib.finance import candlestick

symbol = "GOOG"
data = get_data_yahoo(symbol, start = '2013-9-01',
                      end = '2013-10-23')[['Open','Close','High','Low','Volume']]

# BUG FIX: subplots() returns a (figure, axes) tuple; the original bound the
# whole tuple to `ax` and then passed it to candlestick().
fig, ax = subplots()

# BUG FIX: matplotlib.finance.candlestick expects a sequence of
# (time, open, close, high, low) quote tuples, not four separate series.
quotes = [(date2num(ts), row['Open'], row['Close'], row['High'], row['Low'])
          for ts, row in data.iterrows()]
candlestick(ax, quotes)

if __name__ == "__main__":
    pass
# NOTE(review): `df`, `Series` and `DataFrame` come from earlier in the file
# (not visible in this chunk). Column-wise vs row-wise reductions:
df.sum()
df.sum(axis=1)
df.mean(axis=1, skipna=False)  # skipna=False: any NaN poisons the row mean
df.idxmax()
df.cumsum()
df.describe()

# describe() on a non-numeric Series reports count/unique/top/freq instead.
obj = Series(['a', 'a', 'b', 'c'] * 4)
obj.describe()

# Correlation coefficients
import pandas.io.data as web
all_data = {}
for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']:
    all_data[ticker] = web.get_data_yahoo(ticker)
price = DataFrame({tic: data['Adj Close']
                   for tic, data in all_data.iteritems()})
volume = DataFrame({tic: data['Volume']
                    for tic, data in all_data.iteritems()})

returns = price.pct_change()
returns.tail()

# Pairwise statistics between two Series...
returns.MSFT.corr(returns.IBM)
returns.MSFT.cov(returns.IBM)
# ...full matrices over the DataFrame...
returns.corr()
returns.cov()
# ...and column-wise correlation against another Series / DataFrame.
returns.corrwith(returns.IBM)
returns.corrwith(volume)
##############################################################################
#
# An example of converting a Pandas dataframe with stock data taken from the
# web to an xlsx file with a line chart using Pandas and XlsxWriter.
#
# Copyright 2013-2020, John McNamara, [email protected]
#
import pandas as pd
import pandas.io.data as web

# Fetch one year of daily prices per ticker.
tickers = ['AAPL', 'GOOGL', 'IBM', 'YHOO', 'MSFT']
all_data = {t: web.get_data_yahoo(t, '5/1/2014', '5/1/2015') for t in tickers}

# Collect the adjusted closes into a single dataframe.
df = pd.DataFrame({tic: frame['Adj Close'] for tic, frame in all_data.items()})

# Write the dataframe through the XlsxWriter engine.
sheet_name = 'Sheet1'
writer = pd.ExcelWriter('pandas_chart_stock.xlsx', engine='xlsxwriter')
df.to_excel(writer, sheet_name=sheet_name)

# Underlying XlsxWriter objects, for direct formatting.
workbook = writer.book
worksheet = writer.sheets[sheet_name]

# Widen column A so the date values are readable.
worksheet.set_column('A:A', 20)
#!/bin/env python
#
import pandas as pd
import pandas.io.data as web
from qrzigzag import peak_valley_pivots, max_drawdown, compute_segment_returns, pivots_to_modes

# Mark +/-20% zig-zag pivots on Google's adjusted close and overlay them.
X = web.get_data_yahoo('GOOG')['Adj Close']
pivots = peak_valley_pivots(X, 0.2, -0.2)

# Keep only the prices at pivot points (pivots != 0).
ts_pivots = pd.Series(X, index=X.index)
ts_pivots = ts_pivots[pivots != 0]

# Full series, with green circles connecting the pivots.
X.plot()
ts_pivots.plot(style='g-o')
import numpy as np
import pandas as pd
from pandas import DataFrame,Series
import matplotlib.pyplot as plt
import pandas.io.data as pdweb
import datetime

# Three years of adjusted closes and volumes for three oil majors.
prices=pdweb.get_data_yahoo(["CVX","XOM","BP"],start=datetime.datetime(2010,1,1),
                            end=datetime.datetime(2013,1,1))["Adj Close"]
print(prices.head())
print("="*50)

volume=pdweb.get_data_yahoo(["CVX","XOM","BP"],start=datetime.datetime(2010,1,1),
                            end=datetime.datetime(2013,1,1))["Volume"]
print(volume.head())
print("="*50)

rets=prices.pct_change() #call percentage change

#get correlation
# BUG FIX: the original `corr=rets.corr` stored the bound method itself;
# it must be called to compute the correlation matrix.
corr=rets.corr()

#plotting it
prices.plot()
plt.show()

# Heatmap of pairwise return correlations.
import seaborn as sns
sns.corrplot(rets,annot=False,diag_names=False)
def get_benchmark_series(benchmark):
    """Return the benchmark's Close price series from Yahoo, starting at the
    module-level `fromDate`."""
    frame = web.get_data_yahoo([benchmark], fromDate)
    return frame["Close"][benchmark]
def get_px(stock, start, end):
    """Return the adjusted-close series for `stock` over [start, end]."""
    frame = web.get_data_yahoo(stock, start, end)
    return frame['Adj Close']
def test_get_data_single_symbol(self):
    """Fetching a single symbol returns a frame indexed by date with the
    expected volume for a known session."""
    # single symbol
    # http://finance.yahoo.com/q/hp?s=GOOG&a=09&b=08&c=2010&d=09&e=10&f=2010&g=d
    frame = web.get_data_yahoo('GOOG')
    volume = frame.Volume.ix['OCT-08-2010']
    self.assertEqual(volume, 2859200)
def getPerf(self, symbol, param, strategy, enddatestr=""): ret = {} self.setDateRange(enddatestr) try: ohlc = web.get_data_yahoo(symbol, self.startdate, self.enddate) except: # IO error print "System/Network Error when retrieving ", symbol, " skip it" return ret # calculate perf px = ohlc['Adj Close'] #additional indicator for gy in strategy: if gy not in self.stgyInx: self.stgyInx[gy] = self.loadStrategy(gy) self.stgyInx[gy].runIndicator(px, strategy[gy]) p1d = 0 p4w = 0 p12w = 0 p24w = 0 pmax = 0 plen = len(px) if plen >= 2: p1d = round((px[-1] / px[-2] - 1) * 100, 2) if plen >= 4 * 7: p4w = round((px[-1] / px[-4 * 7] - 1) * 100, 2) if plen >= 12 * 7: p12w = round((px[-1] / px[-12 * 7] - 1) * 100, 2) if plen >= 24 * 7: p24w = round((px[-1] / px[-24 * 7] - 1) * 100, 2) if len(px) >= 1 * 7: p1w = round((px[-1] / px[-1 * 7] - 1) * 100, 2) pmax = round((px[-1] / px[0] - 1) * 100, 2) if 'vol20' in param: sma20vol = pandas.stats.moments.rolling_mean(ohlc['Volume'], 20) ret['vol20'] = sma20vol[-1] if 'vol' in param: ret['vol'] = ohlc['Volume'][-1] if 'px' in param: ret['px'] = ohlc['Adj Close'][-1] if 'ma10' in param: sma10s = pandas.stats.moments.rolling_mean(px, 10) ret['ma10'] = round(sma10s[-1], 2) if 'ma50' in param: sma50s = pandas.stats.moments.rolling_mean(px, 50) ret['ma50'] = round(sma50s[-1], 2) if 'ma200' in param: sma200s = pandas.stats.moments.rolling_mean(px, 200) ret['ma200'] = round(sma200s[-1], 2) ret['p1d'] = p1d ret['p1w'] = p1w ret['p4w'] = p4w ret['p12w'] = p12w ret['p24w'] = p24w return ret
# NOTE(review): `df`, `Series` and `DataFrame` come from earlier in the file
# (not visible in this chunk).
df.idxmax()
df.idxmin(axis=1)
df.cumsum()
df.describe()

# describe() on a non-numeric Series reports count/unique/top/freq instead.
obj = Series(['a', 'a', 'b', 'c'] * 4)
obj.describe()

# After adding a string column, describe() still summarizes only the
# numeric columns; describe the string column explicitly.
df['three'] = ['a', 'b', 'c', 'a']
df.describe()
df['three'].describe()
"""
Correlation and Covariance
"""
import pandas.io.data as web
all_data = {}
for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']:
    all_data[ticker] = web.get_data_yahoo(ticker, '1/1/2000', '1/1/2010')
price = DataFrame(
    {tic: data['Adj Close'] for tic, data in all_data.iteritems()})
volume = DataFrame({tic: data['Volume']
                    for tic, data in all_data.iteritems()})

returns = price.pct_change()
returns.tail()

# Pairwise statistics between two Series...
returns.MSFT.corr(returns.IBM)
returns.MSFT.cov(returns.IBM)
# ...full matrices over the DataFrame...
returns.corr()
returns.cov()
# ...and column-wise correlation against another Series / DataFrame.
returns.corrwith(returns.IBM)
returns.corrwith(volume)
"""
Unique Values, Value Count, and Membership
"""