def run(self):
    """Collect adjusted closes from Yahoo Finance and turn them into simple returns.

    SPY is downloaded first and its (sorted) index is used as the reference
    calendar. Every ticker in ``self.assets`` is then left-joined onto that
    calendar, gaps are forward- then backward-filled, and the prices are
    converted to simple returns. The result is stored in ``self.df`` with one
    column per asset and a fresh integer index (the first row, which has no
    previous price, is dropped).

    Reads ``self.assets``, ``self.start`` and ``self.end``; writes ``self.df``.
    """
    # Reference dates come from SPY.
    df_ref = DataReader('SPY', 'yahoo', self.start, self.end)['Adj Close']
    df_ref = df_ref.sort_index()

    if 'SPY' in self.assets:
        # Keep SPY as a proper one-column frame so later joins work.
        self.df = df_ref.to_frame('SPY')
    else:
        self.df = pd.DataFrame(index=df_ref.index)

    # Retrieve the adjusted close for the remaining assets.
    for asset in self.assets:
        if asset in self.df.columns:
            # Already present (SPY): joining again would clash on the name.
            continue
        df_asset = DataReader(asset, 'yahoo', self.start, self.end)['Adj Close']
        self.df = self.df.join(df_asset.rename(asset), how='left')

    # Fill gaps: carry the last known price forward, then back-fill any
    # leading holes.
    self.df = self.df.ffill().bfill()

    # Simple returns; the first row is NaN by construction and is dropped.
    self.df = (self.df / self.df.shift(1) - 1.0).iloc[1:, :]

    # Replace the date index with a plain 0..n-1 range.
    self.df.reset_index(drop=True, inplace=True)
def Econ_env(YYYY, m, dd):
    """Return the S&P 500 adjusted close from the given date onward.

    Parameters are the year, month and day of the first date requested.
    The result is a DataFrame with a single column named ``'S&P'``.
    """
    first_day = datetime.datetime(YYYY, m, dd)

    # NOTE(review): the GDP series is downloaded from FRED but is not used
    # in the value returned below.
    gdp_series = DataReader('GDP', "fred", start=first_day)

    index_quotes = DataReader('^GSPC', "yahoo", start=first_day)
    return DataFrame({'S&P': index_quotes["Adj Close"]})
def _load_raw_yahoo_data(indexes=None, stocks=None, start=None, end=None):
    """Load raw price frames from Yahoo Finance, caching stock downloads.

    :Optional:
        indexes : dict
            Mapping of display name -> Yahoo ticker for financial indexes.
        stocks : list
            Stock tickers to load. Each download is cached as a CSV file
            (path obtained from ``get_cache_filepath``) and re-read from
            there on later calls with the same ticker/start/end.
        start : datetime (Default: 1990-01-01 UTC)
            Retrieve prices from start date on.
        end : datetime
            Retrieve prices until end date.

    :Returns:
        OrderedDict mapping each stock ticker / index name to the
        date-sorted DataFrame returned by ``DataReader``.

    At least one of `indexes` and `stocks` must be given.

    :Note:
        This is based on code presented in a talk by Wes McKinney:
        http://wesmckinney.com/files/20111017/notebook_output.pdf
    """
    assert indexes is not None or stocks is not None, """ must specify stocks or indexes"""

    if start is None:
        # pd.datetime was removed from pandas; Timestamp is a datetime
        # subclass and formats identically in the cache file name.
        start = pd.Timestamp('1990-01-01', tz=pytz.utc)

    if start is not None and end is not None:
        assert start < end, "start date is later than end date."

    data = OrderedDict()

    if stocks is not None:
        for stock in stocks:
            print(stock)
            # Tickers may contain the path separator; keep the name flat.
            stock_pathsafe = stock.replace(os.path.sep, '--')
            cache_filename = "{stock}-{start}-{end}.csv".format(
                stock=stock_pathsafe,
                start=start,
                end=end).replace(':', '-')
            cache_filepath = get_cache_filepath(cache_filename)
            if os.path.exists(cache_filepath):
                # Same defaults the removed DataFrame.from_csv used.
                stkd = pd.read_csv(cache_filepath, index_col=0,
                                   parse_dates=True)
            else:
                stkd = DataReader(stock, 'yahoo', start, end).sort_index()
                stkd.to_csv(cache_filepath)
            data[stock] = stkd

    if indexes is not None:
        # Index data is always downloaded; it is not cached.
        for name, ticker in iteritems(indexes):
            print(name)
            stkd = DataReader(ticker, 'yahoo', start, end).sort_index()
            data[name] = stkd

    return data
def create_lagged_series(symbol, start_date, end_date, lags=5):
    """Build a DataFrame of percentage returns and lagged returns for a stock.

    Data is requested from Yahoo Finance starting one year before
    `start_date` so that the lagged columns are populated at `start_date`.

    The returned frame, restricted to rows on or after `start_date`, has:

    * ``Volume``    - traded volume,
    * ``Today``     - percentage return of the adjusted close,
    * ``Lag1..N``   - percentage returns of the adjusted close shifted by
                      1..`lags` rows,
    * ``Direction`` - sign of ``Today``.
    """
    ts = DataReader(
        symbol, "yahoo",
        start_date - datetime.timedelta(days=365),
        end_date
    )

    # Price levels: today's adjusted close plus its lagged copies.
    tslag = pd.DataFrame(index=ts.index)
    tslag["Today"] = ts["Adj Close"]
    tslag["Volume"] = ts["Volume"]
    for i in range(lags):
        tslag["Lag%s" % str(i + 1)] = ts["Adj Close"].shift(i + 1)

    # Percentage returns.
    tsret = pd.DataFrame(index=tslag.index)
    tsret["Volume"] = tslag["Volume"]
    tsret["Today"] = tslag["Today"].pct_change() * 100.0

    # Returns that are (almost) zero are replaced by a small positive value.
    # A single .loc assignment avoids chained indexing, which may write to
    # a temporary copy.
    tsret.loc[tsret["Today"].abs() < 0.0001, "Today"] = 0.0001

    for i in range(lags):
        lag_name = "Lag%s" % str(i + 1)
        tsret[lag_name] = tslag[lag_name].pct_change() * 100.0

    # +1 / -1 marker for an up or down day.
    tsret["Direction"] = np.sign(tsret["Today"])
    tsret = tsret[tsret.index >= start_date]

    return tsret
def get_prices_df(self, ticker, date_start, date_end):
    """Download Yahoo quotes for `ticker` and drop the zero-volume rows.

    Any exception raised while downloading or filtering is re-raised as
    ``ErrorInternetConnexion('yahoo DataReader', <original exception>)``.
    """
    try:
        quotes = DataReader(ticker, "yahoo", date_start, date_end)
        # Zero-volume rows are not kept: they are public holidays
        # (e.g. January 1st).
        traded = quotes[quotes.Volume != 0]
    except Exception as e:
        raise ErrorInternetConnexion('yahoo DataReader', e)
    else:
        return traded
def stockhistorynobackfilltodataframeusingcache(symbol, fromdate, todate):
    """Return Yahoo price history for `symbol`, using a CSV cache on disk.

    The cache file lives in ``config.mycachefolder`` and is named after the
    symbol and the two dates, so `fromdate` and `todate` must be strings.
    If the file exists it is read back with ``pd.read_csv``; otherwise the
    data is downloaded, written to the cache and returned.

    NOTE(review): a cache hit returns whatever ``read_csv`` parses (index
    not converted to dates), a cache miss returns the DataReader frame.
    NOTE(review): ``pandas.io.data`` only exists in old pandas releases.
    """
    print('--------------------------')
    print('Initialized pullprices.stockhistorydailytodataframeusingcache')
    import os
    import pandas as pd
    from pandas.io.data import DataReader
    import config
    import mytools

    mycachefolder = config.mycachefolder
    mytools.general().make_sure_path_exists(mycachefolder)

    # os.path.join instead of a hard-coded backslash, so the file really
    # ends up inside the cache folder on every platform.
    cachedfilename = ('stockhistorynobackfill ' + symbol + ' ' + fromdate
                      + ' ' + todate + '.csv')
    cachedfilepathname = os.path.join(mycachefolder, cachedfilename)

    if os.path.isfile(cachedfilepathname):
        print('   Found cached file:  ' + cachedfilepathname)
        df_hist = pd.read_csv(cachedfilepathname, index_col=0)
    else:
        print('   Getting new file:' + cachedfilepathname)
        df_hist = DataReader(symbol, "yahoo", fromdate, todate)
        df_hist.to_csv(
            cachedfilepathname,
            columns=('Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close'))
    return df_hist
def stockhistory(symbol, fromdate, todate):
    """Return the Yahoo price history of `symbol` between the two dates.

    The dates are handed to ``DataReader`` unchanged.
    """
    # NOTE(review): pandas.io.data only exists in old pandas releases.
    from pandas.io.data import DataReader

    return DataReader(symbol, "yahoo", fromdate, todate)
def test_read_famafrench(self):
    """Each listed Fama-French dataset must load as a non-empty dict."""
    datasets = (
        "F-F_Research_Data_Factors",
        "F-F_Research_Data_Factors_weekly",
        "6_Portfolios_2x3",
        "F-F_ST_Reversal_Factor",
    )
    for dataset in datasets:
        loaded = DataReader(dataset, "famafrench")
        assert loaded
        assert isinstance(loaded, dict)
def test_read_famafrench(self):
    """Each listed Fama-French dataset must load as a dict (not None)."""
    datasets = (
        "F-F_Research_Data_Factors",
        "F-F_Research_Data_Factors_weekly",
        "6_Portfolios_2x3",
        "F-F_ST_Reversal_Factor",
        "F-F_Momentum_Factor",
    )
    for dataset in datasets:
        loaded = DataReader(dataset, "famafrench")
        self.assertIsNotNone(loaded)
        self.assertIsInstance(loaded, dict)
def ADF(ticker, start, end):
    """Run ``ts.adfuller`` on a ticker's adjusted close and print the outcome.

    The statistic in ``result[0]`` is compared against the '1%', '5%' and
    '10%' entries of ``result[4]``, strictest first; the first level it
    falls below is reported, otherwise 'Cannot reject Null Hypothesis'.
    The downloaded price DataFrame is returned in every case.

    NOTE(review): ``ts`` is presumably ``statsmodels.tsa.stattools`` - confirm.
    """
    print('ADF')
    stock = DataReader(ticker, "yahoo", start, end)
    result = ts.adfuller(stock['Adj Close'], 1)
    print(result)
    print('')

    statistic = result[0]
    critical = result[4]
    # Look all three levels up before comparing anything.
    thresholds = [(level, critical[level]) for level in ('1%', '5%', '10%')]

    verdict = 'Cannot reject Null Hypothesis'
    for level, bound in thresholds:
        if statistic < bound:
            verdict = 'Lesser than ' + level
            break

    print(verdict)
    print('-----------------------------------------')
    return stock
def fetch_timeseries(symbol, dir_name='data', use_cache=True):
    """Read time series data for `symbol` through a CSV cache.

    If the cache file ``<base_dir>/<dir_name>/<symbol>.csv`` exists and
    `use_cache` is True it is used as is; otherwise the data is downloaded
    from Yahoo (from 1900-01-01 on) and written to the cache first. The
    series is always read back from the CSV file, indexed by its parsed
    'Date' column, and passed through ``_adj_column_names``.

    ``base_dir`` comes from ``pf.read_config()['base_dir']``; if that fails
    `dir_name` is used as given.
    """
    base_dir = ''
    try:
        base_dir = pf.read_config()['base_dir']
    except Exception:
        # Best effort: without a usable config fall back to `dir_name`
        # alone. KeyboardInterrupt/SystemExit are no longer swallowed.
        pass

    dir_name = os.path.join(base_dir, dir_name)
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)

    timeseries_cache = os.path.join(dir_name, symbol + '.csv')

    if not (use_cache and os.path.isfile(timeseries_cache)):
        ts = DataReader(symbol, 'yahoo', start=datetime.datetime(1900, 1, 1))
        ts.to_csv(timeseries_cache, encoding='utf-8')

    # Always go through the CSV so both paths return the same layout.
    ts = pd.read_csv(timeseries_cache, index_col='Date', parse_dates=True)
    ts = _adj_column_names(ts)
    return ts
def historical_pandas_yahoo(symbol, source='yahoo', start=None, end=None):
    """Fetch historical quotes through ``DataReader``.

    `source` defaults to 'yahoo'; `start` and `end` are forwarded as
    keyword arguments.
    """
    # NOTE: return a Panel for multiple symbols?
    # NOTE: the 'Adj Close' column name contains a space.
    quotes = DataReader(symbol, source, start=start, end=end)
    return quotes
def importData(self):
    """Import (new) data from Yahoo into the ``Equities`` table.

    Quotes for ``self._tickerCode`` are downloaded between
    ``self._getLatestDate()`` and ``self._getTodaysDate()`` and stored in
    ``self._data``. Open/High/Low are scaled by ``Adj Close / Close``, the
    raw ``Close`` column is dropped and ``Adj Close`` takes its name. The
    rows are then written with ``insert or replace``.
    """
    start = self._getLatestDate()
    end = self._getTodaysDate()

    Logger.log(logging.INFO, "Loading Data",
               {"scope": __name__, "tickerCode": self._tickerCode,
                "start": str(start), "end": str(end)})
    self._data = DataReader(self._tickerCode, "yahoo", start, end)

    self._data['Code'] = self._tickerCode

    # Put Open/High/Low on the same adjusted basis as the close.
    for item in ['Open', 'High', 'Low']:
        self._data[item] = (self._data[item] * self._data['Adj Close']
                            / self._data['Close'])

    self._data.drop('Close', axis=1, inplace=True)
    self._data.rename(columns={'Adj Close': 'Close'}, inplace=True)
    self._data['Volume'] = self._data['Volume'].astype(float)

    connection = sqlite3.connect(pyswing.database.pySwingDatabase)
    try:
        query = ("insert or replace into Equities "
                 "(Date, Open, High, Low, Volume, Close, Code) "
                 "values (?,?,?,?,?,?,?)")
        connection.executemany(query, self._data.to_records(index=True))
        connection.commit()
    finally:
        # Release the database handle even if the insert or commit fails.
        connection.close()
def peak_begin_dates(start="01/01/1972", end=None):
    """
    Use the fred dataset `USRECQ` to determine the beginning of the
    peaks before all recessions between dates start and end

    Parameters
    ----------
    start : string or datetime.datetime, optional(default='01/01/1972')
        A string or other acceptable pandas date identifier that marks
        the beginning of the window for which we will search for starts
        of peaks
    end : string or datetime.datetime, optional(default=None)
        The ending date of the search window. ``None`` means "now",
        evaluated when the function is called.

    Returns
    -------
    rec_startind : pd.DatetimeIndex
        A pandas DatetimeIndex representing the starting points of each
        "peak" from start to end. Empty if the window has no recession
        quarters.
    """
    if end is None:
        # Resolved per call; a datetime.now() default in the signature
        # would be frozen at import time.
        end = datetime.now()

    # Get quarterly recession dates from FRED, honouring both bounds.
    rec_dates = DataReader("USRECQ", "fred", start=start, end=end)

    # Row positions of the quarters flagged with 1.
    one_vals = np.where(rec_dates == 1)[0]
    if len(one_vals) == 0:
        return rec_dates.index[:0]

    rec_start = [one_vals[0]]

    # Find the beginning of the recession dates (Don't include ones that
    # begin within three years of a previous one -- hence the `+12`).
    # Positions are ascending, so the last kept start is the largest.
    for d in one_vals:
        if d > rec_start[-1] + 12:
            rec_start.append(d)

    rec_startind = rec_dates.index[rec_start]

    return rec_startind
def historical_volatility(sym, days):
    """Return the annualized stddev of daily log returns of `sym`.

    NOTE(review): only the download step is visible here. On success
    `quotes` is assigned and the function falls off the end; on failure an
    error is printed and ``(None, None)`` is returned. The actual
    volatility computation may lie outside this view - confirm.
    NOTE(review): this block uses Python 2 syntax (``except X, e`` and the
    print statement).
    """
    try:
        # Last `days` rows of the 'Close' column for the symbol.
        quotes = DataReader(sym, 'yahoo')['Close'][-days:]
    except Exception, e:
        # Report the problem and signal failure with a pair of Nones.
        print "Error getting data for symbol '{}'.\n".format(sym), e
        return None, None
def get_data(stock, starttime, endtime):
    """Download adjusted closes for `stock` and compute scaled daily returns.

    Returns a pair ``(daily_ret_arr, return_array)``:

    * ``daily_ret_arr`` - for every row, ``(price - previous) / previous``
      multiplied by the module-level ``scale``. The first entry compares
      the first price with itself and is therefore 0.
    * ``return_array`` - the adjusted close prices themselves, as a list.

    Both lists are empty when no rows are returned. The number of records
    is printed.
    """
    adj_close = DataReader(stock, 'yahoo', starttime, endtime)['Adj Close']

    return_array = list(adj_close)
    daily_ret_arr = []

    if return_array:
        # Positional access; the Series is indexed by date, not by integer.
        previous = adj_close.iloc[0]
        for price in adj_close:
            daily_return = (price - previous) / previous
            daily_ret_arr.append(daily_return * scale)
            previous = price

    print("Records found:" + str(len(daily_ret_arr)))
    return daily_ret_arr, return_array
def set_source(self, source, tickers, start, end):
    """Load 'Close' prices for `tickers` and flatten them into an event list.

    Each ticker is downloaded through ``DataReader``; a failure is logged
    and that ticker is skipped. For every timestamp (row) the tickers are
    visited in random order and each finite price is appended as
    ``(timestamp, ticker, price)``. The list is stored in ``self._source``.
    """
    prices = pd.DataFrame()
    for position, ticker in enumerate(tickers):
        try:
            # Progress is logged as the fraction of tickers done so far.
            self._logger.info(
                'Loading ticker %s' % (float(position) / len(tickers)))
            frame = DataReader(ticker, source, start, end)
            prices[ticker] = frame.loc[:, 'Close']
        except Exception as e:
            self._logger.error(e)

    events = []
    for timestamp, series in prices.iterrows():
        vals = series.values
        labels = series.index
        # Shuffle the order in which tickers are emitted for this row.
        order = np.random.choice(len(vals), replace=False, size=len(vals))
        events.extend(
            (timestamp, labels[k], vals[k])
            for k in order
            if np.isfinite(vals[k])
        )

    self._source = events
    self._logger.info('Loaded data!')
def stockhistorybackfilledtodictionary(symbol, fromdate, todate):
    """Return a per-calendar-day dict of adjusted closes, carried forward.

    `fromdate` and `todate` are ``'YYYY-MM-DD'`` strings. Every calendar
    day in that inclusive range gets an entry
    ``{day: [('AdjClose', value)]}``. On days without a quote the most
    recent earlier value is reused; before the first quote the value is
    the string ``'NaN'``. Each day is also printed.

    NOTE(review): ``pandas.io.data`` only exists in old pandas releases.
    """
    from pandas.io.data import DataReader
    from datetime import datetime, timedelta

    hist = DataReader(symbol, "yahoo", fromdate, todate)

    date_format = "%Y-%m-%d"
    d = datetime.strptime(fromdate, date_format)
    # Parsed once instead of on every loop iteration.
    last_day = datetime.strptime(todate, date_format)
    delta = timedelta(days=1)

    last_adjclose = 'NaN'
    dictAdjClose = {}
    while d <= last_day:
        d_string = d.strftime(date_format)
        if d_string in hist.index:
            # .loc replaces the removed .ix indexer (label lookup).
            last_adjclose = hist.loc[d_string]['Adj Close']
            print(d_string, last_adjclose)
        else:
            print(d_string, 'nothing', last_adjclose)
        dictAdjClose[d_string] = [('AdjClose', last_adjclose)]
        d += delta
    return dictAdjClose
def main():
    '''
    Plot the closing price of the TWSE stock ``stock_num`` since 2000-01-01.

    1. The data from Yahoo! Finance is not fetched by calling URL APIs
       directly but through the pandas ``DataReader`` API.
    2. This program gets TWSE data only; for OTC data the ticker suffix
       has to be changed (see below).

    ``stock_num`` is read from module scope. If downloading or plotting
    fails, the program exits with a usage hint.
    '''
    # Set up the figure.
    plt.figure()
    stock_plt = plt.subplot2grid((1, 1), (0, 0), colspan=1)
    stock_title = "{} day price".format(stock_num)
    plt.suptitle(stock_title)

    startday = dtime.date(2000, 1, 1)

    # Add ".TW" to tell Yahoo! Finance to query TWSE stock data.
    # To query OTC, add ".TWO" instead.
    stock_str = "{}.TW".format(stock_num)

    try:
        stock_data = DataReader(stock_str, 'yahoo', startday)
        # Clear the current axes.
        stock_plt.cla()
        # Turn the axes grid on.
        stock_plt.grid(True)
        # Plot date against closing price.
        stock_plt.plot(stock_data.index, stock_data['Close'])
        plt.show()
    except Exception:
        # Only real errors end up here; Ctrl-C and SystemExit propagate.
        # Raising SystemExit is what exit() does, without needing `site`.
        raise SystemExit("Error happened!!\nTry: python TwanStkEx1.py 2330")
def DownloadStocks(self, startingDate, endDate):
    """Download every stock in ``self._names`` from the "google" source.

    Each frame is appended to ``self._data``; ``self._columns`` is
    overwritten with the columns of the frame just downloaded. Progress is
    printed per stock. Returns ``self._data``.
    """
    for name in self._names:
        print("Getting data from {0}...".format(name))
        frame = DataReader(name, "google", startingDate, endDate)
        self._columns = frame.columns
        row_count = frame.shape[0]
        print("   Number of lines:{0}".format(row_count))
        self._data.append(frame)
    return self._data
def load_from_yahoo(indexes=None, stocks=None, start=None, end=None):
    """Load closing prices from yahoo finance.

    :Optional:
        indexes : dict (Default: {'SPX': '^GSPC'})
            Financial indexes to load.
        stocks : list (Default: ['AAPL', 'GE', 'IBM', 'MSFT',
                                 'XOM', 'AA', 'JNJ', 'PEP', 'KO'])
            Stock closing prices to load.
        start : datetime (Default: 1993-01-01 UTC)
            Retrieve prices from start date on.
        end : datetime (Default: 2002-01-01 UTC)
            Retrieve prices until end date.

    :Returns:
        DataFrame with one 'Close' column per stock/index, whose index is
        localized to UTC.

    :Note:
        This is based on code presented in a talk by Wes McKinney:
        http://wesmckinney.com/files/20111017/notebook_output.pdf
    """
    if indexes is None:
        indexes = {'SPX': '^GSPC'}
    if stocks is None:
        stocks = ['AAPL', 'GE', 'IBM', 'MSFT', 'XOM', 'AA', 'JNJ', 'PEP', 'KO']
    # pd.datetime was removed from pandas; Timestamp is a datetime subclass.
    if start is None:
        start = pd.Timestamp('1993-01-01', tz=pytz.utc)
    if end is None:
        end = pd.Timestamp('2002-01-01', tz=pytz.utc)

    assert start < end, "start date is later than end date."

    data = OrderedDict()

    for stock in stocks:
        print(stock)
        stkd = DataReader(stock, 'yahoo', start, end).sort_index()
        data[stock] = stkd

    # .items() works on both Python 2 and Python 3 dicts.
    for name, ticker in indexes.items():
        print(name)
        stkd = DataReader(ticker, 'yahoo', start, end).sort_index()
        data[name] = stkd

    df = pd.DataFrame({key: d['Close'] for key, d in data.items()})
    df.index = df.index.tz_localize(pytz.utc)

    return df
def get_stock_history(stock):
    """Return the last 365 days of Yahoo prices for `stock`.

    Returns ``None`` when the download raises ``HTTPError`` or
    ``BadStatusLine``.
    """
    one_year_ago = date.today() - timedelta(days=365)
    try:
        return DataReader(stock, "yahoo", start=one_year_ago)
    except (HTTPError, BadStatusLine):
        return None
def downloadStock(ticker, dataSource, start, end):
    """Download `ticker` from `dataSource`; return an empty frame on failure.

    On success the ticker is printed and the downloaded DataFrame is
    returned. If the download raises, the error is deliberately ignored
    and an empty DataFrame is returned instead.
    """
    gtemp = pd.DataFrame()
    try:
        gtemp = DataReader(ticker, dataSource, start, end)
        print(ticker)
    except Exception:
        # Best effort by design; unlike a bare `except:` this still lets
        # KeyboardInterrupt and SystemExit through.
        pass
    return gtemp
def test_read_famafrench(self):
    """Fama-French loading test, currently skipped unconditionally.

    The skip is raised first, so the checks below it never run.
    """
    raise nose.SkipTest('buggy as of 2/14/16; maybe a data revision?')

    datasets = (
        "F-F_Research_Data_Factors",
        "F-F_Research_Data_Factors_weekly",
        "6_Portfolios_2x3",
        "F-F_ST_Reversal_Factor",
        "F-F_Momentum_Factor",
    )
    for dataset in datasets:
        loaded = DataReader(dataset, "famafrench")
        self.assertIsNotNone(loaded)
        self.assertIsInstance(loaded, dict)
def factors_df(end=True):
    """Return the "F-F_Research_Data_Factors" table as a dated DataFrame.

    The first table of the dataset is taken and its columns are renamed to
    ``Mkt_rf``, ``SMB``, ``HML`` and ``rf``. The original index holds
    integers of the form YYYYMM, which are converted to datetimes on the
    first of the month. With `end` true (the default) they are moved to
    the month end instead.
    """
    ff = pd.DataFrame(DataReader("F-F_Research_Data_Factors", "famafrench")[0])
    ff.columns = ['Mkt_rf', 'SMB', 'HML', 'rf']
    # Floor division: `/` yields a float on Python 3, which datetime()
    # rejects. `//` behaves the same on Python 2 integers.
    ff.index = [dt.datetime(d // 100, d % 100, 1) for d in ff.index]
    if end:
        ff.index = ff.index.to_period('M').to_timestamp('M')
    return ff
def get(self, ticker):
    '''
    Retrieves EOD data for a ticker from 1900-01-01 onward.

    The request goes to ``DataReader`` using ``self.data_source``.

    :param ticker: The stock symbol, such as `AAPL`.
    :returns: The data as a pandas `DataFrame`.
    '''
    earliest = datetime(1900, 1, 1)
    return DataReader(ticker, data_source=self.data_source, start=earliest)
def get_history(self, stock):
    """Return the last 365 days of Yahoo prices for `stock`.

    A progress line is printed first. Returns ``None`` when the download
    raises ``HTTPError`` or ``BadStatusLine``.
    """
    # Function-call form prints the same text on Python 2 and Python 3.
    print("Retrieving data for %s" % stock)
    prices = None
    try:
        start_date = datetime.today() - timedelta(days=365)
        prices = DataReader(stock, "yahoo", start=start_date)
    except (HTTPError, BadStatusLine):
        # Best effort: a failed request simply yields None.
        pass
    return prices
def data_StockView_import(self):
    """Load GOOGL quotes into ``self.data`` and print a netfonds dump.

    ``self.data`` is filled from the "google" source for
    ``self.start_date``..``self.end_date``. Separately, a fixed netfonds
    URL is fetched, parsed with ``pandas.read_table`` and printed; that
    table is not stored.
    """
    self.data = DataReader("GOOGL", "google", self.start_date, self.end_date)

    source = urllib2.urlopen(
        'http://hopey.netfonds.no/posdump.php?date=20140530&paper=AAPL.O&csv_format=txt'
    )
    try:
        data = pandas.read_table(source)
    finally:
        # Always release the HTTP response, even if parsing fails.
        source.close()
    print(data)
def stocks():
    """Download one year of quotes for four tickers and render the page.

    Each ticker's frame is stored in a module-level global named after the
    ticker. ``build_stock_analyses()`` is called after the downloads, and
    the four frames are then handed to the ``stocks.html`` template.

    NOTE(review): presumably ``build_stock_analyses`` reads those globals,
    which is why they are kept - confirm.
    """
    tickers = ['AAPL', 'GOOG', 'MSFT', 'AMZN']

    end = datetime.now()
    try:
        start = datetime(end.year - 1, end.month, end.day)
    except ValueError:
        # Today is Feb 29 and last year had none: fall back to Feb 28.
        start = datetime(end.year - 1, end.month, 28)

    for ticker in tickers:
        globals()[ticker] = DataReader(ticker, 'yahoo', start, end)

    build_stock_analyses()

    return render_template('stocks.html', AAPL=AAPL, GOOG=GOOG, MSFT=MSFT, AMZN=AMZN)
def _load_raw_yahoo_data(indexes=None, stocks=None, start=None, end=None):
    """Load raw price frames from yahoo finance.

    :Optional:
        indexes : dict
            Mapping of display name -> Yahoo ticker for financial indexes.
        stocks : list
            Stock tickers to load.
        start : datetime (Default: 1990-01-01 UTC)
            Retrieve prices from start date on.
        end : datetime
            Retrieve prices until end date.

    :Returns:
        OrderedDict mapping each stock ticker / index name to the
        date-sorted DataFrame returned by ``DataReader``.

    At least one of `indexes` and `stocks` must be given.

    :Note:
        This is based on code presented in a talk by Wes McKinney:
        http://wesmckinney.com/files/20111017/notebook_output.pdf
    """
    assert indexes is not None or stocks is not None, """ must specify stocks or indexes"""

    if start is None:
        # pd.datetime was removed from pandas; Timestamp is a datetime subclass.
        start = pd.Timestamp('1990-01-01', tz=pytz.utc)

    if start is not None and end is not None:
        assert start < end, "start date is later than end date."

    data = OrderedDict()

    if stocks is not None:
        for stock in stocks:
            print(stock)
            stkd = DataReader(stock, 'yahoo', start, end).sort_index()
            data[stock] = stkd

    if indexes is not None:
        # .items() works on both Python 2 and Python 3 dicts.
        for name, ticker in indexes.items():
            print(name)
            stkd = DataReader(ticker, 'yahoo', start, end).sort_index()
            data[name] = stkd

    return data