def get_monthly_data(self):
    """Load or download price data and store monthly means on ``self.data``.

    * ``self.source == "pickle"``: the data is read back from
      ``"<self.name_of_data>.p"`` and nothing else happens.
    * ``self.source in ("google", "yahoo")``: the ``"Close"`` columns are
      fetched through ``pdr.DataReader`` for ``self.market_indecies +
      self.stock_ticks``.
    * ``self.source == "quandl"``: every ticker is fetched separately with
      ``QuandlReader``; its ``"AdjClose"`` column is used when present,
      otherwise ``"IndexValue"``; tickers offering neither are left out.

    Downloaded data is grouped with ``pd.Grouper(freq='MS')`` and averaged.
    When ``self.save_data`` is truthy the result is pickled to
    ``"<self.name_of_data>.p"``.

    Raises
    ------
    ValueError
        If ``self.source`` is none of "pickle", "google", "yahoo", "quandl".
    """
    # TODO add better module for data management and auto naming files
    pickle_path = "{}.p".format(self.name_of_data)

    if self.source == "pickle":
        # NOTE: unpickling can execute arbitrary code; only load files that
        # this program wrote itself.
        with open(pickle_path, "rb") as handle:
            self.data = pickle.load(handle)
        # NOTE(review): the save step below is assumed to belong to the
        # download branch only, so cached data is not re-written here.
        return

    if self.source in ["google", "yahoo"]:  # DEPRECATED sources
        raw_data = pdr.DataReader(
            self.market_indecies + self.stock_ticks,
            self.source, self.start, self.end)
        adj_data = raw_data["Close"]
    elif self.source == "quandl":
        columns = {}
        for ticker in self.stock_ticks + self.market_indecies:
            data = QuandlReader(
                symbols=ticker, start=self.start, end=self.end).read()
            if "AdjClose" in data.columns:
                columns[ticker] = data["AdjClose"]
            elif "IndexValue" in data.columns:
                columns[ticker] = data["IndexValue"]
            # Short pause between consecutive requests.
            sleep(0.4)
        adj_data = pd.DataFrame(columns)
    else:
        # Previously this fell through to an UnboundLocalError on adj_data.
        raise ValueError("unsupported data source: %r" % (self.source,))

    # adjusted monthly data
    self.data = adj_data.groupby(pd.Grouper(freq='MS')).mean()

    if self.save_data:
        with open(pickle_path, "wb") as handle:
            pickle.dump(self.data, handle)
def get_data_from_yahoo(reload_sp500=False): if reload_sp500: tickers = save_sp500_tickers() else: with open("sp500tickers.pickle", "rb") as f: tickers = pickle.load(f) if not os.path.exists('stock_dfs'): os.makedirs('stock_dfs') start = dt.datetime(2010, 1, 1) end = dt.datetime.now() for ticker in tickers: # just in case your connection breaks, we'd like to save our progress! if not os.path.exists('stock_dfs/{}.csv'.format(ticker)): df = QuandlReader("WIKI/{}".format(ticker), start=start, end=end) #df = web.DataReader(ticker, '', start, end) df.reset_index(inplace=True) df.set_index("Date", inplace=True) df = df.drop("Symbol", axis=1) df.to_csv('stock_dfs/{}.csv'.format(ticker)) else:
def DataReader(
    name,
    data_source=None,
    start=None,
    end=None,
    retry_count=3,
    pause=0.1,
    session=None,
    api_key=None,
):
    """
    Imports data from a number of online sources.

    The reader class is selected by ``data_source``; see below for the
    accepted keys.

    Parameters
    ----------
    name : str or list of strs
        the name of the dataset. Some data sources (IEX, fred) will
        accept a list of names. For "enigma" it is passed as the dataset id;
        for "nasdaq" it must be the string 'symbols'.
    data_source: {str, None}
        one of "yahoo", "yahoo-actions", "yahoo-dividends", "iex",
        "iex-tops", "iex-last", "iex-book", "bankofcanada", "stooq",
        "enigma", "fred", "famafrench", "oecd", "eurostat", "nasdaq",
        "quandl", "moex", "robinhood", "tiingo", "av-forex", "av-daily",
        "av-daily-adjusted", "av-weekly", "av-weekly-adjusted",
        "av-monthly", "av-monthly-adjusted", "av-intraday", "econdb"
    start : {datetime, None}
        left boundary for range (defaults to 1/1/2010).
        Not forwarded for "stooq", "enigma", "nasdaq" and "av-forex".
    end : {datetime, None}
        right boundary for range (defaults to today).
        Not forwarded for "stooq", "enigma", "nasdaq" and "av-forex".
    retry_count : {int, 3}
        Number of times to retry query request.
    pause : {numeric, 0.1}
        Time, in seconds, to pause between consecutive queries of chunks. If
        single value given for symbol, represents the pause between retries.
    session : Session, default None
        requests.sessions.Session instance to be used
    api_key : (str, None)
        Optional parameter to specify an API key for certain data sources.
        Forwarded to the "iex", "enigma", "quandl", "tiingo" and "av-*"
        readers only.

    Raises
    ------
    NotImplementedError
        If ``data_source`` is not one of the keys listed above.
    ValueError
        If ``data_source`` is "nasdaq" and ``name`` is not 'symbols'.

    Examples
    ----------
    # Daily data from IEX
    aapl = DataReader("AAPL", "iex")

    # Price and volume data from IEX
    tops = DataReader(["GS", "AAPL"], "iex-tops")
    # Top of book executions from IEX
    gs = DataReader("GS", "iex-last")
    # Real-time depth of book data from IEX
    gs = DataReader("GS", "iex-book")

    # Data from FRED
    vix = DataReader("VIXCLS", "fred")

    # Data from Fama/French
    ff = DataReader("F-F_Research_Data_Factors", "famafrench")
    ff = DataReader("F-F_Research_Data_Factors_weekly", "famafrench")
    ff = DataReader("6_Portfolios_2x3", "famafrench")
    ff = DataReader("F-F_ST_Reversal_Factor", "famafrench")
    """
    # A tuple (not a set) so that unhashable data_source values are still
    # rejected with NotImplementedError rather than TypeError.
    expected_source = (
        "yahoo",
        "iex",
        "iex-tops",
        "iex-last",
        "bankofcanada",
        "stooq",
        "iex-book",
        "enigma",
        "fred",
        "famafrench",
        "oecd",
        "eurostat",
        "nasdaq",
        "quandl",
        "moex",
        "robinhood",
        "tiingo",
        "yahoo-actions",
        "yahoo-dividends",
        "av-forex",
        "av-daily",
        "av-daily-adjusted",
        "av-weekly",
        "av-weekly-adjusted",
        "av-monthly",
        "av-monthly-adjusted",
        "av-intraday",
        "econdb",
    )

    if data_source not in expected_source:
        # Wrap in a 1-tuple so a tuple-valued data_source is reported
        # verbatim instead of being unpacked by the % operator.
        msg = "data_source=%r is not implemented" % (data_source,)
        raise NotImplementedError(msg)

    # Keyword groups shared by most readers.
    retry_kwargs = dict(retry_count=retry_count, pause=pause, session=session)
    range_kwargs = dict(start=start, end=end, **retry_kwargs)

    # "av-*" time-series sources differ only in the ``function`` argument.
    av_functions = {
        "av-daily": "TIME_SERIES_DAILY",
        "av-daily-adjusted": "TIME_SERIES_DAILY_ADJUSTED",
        "av-weekly": "TIME_SERIES_WEEKLY",
        "av-weekly-adjusted": "TIME_SERIES_WEEKLY_ADJUSTED",
        "av-monthly": "TIME_SERIES_MONTHLY",
        "av-monthly-adjusted": "TIME_SERIES_MONTHLY_ADJUSTED",
        "av-intraday": "TIME_SERIES_INTRADAY",
    }

    if data_source == "yahoo":
        return YahooDailyReader(
            symbols=name, adjust_price=False, chunksize=25, **range_kwargs
        ).read()

    elif data_source == "iex":
        return IEXDailyReader(
            symbols=name, chunksize=25, api_key=api_key, **range_kwargs
        ).read()

    elif data_source == "iex-tops":
        return IEXTops(symbols=name, **range_kwargs).read()

    elif data_source == "iex-last":
        return IEXLasts(symbols=name, **range_kwargs).read()

    elif data_source == "bankofcanada":
        return BankOfCanadaReader(symbols=name, **range_kwargs).read()

    elif data_source == "stooq":
        # No start/end are forwarded for this source.
        return StooqDailyReader(
            symbols=name, chunksize=25, **retry_kwargs
        ).read()

    elif data_source == "iex-book":
        return IEXDeep(symbols=name, service="book", **range_kwargs).read()

    elif data_source == "enigma":
        return EnigmaReader(dataset_id=name, api_key=api_key).read()

    elif data_source == "fred":
        return FredReader(symbols=name, **range_kwargs).read()

    elif data_source == "famafrench":
        return FamaFrenchReader(symbols=name, **range_kwargs).read()

    elif data_source == "oecd":
        return OECDReader(symbols=name, **range_kwargs).read()

    elif data_source == "eurostat":
        return EurostatReader(symbols=name, **range_kwargs).read()

    elif data_source == "nasdaq":
        if name != "symbols":
            raise ValueError("Only the string 'symbols' is supported for "
                             "Nasdaq, not %r" % (name, ))
        return get_nasdaq_symbols(retry_count=retry_count, pause=pause)

    elif data_source == "quandl":
        return QuandlReader(
            symbols=name, api_key=api_key, **range_kwargs
        ).read()

    elif data_source == "moex":
        return MoexReader(symbols=name, **range_kwargs).read()

    elif data_source == "robinhood":
        return RobinhoodHistoricalReader(symbols=name, **range_kwargs).read()

    elif data_source == "tiingo":
        return TiingoDailyReader(
            symbols=name, api_key=api_key, **range_kwargs
        ).read()

    elif data_source == "yahoo-actions":
        return YahooActionReader(symbols=name, **range_kwargs).read()

    elif data_source == "yahoo-dividends":
        return YahooDivReader(
            symbols=name,
            adjust_price=False,
            chunksize=25,
            interval="d",
            **range_kwargs
        ).read()

    elif data_source == "av-forex":
        # No start/end are forwarded for this source.
        return AVForexReader(
            symbols=name, api_key=api_key, **retry_kwargs
        ).read()

    elif data_source in av_functions:
        return AVTimeSeriesReader(
            symbols=name,
            function=av_functions[data_source],
            api_key=api_key,
            **range_kwargs
        ).read()

    elif data_source == "econdb":
        return EcondbReader(symbols=name, **range_kwargs).read()

    else:
        # Defensive: only reachable if expected_source and the chain above
        # ever drift apart.
        msg = "data_source=%r is not implemented" % (data_source,)
        raise NotImplementedError(msg)
def get_data_quandl(*args, **kwargs):
    """Build a ``QuandlReader`` from the given arguments and return the
    result of its ``read()`` call.

    All positional and keyword arguments are forwarded unchanged to
    ``QuandlReader``.
    """
    reader = QuandlReader(*args, **kwargs)
    return reader.read()
def DataReader(name, data_source=None, start=None, end=None,
               retry_count=3, pause=0.001, session=None, access_key=None):
    """
    Imports data from a number of online sources.

    Currently supports Google Finance, St. Louis FED (FRED), and Kenneth
    French's data library, among others.

    Parameters
    ----------
    name : str or list of strs
        the name of the dataset. Some data sources (google, fred) will
        accept a list of names. For "enigma" it is passed as the dataset id;
        for "nasdaq" it must be the string 'symbols'.
    data_source: {str, None}
        one of "google", "iex", "iex-tops", "iex-last", "iex-book",
        "bankofcanada", "stooq", "enigma", "fred", "famafrench", "oecd",
        "eurostat", "nasdaq", "quandl", "moex", "morningstar", "robinhood",
        "tiingo". The keys "yahoo", "yahoo-actions", "yahoo-dividends" and
        "edgar-index" are recognised but always raise
        ImmediateDeprecationError.
    start : {datetime, None}
        left boundary for range (defaults to 1/1/2010)
    end : {datetime, None}
        right boundary for range (defaults to today)
    retry_count : {int, 3}
        Number of times to retry query request.
    pause : {numeric, 0.001}
        Time, in seconds, to pause between consecutive queries of chunks. If
        single value given for symbol, represents the pause between retries.
    session : Session, default None
        requests.sessions.Session instance to be used
    access_key : (str, None)
        Optional parameter to specify an API key for certain data sources.
        Forwarded (as ``api_key``) to the "enigma" and "tiingo" readers only.

    Raises
    ------
    ImmediateDeprecationError
        For "yahoo", "yahoo-actions", "yahoo-dividends" and "edgar-index".
    ValueError
        If ``data_source`` is "nasdaq" and ``name`` is not 'symbols'.
    NotImplementedError
        For any other unrecognised ``data_source``.

    Examples
    ----------

    # Data from Google Finance
    aapl = DataReader("AAPL", "google")

    # Price and volume data from IEX
    tops = DataReader(["GS", "AAPL"], "iex-tops")
    # Top of book executions from IEX
    gs = DataReader("GS", "iex-last")
    # Real-time depth of book data from IEX
    gs = DataReader("GS", "iex-book")

    # Data from FRED
    vix = DataReader("VIXCLS", "fred")

    # Data from Fama/French
    ff = DataReader("F-F_Research_Data_Factors", "famafrench")
    ff = DataReader("F-F_Research_Data_Factors_weekly", "famafrench")
    ff = DataReader("6_Portfolios_2x3", "famafrench")
    ff = DataReader("F-F_ST_Reversal_Factor", "famafrench")
    """
    # Deprecated sources: fail immediately. The reader calls that used to
    # follow each raise were unreachable and have been dropped.
    if data_source == "yahoo":
        raise ImmediateDeprecationError(DEP_ERROR_MSG.format('Yahoo Daily'))

    elif data_source == "yahoo-actions":
        raise ImmediateDeprecationError(DEP_ERROR_MSG.format('Yahoo Actions'))

    elif data_source == "yahoo-dividends":
        raise ImmediateDeprecationError(
            DEP_ERROR_MSG.format('Yahoo Dividends'))

    elif data_source == "edgar-index":
        raise ImmediateDeprecationError(DEP_ERROR_MSG.format('EDGAR'))

    elif data_source == "google":
        return GoogleDailyReader(symbols=name, start=start, end=end,
                                 chunksize=25,
                                 retry_count=retry_count, pause=pause,
                                 session=session).read()

    elif data_source == "iex":
        return IEXDailyReader(symbols=name, start=start, end=end,
                              chunksize=25,
                              retry_count=retry_count, pause=pause,
                              session=session).read()

    elif data_source == "iex-tops":
        return IEXTops(symbols=name, start=start, end=end,
                       retry_count=retry_count, pause=pause,
                       session=session).read()

    elif data_source == "iex-last":
        return IEXLasts(symbols=name, start=start, end=end,
                        retry_count=retry_count, pause=pause,
                        session=session).read()

    elif data_source == "bankofcanada":
        return BankOfCanadaReader(symbols=name, start=start, end=end,
                                  retry_count=retry_count, pause=pause,
                                  session=session).read()

    elif data_source == "stooq":
        # No start/end are forwarded for this source.
        return StooqDailyReader(symbols=name,
                                chunksize=25,
                                retry_count=retry_count, pause=pause,
                                session=session).read()

    elif data_source == "iex-book":
        return IEXDeep(symbols=name, service="book", start=start, end=end,
                       retry_count=retry_count, pause=pause,
                       session=session).read()

    elif data_source == "enigma":
        return EnigmaReader(dataset_id=name, api_key=access_key).read()

    elif data_source == "fred":
        return FredReader(symbols=name, start=start, end=end,
                          retry_count=retry_count, pause=pause,
                          session=session).read()

    elif data_source == "famafrench":
        return FamaFrenchReader(symbols=name, start=start, end=end,
                                retry_count=retry_count, pause=pause,
                                session=session).read()

    elif data_source == "oecd":
        return OECDReader(symbols=name, start=start, end=end,
                          retry_count=retry_count, pause=pause,
                          session=session).read()

    elif data_source == "eurostat":
        return EurostatReader(symbols=name, start=start, end=end,
                              retry_count=retry_count, pause=pause,
                              session=session).read()

    elif data_source == 'nasdaq':
        if name != 'symbols':
            raise ValueError("Only the string 'symbols' is supported for "
                             "Nasdaq, not %r" % (name, ))
        return get_nasdaq_symbols(retry_count=retry_count, pause=pause)

    elif data_source == "quandl":
        return QuandlReader(symbols=name, start=start, end=end,
                            retry_count=retry_count, pause=pause,
                            session=session).read()

    elif data_source == "moex":
        return MoexReader(symbols=name, start=start, end=end,
                          retry_count=retry_count, pause=pause,
                          session=session).read()

    elif data_source == "morningstar":
        return MorningstarDailyReader(symbols=name, start=start, end=end,
                                      retry_count=retry_count, pause=pause,
                                      session=session, interval="d").read()

    elif data_source == 'robinhood':
        return RobinhoodHistoricalReader(symbols=name, start=start, end=end,
                                         retry_count=retry_count, pause=pause,
                                         session=session).read()

    elif data_source == 'tiingo':
        return TiingoDailyReader(symbols=name, start=start, end=end,
                                 retry_count=retry_count, pause=pause,
                                 session=session,
                                 api_key=access_key).read()

    else:
        # Wrap in a 1-tuple so a tuple-valued data_source is reported
        # verbatim instead of being unpacked by the % operator.
        msg = "data_source=%r is not implemented" % (data_source,)
        raise NotImplementedError(msg)
def DataReader(name, data_source=None, start=None, end=None,
               retry_count=3, pause=0.001, session=None, access_key=None):
    """
    Imports data from a number of online sources.

    Currently supports Yahoo! Finance, Google Finance, St. Louis FED (FRED),
    Kenneth French's data library, and the SEC's EDGAR Index, among others.

    Parameters
    ----------
    name : str or list of strs
        the name of the dataset. Some data sources (yahoo, google, fred) will
        accept a list of names. For "enigma" it is passed as the dataset id;
        for "nasdaq" it must be the string 'symbols'.
    data_source: {str, None}
        the data source ("yahoo", "yahoo-actions", "yahoo-dividends",
        "google", "bankofcanada", "enigma", "fred", "famafrench", "oecd",
        "eurostat", "edgar-index", "nasdaq" or "quandl")
    start : {datetime, None}
        left boundary for range (defaults to 1/1/2010)
    end : {datetime, None}
        right boundary for range (defaults to today)
    retry_count : {int, 3}
        Number of times to retry query request.
    pause : {numeric, 0.001}
        Time, in seconds, to pause between consecutive queries of chunks. If
        single value given for symbol, represents the pause between retries.
    session : Session, default None
        requests.sessions.Session instance to be used
    access_key : (str, None)
        Optional API key. Forwarded (as ``api_key``) to the "enigma" reader
        only; ignored by every other source.

    Raises
    ------
    ValueError
        If ``data_source`` is "nasdaq" and ``name`` is not 'symbols'.
    NotImplementedError
        If ``data_source`` is not one of the keys listed above.

    Examples
    ----------

    # Data from Yahoo! Finance
    gs = DataReader("GS", "yahoo")

    # Corporate Actions (Dividend and Split Data)
    # with ex-dates from Yahoo! Finance
    gs = DataReader("GS", "yahoo-actions")

    # Data from Google Finance
    aapl = DataReader("AAPL", "google")

    # Data from FRED
    vix = DataReader("VIXCLS", "fred")

    # Data from Fama/French
    ff = DataReader("F-F_Research_Data_Factors", "famafrench")
    ff = DataReader("F-F_Research_Data_Factors_weekly", "famafrench")
    ff = DataReader("6_Portfolios_2x3", "famafrench")
    ff = DataReader("F-F_ST_Reversal_Factor", "famafrench")

    # Data from EDGAR index
    ed = DataReader("full", "edgar-index")
    ed2 = DataReader("daily", "edgar-index")
    """
    if data_source == "yahoo":
        return YahooDailyReader(symbols=name, start=start, end=end,
                                adjust_price=False, chunksize=25,
                                retry_count=retry_count, pause=pause,
                                session=session).read()

    elif data_source == "yahoo-actions":
        return YahooActionReader(symbols=name, start=start, end=end,
                                 retry_count=retry_count, pause=pause,
                                 session=session).read()

    elif data_source == "yahoo-dividends":
        return YahooDivReader(symbols=name, start=start, end=end,
                              adjust_price=False, chunksize=25,
                              retry_count=retry_count, pause=pause,
                              session=session, interval='d').read()

    elif data_source == "google":
        return GoogleDailyReader(symbols=name, start=start, end=end,
                                 chunksize=25,
                                 retry_count=retry_count, pause=pause,
                                 session=session).read()

    elif data_source == "bankofcanada":
        return BankOfCanadaReader(symbols=name, start=start, end=end,
                                  retry_count=retry_count, pause=pause,
                                  session=session).read()

    elif data_source == "enigma":
        return EnigmaReader(dataset_id=name, api_key=access_key).read()

    elif data_source == "fred":
        return FredReader(symbols=name, start=start, end=end,
                          retry_count=retry_count, pause=pause,
                          session=session).read()

    elif data_source == "famafrench":
        return FamaFrenchReader(symbols=name, start=start, end=end,
                                retry_count=retry_count, pause=pause,
                                session=session).read()

    elif data_source == "oecd":
        return OECDReader(symbols=name, start=start, end=end,
                          retry_count=retry_count, pause=pause,
                          session=session).read()

    elif data_source == "eurostat":
        return EurostatReader(symbols=name, start=start, end=end,
                              retry_count=retry_count, pause=pause,
                              session=session).read()

    elif data_source == "edgar-index":
        return EdgarIndexReader(symbols=name, start=start, end=end,
                                retry_count=retry_count, pause=pause,
                                session=session).read()

    elif data_source == 'nasdaq':
        if name != 'symbols':
            raise ValueError("Only the string 'symbols' is supported for "
                             "Nasdaq, not %r" % (name, ))
        return get_nasdaq_symbols(retry_count=retry_count, pause=pause)

    elif data_source == "quandl":
        return QuandlReader(symbols=name, start=start, end=end,
                            retry_count=retry_count, pause=pause,
                            session=session).read()

    else:
        # Wrap in a 1-tuple so a tuple-valued data_source is reported
        # verbatim instead of being unpacked by the % operator.
        msg = "data_source=%r is not implemented" % (data_source,)
        raise NotImplementedError(msg)
# -*- coding: utf-8 -*-
"""Download daily data for one ticker from Quandl's "WIKI" dataset, print it,
plot its 'High' column, then print the FRED series 'GS10'."""
import datetime

import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader.data as web
from matplotlib import style
from pandas_datareader.quandl import QuandlReader

style.use('fivethirtyeight')

# Requested date range: 1 January 2010 up to the moment the script runs.
START = datetime.datetime(2010, 1, 1)
END = datetime.datetime.now()

ticker = 'XOM'

# Earlier variant, kept for reference:
# df=web.DataReader("XOM","morningstar",start,end)
dataset_code = "WIKI/{}".format(ticker)
data = QuandlReader(dataset_code, start=START, end=END)
df = data.read()

# Full frame first, then only its first rows.
print(df)
print(df.head())

# Plot the 'High' column and show the figure.
high_series = df['High']
high_series.plot()
plt.legend()
plt.show()

gs10 = web.get_data_fred('GS10')
print(gs10)
df_result = pd.merge_asof(df_source, df_data_pivot, left_on=['Open_Date'], right_on=['Date_']) # %% # pull data from Quandl if reload_data: with open('data/vars.json', 'r') as json_file: var_dict = json.load(json_file) quandl_key = var_dict['QUANDL_API'] QR = QuandlReader("AAII/AAII_SENTIMENT", api_key=quandl_key) QR_df = QR.read().reset_index() QR_df.columns = ['AAII_Sent_' + str(col) for col in QR_df.columns] QR_df.to_csv('output/c_mktdata_aaii.csv') else: QR_df = pd.read_csv('output/c_mktdata_aaii.csv') # %% # merge Quandl QR_df_sorted = QR_df.sort_values(['AAII_Sent_Date']) QR_df_sorted['AAII_Sent_Date'] = pd.to_datetime(QR_df['AAII_Sent_Date'], errors='coerce') df_result = pd.merge_asof(