def get_stock(stock, beg, end, write=False, load=False):
    """Return a price DataFrame for `stock` from Yahoo via pandas-datareader.

    Args:
        stock: ticker symbol.
        beg, end: date range passed through to DataReader.
        write: if True, cache the downloaded frame to ./{stock}.csv.
        load: if True, skip the download and read ./{stock}.csv instead.

    Returns:
        pandas.DataFrame of price data.
    """
    if load:  # idiom fix: was `if load == True`
        return pd.read_csv(f'./{stock}.csv')
    ret_df = DataReader(stock, 'yahoo', beg, end)
    if write:
        ret_df.to_csv(f'./{stock}.csv')
    return ret_df
def test_read_famafrench(self):
    """Fetch several Fama-French datasets and verify each is a non-empty dict."""
    names = (
        "F-F_Research_Data_Factors",
        "F-F_Research_Data_Factors_weekly",
        "6_Portfolios_2x3",
        "F-F_ST_Reversal_Factor",
        "F-F_Momentum_Factor",
    )
    for name in names:
        ff = DataReader(name, "famafrench")
        assert ff
        assert isinstance(ff, dict)
def p03_AAPL(tomtom):
    """Download AAPL history from Yahoo and save it as a temp CSV.

    Falls back to the bundled example file when the remote read fails.
    """
    try:
        apple = DataReader('AAPL', 'yahoo', start='01/01/2000')
        apple.to_csv(tomtom.get_tmp_name('p03_AAPL.csv'))
    except RemoteDataError:
        print('Error while reading data, revert to stored file in example_data')
        shutil.copy('example_data/p03_AAPL.csv', 'temp')
def ticker_event_history(self, start, end, ticker):
    """Return corporate-action events for `ticker` between `start` and `end`
    as a list of record dicts; empty list when the source is unreachable."""
    try:
        events = DataReader(ticker, 'yahoo-actions', start, end).reset_index()
    except OSError:
        # the page cannot be reached for some reason
        return []
    return events.to_dict(orient="records")
def main():
    """Download six months of daily bars for a few symbols and write CSVs."""
    # Specifically chosen to include the AAPL split on June 9, 2014.
    symbols = ['AAPL', 'MSFT', 'BRK-A']
    rename_map = {
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Close': 'close',
        'Volume': 'volume',
    }
    for symbol in symbols:
        frame = DataReader(
            symbol,
            'yahoo',
            start='2014-03-01',
            end='2014-09-01',
        )
        frame.rename(columns=rename_map, inplace=True)
        del frame['Adj Close']
        dest = join(here, symbol + '.csv')
        print(f"Writing {symbol} -> {dest}")
        frame.to_csv(dest, index_label='day')
def get(self, symbol, asset_type=None, **kwargs):
    """Get the latest data for the specified symbol.

    Returns a single-column frame named 'close' holding the adjusted close.
    Raises Exception when asset_type is missing, SymbolNotFound when the
    symbol cannot be fetched.
    """
    if not asset_type:
        logger.error("No asset_type argument set")
        raise Exception("asset_type must be set")
    if asset_type == "F":
        # Forex pairs are quoted on Yahoo as e.g. EURUSD=X
        symbol = symbol.replace("/", "") + "=X"
    try:
        frame = DataReader(symbol, "yahoo", start="1950-01-01", **kwargs)
    except (RemoteDataError, KeyError):
        logger.error(f"Symbol {symbol} not found")
        raise SymbolNotFound(symbol)
    frame = frame.rename(columns={
        "Open": "open",
        "High": "high",
        "Low": "low",
        "Close": "close",
        "Volume": "volume",
        "Adj Close": "adj_close",
    }).sort_index()
    return frame[["adj_close"]].rename({"adj_close": "close"}, axis=1)
def load_data():
    """Load the SSE Composite (000001.SS) and A-share (000002.SS) indices,
    caching each download as a local CSV."""

    def _cached(csv_name, symbol):
        # read from the cache when present, otherwise download and cache
        if os.path.exists(csv_name):
            return pd.read_csv(csv_name)
        frame = DataReader(symbol, "yahoo", start, end)
        frame.to_csv(csv_name)
        return frame

    data_ss = _cached('000001.csv', "000001.SS")   # SSE Composite Index
    data_tlz = _cached('000002.csv', "000002.SS")  # A-share Index
    return data_ss, data_tlz
def setUp(self):
    """Create the shared fixture: one quarter of TSLA daily data."""
    # env.logger.setLevel(logging.WARN)
    self.symbol = "TSLA"
    self.start = "2015-04-01"
    self.end = "2015-06-30"
    # DS is the module-level data-source name passed to pandas-datareader
    self.h = DataReader(self.symbol, DS, self.start, self.end)
def get_data_for_multiple_stocks(tickers, start_date, end_date):
    '''
    Obtain stocks information (Date, OHLC, Volume and Adjusted Close).
    Uses Pandas DataReader to make an API call to Yahoo Finance and
    download the data directly.  Also computes Log Return and Arithmetic
    Return from the adjusted close.

    Args:
        tickers: list of stock tickers
        start_date: start date of the stock data
        end_date: end date of the stock data

    Returns:
        A dictionary mapping each ticker to its DataFrame.
    '''
    stocks = dict()
    for ticker in tickers:
        frame = DataReader(ticker, 'yahoo', start_date, end_date)
        # keep the ticker on every row so you can reference better later
        frame.insert(0, "Ticker", ticker)
        frame['Date'] = pd.to_datetime(frame.index)  # useful for transformation later
        frame['Prev Adj Close'] = frame['Adj Close'].shift(1)
        frame['Log Return'] = np.log(frame['Adj Close'] / frame['Prev Adj Close'])
        frame['Return'] = frame['Adj Close'] / frame['Prev Adj Close'] - 1
        # move Date to the front, then make it the datetime index again
        frame = frame.reset_index(drop=True)
        ordered = ["Date"] + [c for c in frame.columns if c != "Date"]
        frame = frame[ordered]
        frame["Date"] = pd.to_datetime(frame["Date"])
        stocks[ticker] = frame.set_index("Date")
    return stocks
def test_train(self):
    """Train the LSTM on Ford ('F') log returns and check score/RMSE bounds."""
    a = DataReader('F', 'google', datetime(2006, 6, 1), datetime(2016, 6, 1))
    # daily log returns, indexed by the date following each return
    a_returns = pd.DataFrame(np.diff(np.log(a['Close'].values)))
    a_returns.index = a.index.values[1:a.index.values.shape[0]]
    train_value = a_returns.values
    scaler, x_train, y_train, x_test, y_test = time_step_data_convert(
        original_data=train_value,
        num_time_steps=5,
        batch_size=10,
    )
    lstm_instance = LSTMModel(num_time_steps=5)
    train_model_instance = TrainModel(model_object=lstm_instance,
                                      model_name='lstm')
    predicted, score, rmse = train_model_instance.train(Xtrain=x_train,
                                                        Ytrain=y_train,
                                                        Xtest=x_test,
                                                        Ytest=y_test)
    # NOTE(review): the thresholds below look empirical — confirm they match
    # the model's expected performance before tightening/loosening them.
    score_check = False
    rmse_check = False
    if score < 0.001:
        score_check = True
    self.assertEqual(True, score_check)
    if rmse < 0.031:
        rmse_check = True
    self.assertEqual(True, rmse_check)
    # visual comparison of predictions vs. training targets
    color_list = ['r--', 'b:']
    label_list = ['predict_x', 'real_y']
    data_list = [predicted, y_train]
    visualize_wave(color_list=color_list, label_list=label_list,
                   data_list=data_list)
def historical_volatility(sym, days):
    """Return the annualized stddev of daily log returns of `sym`.

    NOTE(review): as visible here the function only fetches the quotes and
    returns (None, None) on error; the volatility computation itself does
    not appear in this block — confirm against the full file.
    """
    try:
        quotes = DataReader(sym, 'google')['Close'][-days:]
    except Exception as e:  # fix: Python 2 `except Exception, e` syntax
        # fix: Python 2 print statement -> print() function
        print("Error getting data for symbol '{}'.\n".format(sym), e)
        return None, None
def view_ticker():
    """Render a Bokeh line chart of the requested price series for a ticker."""
    stock = request.form['ticker']
    start = datetime.strptime(request.form['start'], '%Y-%m-%d').date()
    end = datetime.strptime(request.form['end'], '%Y-%m-%d').date()
    # default to the closing price; checkboxes override the series shown
    value = '.4'
    status = 'Close'
    if request.form.get('box1'):
        value = '.4'
        status = 'Close'
    if request.form.get('box2'):
        value = '.11'
        status = 'Adj Close'
    if request.form.get('box3'):
        value = '.1'
        status = 'Open'
    mydata = DataReader(stock, 'yahoo', start, end)
    p = figure(x_axis_type='datetime',
               title=status + " Price for " + request.form['ticker'])
    p.line('Date', status, source=mydata)
    p.xaxis.axis_label = "Date"
    p.yaxis.axis_label = "Price"
    return file_html(p, CDN, "my plot")
def get_outperformance(ticker_list, idx,
                       start=datetime(1993, 1, 1),
                       end=datetime(2019, 7, 9)):
    """Download prices for each ticker, flag out/under-performance versus
    the index series `idx`, and write one CSV per ticker.

    Args:
        ticker_list: tickers to process.
        idx: index return series, date-indexed (aligned via .loc).
        start, end: download date range.

    Returns:
        List of tickers that failed to download or process.
    """
    errors = []
    complete = len(ticker_list)
    done = 0
    for ticker in ticker_list:
        try:
            prices = DataReader(ticker, 'yahoo', start, end)
            # trailing 252-day (~1yr) return, padded with NaN at the tail
            yr_ret = list(prices['Close'].pct_change(252).dropna())
            length = len(prices)
            for i in range(length - len(yr_ret)):
                yr_ret.append(nan)
            tmp_idx = idx.loc[prices.index]
            prices['yr_ret'] = yr_ret
            prices['outperformance'] = (
                (prices['yr_ret'] > tmp_idx).astype(int) -
                (prices['yr_ret'] < -tmp_idx).astype(int))
            # BUG FIX: column is 'yr_ret'; the original 'yr-ret' raised a
            # KeyError that the bare except silently turned into an "error"
            # for every single ticker.
            prices['magnitude'] = abs(prices['yr_ret']) - abs(tmp_idx)
            st = str(min(prices.index))[:-9]
            en = str(max(prices.index))[:-9]
            file = ticker + '_' + st + '_' + en + '.csv'
            prices.to_csv(file)
        except Exception:  # best-effort: record the failure and continue
            errors.append(ticker)
        done += 1
        # simple in-place progress bar
        print('\r' + '|' + ((u'\u2588') * (int(
            (done + 1) / complete * 100))).ljust(99) +
            '| {0:.2f}%'.format(min((done + 1) / complete * 100, 100)),
            end='')
    return errors
def load_eur():
    """
    Return cash rate for EUR and DEM prior to the introduction of EUR
    """
    # German bank rate (Quandl) covers the earliest history
    bank_rate = quandl.get(CashFile.GER_BANKRATE.value, api_key=quandl_token)
    # hand-entered WW2-era rates, forward-filled onto a monthly grid
    ww2_data = pd.DataFrame([4.0, 3.5, 5.0], index=[
        datetime(1936, 6, 30),
        datetime(1940, 4, 9),
        datetime(1948, 6, 28)
    ])
    ww2_month = pd.date_range('1936-06-01', '1948-06-01', freq='M')
    ww2_month = pd.DataFrame(index=ww2_month)
    ww2_data = pd.concat((ww2_data, ww2_month), axis=1).fillna(method="pad")
    # parse "YYYY-MM" labels and shift them to the business month-end
    parser = lambda d: date_shift(datetime.strptime(d, "%Y-%m"), "+BMonthEnd")
    filename = join(DATA_DIRECTORY, 'cash_rate', 'eur', 'BBK01.SU0112.csv')
    discount_rate = pd.read_csv(filename,
                                index_col=0,
                                skiprows=[1, 2, 3, 4],
                                usecols=[0, 1],
                                engine="python",
                                skipfooter=95,
                                parse_dates=True,
                                date_parser=parser)
    ib_rate = DataReader(CashFile.EUR_3M_IB_RATE.value, "fred", START_DATE)
    libor = quandl.get(CashFile.EUR_3M_EURIBOR.value, api_key=quandl_token)
    # stitch the eras together: each segment is forward-filled, then the
    # row-wise sum collapses the staggered columns into a single series
    data = (pd.concat(
        (bank_rate[:"1936-06"].fillna(method="pad"), ww2_data,
         discount_rate[:"1959"].fillna(method="pad"),
         to_monthend(ib_rate['1960':"1998"].fillna(method="pad")),
         libor['1999':].fillna(method="pad")),
        axis=1).sum(axis=1).rename("cash_rate_eur"))
    return data
def ingest_stocks_to_df(cls, time_diff_years=6, ticker_list=None):
    """Pull `time_diff_years` of daily Yahoo data for each ticker and return
    one wide DataFrame with a column MultiIndex keyed by ticker.

    Args:
        time_diff_years: lookback window in years from today.
        ticker_list: tickers to fetch; defaults to the original watchlist.

    Returns:
        pandas.DataFrame with a leading 'date' column and per-ticker columns.

    TODO when not using streamlit: Saves; If already queried, loads from file.
    """
    if ticker_list is None:
        # fix: mutable default argument; same default watchlist as before
        ticker_list = ['GSIT', 'ICAD', 'XAIR', 'LTRN', 'ARKK', 'ARKF', 'ARKW']
    end = datetime.now()
    start = datetime(end.year - time_diff_years, end.month, end.day)
    dfs = [DataReader(stock, 'yahoo', start, end) for stock in ticker_list]
    stocks_df = pd.concat(dfs, axis=1, join='outer', keys=ticker_list)
    stocks_df.insert(loc=0, column='date', value=stocks_df.index)
    return stocks_df
def check_data_reader(code):
    """Probe whether `code` can be fetched from Yahoo.

    Returns False (printing 'ok') when the fetch FAILS, True (printing
    'not ok') when it succeeds — the inverted-sounding convention is
    preserved from the original caller's expectations.
    """
    try:
        DataReader(code, 'yahoo')
    except Exception:  # fix: bare `except:` also swallowed KeyboardInterrupt/SystemExit
        print('%s ok!' % code)
        return False
    print('%s not ok!' % code)
    return True
def obtem_dados_setores(self):
    """Fetch adjusted-close data for each sector's tickers from Yahoo and
    store the result in self.dados, keyed by sector."""
    fonte = 'yahoo'
    for setor in self.setores:
        precos = DataReader(self.setores_tickers[setor], fonte,
                            self.data_ini, self.data_fim)
        self.dados[setor] = precos[['Adj Close']]
def start_market_simulation(self):
    """Replay historical bars through the market-data object, firing the
    tick callback (when one is set) once per bar."""
    data = DataReader(self.ticker, self.source, self.start, self.end)
    for time, row in data.iterrows():
        self.md.add_last_price(time, self.ticker, row["Close"], row["Volume"])
        self.md.add_open_price(time, self.ticker, row["Open"])
        if self.event_tick is not None:  # idiom fix: was `not ... is None`
            self.event_tick(self.md)
def get_stock_quote(tickers):
    """Download a Yahoo price history for `tickers` between the module-level
    start_date1 and end_date1 and return it as a DataFrame."""
    c()
    source = 'yahoo'
    price = DataReader(tickers, source, start_date1, end_date1)
    return pd.DataFrame(price)
def compose_dataframe(equity_sym):
    """Provided a list of stock symbols, return a dataframe holding each
    symbol's 'Adj Close' series as a column."""
    df = pd.DataFrame()
    for sym in equity_sym:
        # "yahoo" is the data source, not an equity
        history = DataReader(sym, "yahoo", start_dt)
        df[sym] = history['Adj Close']
    return df
def addCustomColumns(df, market_upd=False):
    """Augment fundamentals frame `df` with quote-derived custom columns
    (basic, growth, timeline, betas).  Quotes come from the Quandl WIKI CSV
    endpoint; market data from Yahoo (^GSPC) or a cached CSV."""
    # start ~10 years before the frame's first 'date' index value
    start = datetime.date(
        int(df.index.get_level_values('date')[0]) - 10,
        int(df['month'].values[0]), 1)
    end_date_ls = [
        int(d) for d in datetime.date.today().strftime('%Y-%m-%d').split("-")
    ]
    end = datetime.date(end_date_ls[0], end_date_ls[1], end_date_ls[2])
    try:
        # column=4 is the close price; sorted ascending over [start, end]
        url = "https://www.quandl.com/api/v1/datasets/WIKI/{0}.csv?column=4&sort_order=asc&trim_start={1}&trim_end={2}".format(
            df.index.get_level_values('ticker')[0], start, end)
        qr = pd.read_csv(url)
        qr['Date'] = qr['Date'].astype('datetime64[ns]')
        # quotes = DataReader(df.index.get_level_values('ticker')[0], 'yahoo', start, end, pause=1)['Close']
        # quotes = DataReader(df.index.get_level_values('ticker')[0], 'yahoo', start, end, pause=1)['Close']
    except:
        print("Could not read time series data for %s" %
              df.index.get_level_values('ticker')[0])
        exc_type, exc_obj, exc_tb = sys.exc_info()
        app.logger.info(
            "Could not read time series data for {3}: {0}, {1}, {2}".format(
                exc_type, exc_tb.tb_lineno, exc_obj,
                df.index.get_level_values('ticker')[0]))
        raise
    df = addBasicCustomCols(df, qr)
    df = addGrowthCustomCols(df, qr)
    df = addTimelineCustomCols(df, qr)
    # NOTE(review): debugger breakpoint left in — remove before production.
    pdb.set_trace()
    if market_upd:
        # market = DataReader(".INX", 'google', start, end, pause=1)['Close']
        market = DataReader('^GSPC', 'yahoo', start, end, pause=1)['Close']
        market.to_csv(
            '/home/ubuntu/workspace/finance/app/static/docs/market.csv')
        # NOTE(review): `quotes` is never defined in this branch (the
        # DataReader assignments above are commented out), so the next line
        # raises NameError when market_upd=True — confirm intended source.
        quotes = pd.DataFrame(quotes)
        market.columns = ['Date', 'market']
        # NOTE(review): set_index returns a new frame; this result is
        # discarded — likely missing an assignment.
        market.set_index('Date')
        quotes['market'] = market
    else:
        market = pd.read_csv(
            '/home/ubuntu/workspace/finance/app/static/docs/market.csv')
        market.columns = ['Date', 'market']
        market['Date'] = market['Date'].apply(pd.to_datetime)
        market = market.set_index('Date')
    # join the market series onto the quote history by date
    qr = pd.merge(qr.set_index('Date'),
                  market,
                  left_index=True,
                  right_index=True)
    df = calcBetas(df, qr)
    ''' Still need to do: 'enterpriseToEbitda' 'ebitdaMargins' 'ebitda', 'shortRatio' '''
    return df
def p01_volumes(tomtom):
    """Save a slice of recent daily volumes for a few tickers to a temp file,
    falling back to bundled example data when the remote read fails."""
    try:
        symbols = ['AAPL', 'JNJ', 'XOM']
        data = dict([(sym, DataReader(sym, 'yahoo')['Volume'])
                     for sym in symbols])
        df = pd.DataFrame.from_dict(data)
        # fix: .ix was removed in pandas 1.0; .iloc keeps the positional slice
        df.iloc[-7:-3].to_csv(tomtom.get_tmp_name('p01_volumes.txt'))
    except RemoteDataError:
        print('Error while reading data, revert to stored file in example_data')
        shutil.copy('example_data/p01_volumes.txt', 'temp')
def update_graph(tickers):
    """Build one candlestick-plus-Bollinger-bands dcc.Graph per ticker,
    substituting an H3 notice for tickers whose IEX data is unavailable."""
    graphs = []
    for i, ticker in enumerate(tickers):
        try:
            df = DataReader(ticker, 'iex', dt.datetime(2017,1,1),
                            dt.datetime.now())
        except Exception:
            graphs.append(html.H3(
                'Data is not available for {}'.format(ticker),
                style={'marginTop': 20, 'marginBottom': 20}
            ))
            # except Exception as e:
            #     graphs.append(html.H3(
            #         'Data is not available for {}'.format(e),
            #         style={'marginTop': 20, 'marginBottom': 20}
            #     ))
            continue
        # candlestick trace for this ticker
        candlestick = {
            'x': df.index,
            'open': df['open'],
            'high': df['high'],
            'low': df['low'],
            'close': df['close'],
            'type': 'candlestick',
            'name': ticker,
            'legendgroup': ticker,
            # 'increasing': {'line': {'color': colorscale[0]}},
            # 'decreasing': {'line': {'color': colorscale[1]}}
        }
        bb_bands = bbands(df.close)
        # NOTE(review): the comprehension's `i` shadows the outer loop's `i`,
        # so color/showlegend are driven by the band index, not the ticker —
        # confirm that is intended.
        bollinger_traces = [{
            'x': df.index,
            'y': y,
            'type': 'scatter',
            'mode': 'lines',
            'line': {'width': 1,
                     'color': colorscale[(i*2) % len(colorscale)]},
            'hoverinfo': 'none',
            'legendgroup': ticker,
            'showlegend': True if i == 0 else False,
            'name': '{} - bollinger bands'.format(ticker)
        } for i, y in enumerate(bb_bands)]
        graphs.append(dcc.Graph(
            id=ticker,
            figure={
                'data': [candlestick] + bollinger_traces,
                'layout': {
                    'margin': {'b': 0, 'r': 10, 'l': 60, 't': 0},
                    'legend': {'x': 0}
                }
            }
        ))
    return graphs
def stock_predict_plt(key):
    """Train a small LSTM on two years of `key`'s closing prices and return
    a base64-encoded PNG comparing predictions with the held-out tail."""
    end = datetime.now()
    start = datetime(end.year - 2, end.month, end.day)
    df = DataReader(key, data_source='yahoo', start=start, end=end)
    data = df.filter(['Close'])
    dataset = data.values
    # 80/20 train/test split on the time axis
    training_data_len = int(np.ceil(len(dataset) * .8))
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(dataset)
    train_data = scaled_data[0:int(training_data_len), :]
    x_train = []
    y_train = []
    # 30-day sliding windows predict the next close
    for i in range(30, len(train_data)):
        x_train.append(train_data[i - 30:i, 0])
        y_train.append(train_data[i, 0])
    x_train, y_train = np.array(x_train), np.array(y_train)
    # LSTM expects (samples, timesteps, features)
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    model = Sequential()
    model.add(
        LSTM(70, return_sequences=False, input_shape=(x_train.shape[1], 1)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(x_train, y_train, batch_size=1, epochs=20)
    # test windows start 30 days early so the first sample has full history
    test_data = scaled_data[training_data_len - 30:, :]
    x_test = []
    y_test = dataset[training_data_len:, :]
    for i in range(30, len(test_data)):
        x_test.append(test_data[i - 30:i, 0])
    x_test = np.array(x_test)
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
    predictions = model.predict(x_test)
    # map scaled predictions back to price space
    predictions = scaler.inverse_transform(predictions)
    train = data[-500:training_data_len]
    valid = data[training_data_len:]
    # NOTE(review): assigning into a slice of `data` may raise
    # SettingWithCopyWarning — consider .copy() on `valid`.
    valid['Predictions'] = predictions
    img = io.BytesIO()
    plt.figure(figsize=(16, 8))
    plt.xlabel('Date', fontsize=18)
    plt.ylabel('Close Price USD ($)', fontsize=18)
    plt.plot(train['Close'])
    plt.plot(valid[['Close', 'Predictions']])
    plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
    plt.savefig(img, format='png')
    # encode the in-memory PNG for embedding in an <img> tag
    plot_url = base64.b64encode(img.getbuffer()).decode("ascii")
    return plot_url
def get_data_if_not_exists():
    """Return IBM daily history, caching it at ./data/ibm.csv.

    On a cache hit the CSV is read back; on a miss the data is downloaded,
    written to the cache, and returned.
    """
    if os.path.exists("./data/ibm.csv"):
        return pd.read_csv("./data/ibm.csv")
    # fix: exists-check + mkdir replaced with the race-free makedirs
    os.makedirs("./data", exist_ok=True)
    # fix: build the DataFrame once instead of constructing it twice
    df = pd.DataFrame(
        DataReader('IBM', 'yahoo', datetime(1950, 1, 1), datetime.today()))
    df.to_csv("./data/ibm.csv")
    return df
def get_benchmark():
    """Return daily S&P 500 (^GSPC) returns between the module-level
    start_date1 and end_date1."""
    c()
    source = 'yahoo'
    adj_close = DataReader(['^GSPC'], source, start_date1, end_date1)['Adj Close']
    return adj_close.pct_change().dropna()
def stock_autocorr(ticker):
    """Print the autocorrelation of `ticker`'s daily percent change of the
    OHLC mean price over the module-level window."""
    stock = DataReader(ticker, data_source, start_date)
    # mean of the four daily prices as a single representative price
    stock['mean'] = (stock['Open'] + stock['High'] + stock['Low'] +
                     stock['Close']) / 4
    # NOTE(review): drop_col is module-level; presumably it removes the
    # original price/volume columns so only 'mean' remains — confirm,
    # otherwise pct_change() below returns a multi-column frame that does
    # not fit a single 'pct_change' column.
    stock_change = stock.drop(drop_col, axis=1)
    stock_change['pct_change'] = stock_change.pct_change()
    stock_autocorr = stock_change['pct_change'].autocorr()
    print(
        'Over the past {0} years, the auto-correlation of {1} daily point change is:{2}'
        .format(years, ticker, stock_autocorr))
def multi_loader(ticker_list, start_date=None, end_date=None, source='yahoo',
                 retry=3, pause=0.001, sess=None, api_key=None,
                 periodicity='daily', term='end', index=-1, region='US'):
    '''
    Wraps pandas_datareader DataReader methods and performs calendar
    manipulations.

    Args:
        ticker_list: list of tickers as strings to get information about
        start_date: the oldest date to get data for, default 1 year ago.
            Accepts either a string 'YYYY-MM-DD' or a datetime.date object.
        end_date: the most recent date to get data for, default today.
            Accepts either a string 'YYYY-MM-DD' or a datetime.date object.
        source: the place to get the data from, default 'yahoo'. Check the
            pandas_datareader documentation for a list of valid sources.
        retry: the amount of times to retry a query after failure, default 3
        pause: seconds to pause between queries, default 0.001
        sess: the session instance to be used. Accepts requests.sessions
        api_key: the api key for the data source if applicable
        periodicity: the period type to use. Accepts 'daily', 'weekly',
            'monthly'
        term: from what point to calculate the period from. Accepts 'start'
            or 'end'
        index: the index of the source data frame to use in the portfolio,
            default -1
        region: the country to use for calculations. Accepts 'US' or 'UK'

    Returns:
        A pandas data frame object containing the requested information
    '''
    if not end_date:
        end_date = date.today()
    if not start_date:
        # default to exactly one year before today
        y, m, d = str(date.today()).split('-')
        start_date = date(int(y) - 1, int(m), int(d))
    tickers = [tk.upper() for tk in ticker_list]
    series_by_ticker = {}
    for ticker in tickers:
        frame = DataReader(name=ticker, data_source=source,
                           start=start_date, end=end_date,
                           retry_count=retry, pause=pause,
                           session=sess, api_key=api_key)
        # keep only the requested column from each source frame
        series_by_ticker[ticker] = frame[frame.columns[index]]
    prices = pd.DataFrame(series_by_ticker)
    prices.columns = tickers
    if periodicity.lower() != 'daily':
        prices = _reduce(prices, periodicity.lower(), term.lower(),
                         region.upper())
    return prices
def tsa_unemployment():
    '''
    http://www.statsmodels.org/stable/examples/notebooks/generated/statespace_cycles.html
    '''
    # monthly US unemployment rate from FRED
    endog = DataReader("UNRATE", "fred", start="1954-01-01")
    print(endog)
    # Hodrick-Prescott filter; lamb=129600 is the standard monthly setting
    hp_cycle, hp_trend = sm.tsa.filters.hpfilter(endog, lamb=129600)
    print(hp_cycle, hp_trend)
    ## Unobserved components and ARIMA model (UC-ARIMA)
    mod_ucarima = sm.tsa.UnobservedComponents(endog, 'rwalk', autoregressive=4)
    # Here the powell method is used, since it achieves a
    # higher loglikelihood than the default L-BFGS method
    res_ucarima = mod_ucarima.fit(method="powell", disp=False)
    print(res_ucarima.summary())
    ## Unobserved components with stochastic cycle (UC)
    mod_uc = sm.tsa.UnobservedComponents(
        endog,
        'rwalk',
        cycle=True,
        stochastic_cycle=True,
        damped_cycle=True,
    )
    # Here the powell method gets close to the optimum
    res_uc = mod_uc.fit(method='powell', disp=False)
    # but to get to the highest loglikelihood we do a
    # second round using the L-BFGS method.
    res_uc = mod_uc.fit(res_uc.params, disp=False)
    print(res_uc.summary())
    # compare trend and cycle estimates from all three approaches
    fig, axes = plt.subplots(2, figsize=(13, 5))
    axes[0].set(title='Level/trend component')
    axes[0].plot(endog.index, res_uc.level.smoothed, label='UC')
    axes[0].plot(endog.index, res_ucarima.level.smoothed,
                 label='UC-ARIMA(2,0)')
    axes[0].plot(hp_trend, label='HP Filter')
    axes[0].legend(loc='upper left')
    axes[0].grid()
    axes[1].set(title='Cycle component')
    axes[1].plot(endog.index, res_uc.cycle.smoothed, label='UC')
    axes[1].plot(endog.index, res_ucarima.autoregressive.smoothed,
                 label='UC-ARIMA(2,0)')
    axes[1].plot(hp_cycle, label='HP Filter')
    axes[1].legend(loc='upper left')
    axes[1].grid()
    fig.tight_layout()
    plt.show()
    plt.close()
def load_data(*func_codes):
    """Load Shenzhen-listed (<code>.SZ) price histories for each code,
    caching every download as <code>.csv."""
    frames = []
    for code in func_codes:
        csv_path = f"{code}.csv"
        if os.path.exists(csv_path):
            frame = pd.read_csv(csv_path)
        else:
            frame = DataReader(f"{code}.SZ", "yahoo", start, end)
            frame.to_csv(csv_path)
        frames.append(frame)
    return frames