def get_outperformance(ticker_list, idx, start=datetime(1993, 1, 1), end=datetime(2019, 7, 9)):
    """Download daily prices for each ticker, compute 1-year returns and
    over/under-performance versus the benchmark series ``idx``, and dump
    one CSV per ticker.

    Parameters
    ----------
    ticker_list : list of str
        Ticker symbols to process.
    idx : pd.Series
        Benchmark yearly-return series indexed by date.
    start, end : datetime
        Date range requested from Yahoo.

    Returns
    -------
    list of str
        Tickers that failed to download or process.
    """
    errors = []
    complete = len(ticker_list)
    done = 0
    for ticker in ticker_list:
        try:
            prices = DataReader(ticker, 'yahoo', start, end)
            # 252 trading days ~ one year of returns.
            yr_ret = list(prices['Close'].pct_change(252).dropna())
            length = len(prices)
            # Pad the head of the series (where no 1y return exists yet).
            for i in range(length - len(yr_ret)):
                yr_ret.append(nan)
            tmp_idx = idx.loc[prices.index]
            prices['yr_ret'] = yr_ret
            # +1 = beat the index, -1 = fell below the negated index, 0 otherwise.
            prices['outperformance'] = (
                (prices['yr_ret'] > tmp_idx).astype(int)
                - (prices['yr_ret'] < -tmp_idx).astype(int))
            # BUG FIX: original read prices['yr-ret'] (hyphen), a KeyError
            # that the bare except silently swallowed for every ticker.
            prices['magnitude'] = abs(prices['yr_ret']) - abs(tmp_idx)
            st = str(min(prices.index))[:-9]
            en = str(max(prices.index))[:-9]
            file = ticker + '_' + st + '_' + en + '.csv'
            prices.to_csv(file)
        except Exception:
            # Narrowed from a bare except; still best-effort per ticker.
            errors.append(ticker)
        done += 1
        # Text progress bar, redrawn in place.
        print('\r' + '|' + ((u'\u2588') * (int(
            (done + 1) / complete * 100))).ljust(99)
            + '| {0:.2f}%'.format(min((done + 1) / complete * 100, 100)),
            end='')
    return errors
def download(self, code, start, end, downloadPath=None, source='morningstar'):
    """Fetch price history for *code* from the given data source.

    Returns the CSV file path when ``downloadPath`` is given, otherwise
    ``None``; the Yahoo branch returns whatever ``downloadYahoo`` yields.
    """
    self.source = source
    Protocol.sendStatus("Data source", source)
    if source == "Yahoo":
        result = self.downloadYahoo(code, start, end, downloadPath)
        if not result:
            # NOTE(review): retries recursively with no back-off or attempt
            # limit — a persistently failing download never terminates;
            # confirm this is intended.
            return self.download(code, start, end, downloadPath)
        else:
            return result
    else:
        # 'start' may arrive as a string UNIX timestamp; coerce to datetime.
        start = dt.datetime.fromtimestamp(
            int(start)) if type(start) == str else start
        if start != None:
            self.data = DataReader(code, data_source=source, start=start,
                                   end=dt.datetime.today())
        else:
            self.data = DataReader(code, data_source=source,
                                   end=dt.datetime.today())
        self.__loaded = True
        if downloadPath != None:
            # File name embeds the (numeric) end timestamp.
            filepath = os.path.join(downloadPath, code + str(int(end)) + '.csv')
            self.data.to_csv(filepath)
            return filepath
        return None
def get_data_for_multiple_stocks(tickers, start_date, end_date):
    '''
    tickers: list of tickers to get data for
    start_date, end_date: dt.datetime objects

    method returns a dictionary {ticker: pd.DataFrame}
    '''
    start_date_str = start_date.strftime('%Y-%m-%d')
    end_date_str = end_date.strftime('%Y-%m-%d')
    stocks = dict()
    # Loop through all the tickers.
    for i, ticker in enumerate(tickers):
        # Lightweight progress indicator every 5 tickers.
        if i % 5 == 0:
            print(f'{i}/{len(tickers)}')
        try:
            # Get the data for the specific ticker and derive return columns.
            s = DataReader(ticker, 'yahoo', start_date_str, end_date_str)
            s.insert(0, "Ticker", ticker)
            s['Prev Close'] = s['Adj Close'].shift(1)
            s['daily_return'] = (s['Adj Close'] / s['Prev Close']) - 1
            s['log_return'] = np.log(s['Adj Close'] / s['Prev Close'])
            stocks[ticker] = s
        except Exception:
            # Narrowed from a bare except (which would also swallow
            # KeyboardInterrupt); download is deliberately best-effort.
            print(f'something went wrong with {ticker}')
            continue
    return stocks
def get(self, symbol, asset_type=None, **kwargs):
    """Fetch the full Yahoo history for *symbol* and return a one-column
    frame of adjusted closes renamed to 'close'."""
    if not asset_type:
        logger.error("No asset_type argument set")
        raise Exception("asset_type must be set")
    # Forex symbols are quoted on Yahoo as e.g. EURUSD=X.
    if asset_type == "F":
        symbol = symbol.replace("/", "") + "=X"
    try:
        raw = DataReader(symbol, "yahoo", start="1950-01-01", **kwargs)
    except (RemoteDataError, KeyError):
        logger.error(f"Symbol {symbol} not found")
        raise SymbolNotFound(symbol)
    column_map = {
        "Open": "open",
        "High": "high",
        "Low": "low",
        "Close": "close",
        "Volume": "volume",
        "Adj Close": "adj_close",
    }
    raw = raw.rename(columns=column_map).sort_index()
    return raw[["adj_close"]].rename({"adj_close": "close"}, axis=1)
def build_history(self, ticker, day):
    """Load price history for *ticker* ending on *day* ('%Y-%m-%d'),
    remember it on the instance, and return the history's index (used to
    look up matching sentiment records)."""
    self.last_ticker = ticker
    self.last_day = datetime.strptime(day, '%Y-%m-%d').strftime('%Y-%m-%d')
    # Fetch data: a scaling frame plus an obs_len-day lookback window.
    window_start = datetime.strptime(day, '%Y-%m-%d') - timedelta(days=self.obs_len)
    self.scaling_df = DataReader(ticker, 'yahoo', start=self.scaling_start_date)
    frame = DataReader(ticker, 'yahoo', start=window_start, end=day)
    # Keep only the most recent half-window of observations.
    self.history_df = frame.tail(self.obs_len // 2)
    return self.history_df.index


# test_model = RLModel(cycle_base_model, lstm)
# test_model.get_action('AAPL', '2019-01-01')
def data_loader_builder(verbose: bool = True):
    """Assemble a DataLoader from the cleaned MSCI feather files plus the
    FRED GS1M 1-month Treasury rate (converted to a monthly decimal rate).

    Returns (rf, prices, mv, dl).
    """
    prices_df = pd.read_feather('../../data/clean/msci_world_prices.feather')
    if verbose:
        print("MSCI World Prices:")
        prices_df.info()
    mv_df = pd.read_feather('../../data/clean/msci_world_mv.feather')
    if verbose:
        print("\nMSCI World Market Values:")
        mv_df.info()
    # 1-Month Treasury Constant Maturity Rate (GS1M), month-start resampled.
    annual_rf = DataReader('GS1M', 'fred',
                           start=datetime.datetime(1990, 1, 1)).resample('MS').mean()
    # Bring the annual percentage rate down to a monthly rate.
    monthly_rf = annual_rf.div(100).div(12)
    dl = DataLoader(prices=prices_df, mv=mv_df, rf=monthly_rf)
    prices, mv, rf = dl.get_prices(), dl.get_mv(), dl.get_rf()
    if verbose:
        print(f'\n\'prices\' shape: {prices.shape}')
        print(f'\'mv\' shape: {mv.shape}')
        print(f'\'rf\' shape: {rf.shape}')
    return rf, prices, mv, dl
def main():
    """Download six months of Yahoo OHLCV for a few symbols — the range
    deliberately spans the AAPL split on June 9, 2014 — and write one CSV
    per symbol with lower-cased column names."""
    for symbol in ('AAPL', 'MSFT', 'BRK-A'):
        frame = DataReader(symbol, 'yahoo', start='2014-03-01', end='2014-09-01')
        frame.rename(columns={'Open': 'open', 'High': 'high', 'Low': 'low',
                              'Close': 'close', 'Volume': 'volume'},
                     inplace=True)
        del frame['Adj Close']
        dest = join(here, symbol + '.csv')
        print("Writing %s -> %s" % (symbol, dest))
        frame.to_csv(dest, index_label='day')
def p03_AAPL(tomtom):
    """Fetch AAPL history (since 2000) from Yahoo and stash it as a temp
    CSV; on a remote-data failure, fall back to the bundled example file."""
    try:
        quotes = DataReader('AAPL', 'yahoo', start='01/01/2000')
        quotes.to_csv(tomtom.get_tmp_name('p03_AAPL.csv'))
    except RemoteDataError:
        print('Error while reading data, revert to stored file in example_data')
        shutil.copy('example_data/p03_AAPL.csv', 'temp')
def read_history(self, start_date, end_date):
    """Load Yahoo daily history into ``self.data``.

    If ``self.symbol`` is set, read that symbol directly; otherwise keep
    drawing random symbols until one yields more than 200 rows, and record
    the winner on ``self.symbol``.  When ``self._col`` is set, only that
    column is kept (as a one-column DataFrame).
    """
    start_date, end_date = self.format_date(start_date), self.format_date(
        end_date)
    if self.symbol:
        if self._col:
            self.data = DataReader(self.symbol, 'yahoo', start=start_date,
                                   end=end_date)[self._col].to_frame()
        else:
            self.data = DataReader(self.symbol, 'yahoo', start=start_date,
                                   end=end_date)
    else:
        flag = True
        while flag:
            sym = None
            try:
                sym = self.random_symbol()
                if self._col:
                    self.data = DataReader(sym, 'yahoo', start=start_date,
                                           end=end_date)[self._col].to_frame()
                else:
                    self.data = DataReader(sym, 'yahoo', start=start_date,
                                           end=end_date)
                # Only accept symbols with a reasonably long history.
                if self.data.shape[0] > 200:
                    self.symbol = sym
                    flag = False
            except Exception:
                # BUG FIX: the original used a Python 2 print statement
                # ('print sym, ...'), a SyntaxError under Python 3, and a
                # bare except that would also swallow KeyboardInterrupt.
                print(sym, 'cannot be imported')
def crearDatosForecast(ticker, window_size=60, loss='mae', optimizer='adam', metrics=['mae']):
    """Prepare the model and the data used to forecast *ticker*.

    Rebuilds the network, restores its saved weights, downloads the last
    two window-lengths of quotes from Yahoo, and returns a dict holding
    the model, the close-price frame, its raw values, the persisted
    scaler and the scaled values.
    """
    # Rebuild the network and restore the previously saved weights.
    modelo = crearModelo(window_size, loss, optimizer, metrics)
    modelo.load_weights(os.path.join(FOLDER, (ticker + '.h5')))
    # Only the last two periods (window length * 2) are needed here.
    df = DataReader(ticker, data_source='yahoo',
                    start=datetime.now() - timedelta(window_size * 2))
    data_dic = {
        'modelo': modelo,
        'data': df.filter(['Close']),
        'dataset': df.filter(['Close']).values,
    }
    # Added in V2.0: reuse the scaler persisted at training time.
    scaler = loadScaler(ticker)
    data_dic['scaler'] = scaler
    data_dic['scaled_data'] = scaler.fit_transform(data_dic['dataset'])
    return data_dic
def get_stock(stock, beg, end, write=False, load=False):
    """Return a daily-price DataFrame for *stock*.

    Parameters
    ----------
    stock : str
        Ticker symbol (also used as the CSV basename).
    beg, end
        Date range passed to Yahoo.
    write : bool
        If True, cache the downloaded frame to ./<stock>.csv.
    load : bool
        If True, skip the download and read ./<stock>.csv instead.
    """
    # Idiom fix: 'load == True' replaced with a plain truthiness test.
    if load:
        return pd.read_csv(f'./{stock}.csv')
    ret_df = DataReader(stock, 'yahoo', beg, end)
    if write:
        ret_df.to_csv(f'./{stock}.csv')
    return ret_df
def ticker_event_history(self, start, end, ticker):
    """Return Yahoo corporate actions for *ticker* between *start* and
    *end* as a list of record dicts ([] when the feed is unreachable)."""
    try:
        actions = DataReader(ticker, 'yahoo-actions', start, end).reset_index()
    except OSError:
        # Page could not be reached for some reason.
        return []
    return actions.to_dict(orient="records")
def start_market_simulation(self):
    """Replay the downloaded price history through the market-data object,
    firing ``event_tick`` once per bar when a handler is attached."""
    data = DataReader(self.ticker, self.source, self.start, self.end)
    for time, row in data.iterrows():
        self.md.add_last_price(time, self.ticker, row["Close"], row["Volume"])
        self.md.add_open_price(time, self.ticker, row["Open"])
        # Idiom fix: 'not self.event_tick is None' -> 'is not None'.
        if self.event_tick is not None:
            self.event_tick(self.md)
def stock_predict_plt(key):
    """Train a small LSTM on two years of closes for ticker *key*, predict
    the held-out 20%, plot train/validation/predictions, and return the
    figure as a base64-encoded PNG string."""
    end = datetime.now()
    start = datetime(end.year - 2, end.month, end.day)
    df = DataReader(key, data_source='yahoo', start=start, end=end)
    data = df.filter(['Close'])
    dataset = data.values
    # 80/20 train/test split on the raw close series.
    training_data_len = int(np.ceil(len(dataset) * .8))
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(dataset)
    train_data = scaled_data[0:int(training_data_len), :]
    x_train = []
    y_train = []
    # Sliding 30-day windows: predict day i from days i-30..i-1.
    for i in range(30, len(train_data)):
        x_train.append(train_data[i - 30:i, 0])
        y_train.append(train_data[i, 0])
    x_train, y_train = np.array(x_train), np.array(y_train)
    # LSTM expects input shaped (samples, timesteps, features).
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    model = Sequential()
    model.add(
        LSTM(70, return_sequences=False, input_shape=(x_train.shape[1], 1)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(x_train, y_train, batch_size=1, epochs=20)
    # Test windows overlap the last 30 training points by construction.
    test_data = scaled_data[training_data_len - 30:, :]
    x_test = []
    y_test = dataset[training_data_len:, :]
    for i in range(30, len(test_data)):
        x_test.append(test_data[i - 30:i, 0])
    x_test = np.array(x_test)
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
    predictions = model.predict(x_test)
    # Map scaled predictions back to price space.
    predictions = scaler.inverse_transform(predictions)
    train = data[-500:training_data_len]
    valid = data[training_data_len:]
    # NOTE(review): assigning into this slice may raise
    # SettingWithCopyWarning — confirm pandas version behavior.
    valid['Predictions'] = predictions
    # Render the plot into an in-memory buffer and base64-encode it.
    img = io.BytesIO()
    plt.figure(figsize=(16, 8))
    plt.xlabel('Date', fontsize=18)
    plt.ylabel('Close Price USD ($)', fontsize=18)
    plt.plot(train['Close'])
    plt.plot(valid[['Close', 'Predictions']])
    plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
    plt.savefig(img, format='png')
    plot_url = base64.b64encode(img.getbuffer()).decode("ascii")
    return plot_url
def download_remote_updated(symbol):
    """Return (as a datetime) the most recent bar date Yahoo has for
    *symbol*, fetching only from the locally stored update date onward
    when one is known."""
    sid = gets.id_for_symbol(symbol)
    vid = gets.id_for_vendor("Yahoo! Finance")
    try:
        since = gets.updated_for_symbol_vendor(sid, vid)
        stock = DataReader(symbol, "yahoo", since)
    except TypeError:
        # No stored update date — fall back to the default full range.
        stock = DataReader(symbol, "yahoo")
    return stock.index[-1].to_pydatetime()
def stock_autocorr(ticker):
    """Print the lag-1 autocorrelation of *ticker*'s daily change in the
    OHLC mean price (relies on module-level data_source, start_date,
    drop_col and years)."""
    frame = DataReader(ticker, data_source, start_date)
    frame['mean'] = (frame['Open'] + frame['High']
                     + frame['Low'] + frame['Close']) / 4
    changes = frame.drop(drop_col, axis=1)
    changes['pct_change'] = changes.pct_change()
    stock_autocorr = changes['pct_change'].autocorr()
    print(
        'Over the past {0} years, the auto-correlation of {1} daily point change is:{2}'
        .format(years, ticker, stock_autocorr))
def get_benchmark():
    """Return daily S&P 500 (^GSPC) percentage returns over the
    module-level [start_date1, end_date1] range, NaN rows dropped."""
    c()
    adj_close = DataReader(['^GSPC'], 'yahoo', start_date1, end_date1)['Adj Close']
    return adj_close.pct_change().dropna()
def getOtherData(symbol, start_date, end_date):
    """Return the close-price series for *symbol*, trying Yahoo first and
    falling back to IEX (which uses a lowercase 'close' column)."""
    try:
        # Yahoo
        data = DataReader([symbol], 'yahoo', start_date, end_date)
        tickers_data = data["Close"]
    except Exception:
        # Narrowed from a bare except; the IEX fallback is intentional.
        data = DataReader([symbol], 'iex', start_date, end_date)
        tickers_data = data["close"]
    return tickers_data
def ticker_price_history(self, start, end, ticker):
    """Fetch daily Yahoo prices for *ticker* over [start, end] (inclusive)
    and return them as a list of row dicts; [] when the feed is down."""
    try:
        frame = DataReader(ticker, 'yahoo', start, end)
    except OSError:
        # Page could not be reached (network/HTTP failure).
        return []
    return frame.reset_index().to_dict(orient="records")
def load_usd():
    """Return the stitched USD cash-rate series: NY Fed rate up to 1953,
    3M T-bill 1954-1985, 3M LIBOR from 1986 onward."""
    nyfed = DataReader(CashFile.USD_NYFED_DF.value, "fred", START_DATE)
    tbill = DataReader(CashFile.USD_3M_TBILL.value, "fred", START_DATE)
    libor = DataReader(CashFile.USD_3M_LIBOR.value, "fred", START_DATE)
    segments = (
        to_monthend(nyfed[:"1953"]).fillna(method="pad"),
        tbill['1954':"1985"].fillna(method="pad"),
        libor['1986':].fillna(method="pad"),
    )
    # The segments cover disjoint date ranges, so a row-wise sum stitches
    # them into a single continuous series.
    return pd.concat(segments, axis=1).sum(axis=1).rename("cash_rate_usd")
def load_data(*func_codes):
    """For each code, load <code>.csv if cached, otherwise download the
    Shenzhen-listed symbol (<code>.SZ) from Yahoo and cache it; returns
    the frames in argument order (uses module-level start/end)."""
    frames = []
    for code in func_codes:
        cache = f"{code}.csv"
        if os.path.exists(cache):
            frame = pd.read_csv(cache)
        else:
            frame = DataReader(f"{code}.SZ", "yahoo", start, end)
            frame.to_csv(cache)
        frames.append(frame)
    return frames
def get_risk_free_rate_daily():
    """Return the average 10-year Treasury (DGS10) yield over the
    module-level [start_date1, end_date1] range, converted to a daily
    decimal rate."""
    c()
    series = DataReader('DGS10', 'fred', start_date1, end_date1).dropna()
    annual = series.mean() / 100
    # 252 trading days per year.
    return np.float64(annual / 252)
def load_gbp():
    """Return the stitched GBP cash-rate series: policy rate through 1969,
    monthly 3M LIBOR 1970-1985, daily 3M LIBOR afterwards."""
    libor = DataReader(CashFile.GBP_3M_LIBOR.value, 'fred', START_DATE)
    libor_m = DataReader(CashFile.GBP_3M_LIBOR_M.value, "fred", START_DATE)
    policy_rate = DataReader(CashFile.GBP_POLICY_RATE.value, "fred", START_DATE)
    pieces = [
        policy_rate[:"1969-12"].fillna(method="pad"),
        libor_m['1970':"1985"].fillna(method="pad"),
        libor.fillna(method="pad"),
    ]
    # Row-wise sum stitches the date-disjoint segments into one series.
    combined = pd.concat(pieces, axis=1).sum(axis=1)
    return combined.rename("cash_rate_gbp")
def crearSerie(ticker, start='2012-01-01', end=None, window_size=60):
    """Download *ticker* closes from Yahoo and build LSTM train/test/
    forecast tensors using sliding windows of ``window_size`` days.

    Parameters
    ----------
    ticker : str
        Symbol to download from Yahoo Finance.
    start : str
        History start date (default '2012-01-01').
    end : datetime or None
        History end date.  BUG FIX: the original default was
        ``end=datetime.now()``, evaluated once at import time; ``None``
        now means "now, at call time".
    window_size : int
        Length of each input window.

    Returns
    -------
    (scaler, x_train, y_train, x_test, y_test, data, scaled_data,
     training_data_len, x_forecast)
    """
    if end is None:
        end = datetime.now()
    # Fetch quotes from Yahoo Finance; we work with closing prices only.
    df = DataReader(ticker, data_source='yahoo', start=start, end=end)
    data = df.filter(['Close'])
    dataset = data.values
    # 80% of rows are used for training.
    training_data_len = int(np.ceil(len(dataset) * .8))
    # Scale values into [0, 1].
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(dataset)
    # Persist the scaler for reuse at prediction time (added in V2.0).
    saveScaler(ticker, scaler)
    # Build the training windows: predict day i from the window before it.
    train_data = scaled_data[0:int(training_data_len), :]
    x_train = []
    y_train = []
    for i in range(window_size, len(train_data)):
        x_train.append(train_data[i - window_size:i, 0])
        y_train.append(train_data[i, 0])
    x_train, y_train = np.array(x_train), np.array(y_train)
    # LSTM expects input shaped (samples, timesteps, features).
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    # Test windows overlap the last window_size training points.
    test_data = scaled_data[training_data_len - window_size:, :]
    x_test = []
    y_test = dataset[training_data_len:, :]
    for i in range(window_size, len(test_data)):
        x_test.append(test_data[i - window_size:i, 0])
    x_test = np.array(x_test)
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
    # Final time window: the out-of-sample forecasting input.
    x_forecast = []
    x_forecast.append(scaled_data[len(dataset) - window_size:, 0])
    x_forecast = np.array(x_forecast)
    x_forecast = np.reshape(x_forecast,
                            (x_forecast.shape[0], x_forecast.shape[1], 1))
    return scaler, x_train, y_train, x_test, y_test, data, scaled_data, training_data_len, x_forecast
def load_gbp():
    """GBP cash rate: BoE policy rate to end-1969, then monthly 3M LIBOR
    through 1985, then the daily 3M LIBOR series."""
    rates = {
        'libor': DataReader(CashFile.GBP_3M_LIBOR.value, 'fred', START_DATE),
        'libor_m': DataReader(CashFile.GBP_3M_LIBOR_M.value, "fred", START_DATE),
        'policy': DataReader(CashFile.GBP_POLICY_RATE.value, "fred", START_DATE),
    }
    # Concatenate the date-disjoint, forward-filled segments and collapse
    # them into one series with a row-wise sum.
    stitched = pd.concat((rates['policy'][:"1969-12"].fillna(method="pad"),
                          rates['libor_m']['1970':"1985"].fillna(method="pad"),
                          rates['libor'].fillna(method="pad")),
                         axis=1).sum(axis=1)
    return stitched.rename("cash_rate_gbp")
def getcsv(self):
    '''
    Use pandas DataReader to pull the past year of price data for each
    index in INDEX_LIST from Yahoo Finance and save one CSV per index
    under self.basedir (skipped entirely if basedir does not exist).
    '''
    if not os.path.exists(self.basedir):
        return
    end = datetime.now()
    start = datetime(end.year - 1, end.month, end.day)
    for index in INDEX_LIST:
        quotes = DataReader(index, 'yahoo', start, end)
        quotes.to_csv(self.basedir + index + ".csv")
def stocks_yahoo():
    """Download Dec-2017..Dec-2020 Yahoo history for every ticker in the
    module-level list, caching each as stocks_dfs/<ticker>.csv; known
    problem tickers and already-cached files are skipped."""
    os.makedirs('stocks_dfs', exist_ok=True)
    start, end = datetime(2017, 12, 1), datetime(2020, 12, 1)
    for ticker in tickers:
        print(ticker)
        path = 'stocks_dfs/{}.csv'.format(ticker)
        if os.path.exists(path) or ticker in problems:
            continue
        DataReader(ticker, 'yahoo', start, end).to_csv(path)
def addCustomColumns(df, market_upd=False):
    """Augment the fundamentals frame *df* with price-derived columns.

    Downloads the ticker's close-price history from Quandl (from ten
    years before the frame's first date through today), applies the
    basic/growth/timeline column builders, joins in the S&P 500 series,
    and computes betas.

    Raises: re-raises any error from the Quandl download after logging it.
    """
    # Start ten years before the frame's first year, on day 1 of its month.
    start = datetime.date(
        int(df.index.get_level_values('date')[0]) - 10,
        int(df['month'].values[0]), 1)
    end_date_ls = [
        int(d) for d in datetime.date.today().strftime('%Y-%m-%d').split("-")
    ]
    end = datetime.date(end_date_ls[0], end_date_ls[1], end_date_ls[2])
    try:
        # Column 4 = close price, sorted ascending by date.
        url = "https://www.quandl.com/api/v1/datasets/WIKI/{0}.csv?column=4&sort_order=asc&trim_start={1}&trim_end={2}".format(
            df.index.get_level_values('ticker')[0], start, end)
        qr = pd.read_csv(url)
        qr['Date'] = qr['Date'].astype('datetime64[ns]')
    except Exception:
        # Narrowed from a bare except; the error is logged and re-raised.
        print("Could not read time series data for %s" %
              df.index.get_level_values('ticker')[0])
        exc_type, exc_obj, exc_tb = sys.exc_info()
        app.logger.info(
            "Could not read time series data for {3}: {0}, {1}, {2}".format(
                exc_type, exc_tb.tb_lineno, exc_obj,
                df.index.get_level_values('ticker')[0]))
        raise
    df = addBasicCustomCols(df, qr)
    df = addGrowthCustomCols(df, qr)
    df = addTimelineCustomCols(df, qr)
    # BUG FIX: removed a leftover pdb.set_trace() debugging breakpoint that
    # would halt every call in production.
    if market_upd:
        market = DataReader('^GSPC', 'yahoo', start, end, pause=1)['Close']
        market.to_csv(
            '/home/ubuntu/workspace/finance/app/static/docs/market.csv')
        # NOTE(review): 'quotes' is never defined in this branch (the
        # DataReader lines that produced it are commented out upstream),
        # so this path raises NameError as written — confirm intent.
        quotes = pd.DataFrame(quotes)
        market.columns = ['Date', 'market']
        market.set_index('Date')
        quotes['market'] = market
    else:
        market = pd.read_csv(
            '/home/ubuntu/workspace/finance/app/static/docs/market.csv')
        market.columns = ['Date', 'market']
        market['Date'] = market['Date'].apply(pd.to_datetime)
        market = market.set_index('Date')
    qr = pd.merge(qr.set_index('Date'), market,
                  left_index=True, right_index=True)
    df = calcBetas(df, qr)
    '''
    Still need to do:
        'enterpriseToEbitda'
        'ebitdaMargins'
        'ebitda',
        'shortRatio'
    '''
    return df
def _load_raw_yahoo_data(indexes=None, stocks=None, start=None, end=None):
    """Load closing prices from yahoo finance.

    :Optional:
        indexes : dict (Default: {'SPX': 'SPY'})
            Financial indexes to load.
        stocks : list (Default: ['AAPL', 'GE', 'IBM', 'MSFT', 'XOM', 'AA',
                 'JNJ', 'PEP', 'KO'])
            Stock closing prices to load.
        start : datetime (Default: datetime(1993, 1, 1, 0, 0, 0, 0, pytz.utc))
            Retrieve prices from start date on.
        end : datetime (Default: datetime(2002, 1, 1, 0, 0, 0, 0, pytz.utc))
            Retrieve prices until end date.

    :Note:
        This is based on code presented in a talk by Wes McKinney:
        http://wesmckinney.com/files/20111017/notebook_output.pdf
    """
    assert indexes is not None or stocks is not None, """
must specify stocks or indexes"""

    if start is None:
        # FIX: pd.datetime was deprecated and removed in pandas 2.0;
        # pd.Timestamp accepts the same positional datetime arguments.
        start = pd.Timestamp(1990, 1, 1, 0, 0, 0, 0, tz=pytz.utc)

    if start is not None and end is not None:
        assert start < end, "start date is later than end date."

    data = OrderedDict()
    if stocks is not None:
        for stock in stocks:
            logger.info('Loading stock: {}'.format(stock))
            stock_pathsafe = stock.replace(os.path.sep, '--')
            cache_filename = "{stock}-{start}-{end}.csv".format(
                stock=stock_pathsafe, start=start, end=end).replace(':', '-')
            cache_filepath = get_cache_filepath(cache_filename)
            if os.path.exists(cache_filepath):
                # FIX: DataFrame.from_csv was removed from pandas; read_csv
                # with index_col + parse_dates is the documented equivalent.
                stkd = pd.read_csv(cache_filepath, index_col=0,
                                   parse_dates=True)
            else:
                stkd = DataReader(stock, 'yahoo', start, end).sort_index()
                stkd.to_csv(cache_filepath)
            data[stock] = stkd

    if indexes is not None:
        for name, ticker in iteritems(indexes):
            logger.info('Loading index: {} ({})'.format(name, ticker))
            stkd = DataReader(ticker, 'yahoo', start, end).sort_index()
            data[name] = stkd
    return data
def __init__(self, stock_list):
    """Download one year of Google-Finance history for every ticker in
    *stock_list*, keeping the per-ticker frames, the joint close-price
    frame and its daily percentage returns on the instance."""
    self.stock_list = stock_list
    # One year of data ending today.
    end = datetime.now()
    start = datetime(end.year - 1, end.month, end.day)
    self.Stocks = {ticker: DataReader(ticker, 'google', start, end)
                   for ticker in stock_list}
    self.closing_df = DataReader(stock_list, 'google', start, end)['Close']
    self.stock_rets = self.closing_df.pct_change()
def read_history(self, start_date, end_date):
    """Load Yahoo daily history for self.symbol into self.data, reduced
    to the single self._col column (kept as a DataFrame) when one is
    configured."""
    start_date, end_date = self.format_date(start_date), self.format_date(end_date)
    frame = DataReader(self.symbol, 'yahoo', start=start_date, end=end_date)
    if self._col:
        frame = frame[self._col].to_frame()
    self.data = frame
def HistoricalData(symbol: str, start: str, end=datetime.today().strftime("%Y-%m-%d")):
    """Fetch daily adjusted data for *symbol* from Alpha Vantage, cache it
    to ./data/<symbol>.csv, and return {'price': [...], 'dates': [...]}."""
    with open("./apikey", "r") as f:
        key = f.readline()
    data = DataReader(symbol, "av-daily-adjusted", start, end, api_key=key)
    csv_path = "./data/{}.csv".format(symbol)
    data.to_csv(csv_path)
    # Adjusted close prices.
    price = list(data["adjusted close"])
    # Re-read the CSV to recover each row's date string: first 10 chars of
    # every line, skipping the header.
    with open(csv_path, "r") as f:
        dates = [line[:10] for line in f.readlines()[1:]]
    return {"price": price, "dates": dates}
def get_fx_rates(currency):
    """Return the forward-filled FX-rate series for *currency*, sourced
    from Quandl or pandas-datareader per the FxRatesInfo registry."""
    info = FxRatesInfo[currency].value
    if info.data_source == 'quandl':
        fx_rates = quandl.get(info.data_name, api_key=quandl_token)['Rate']
    else:
        fx_rates = DataReader(info.data_name, info.data_source, START_DATE).squeeze()
    return fx_rates.fillna(method='pad').rename(info.fx_rates_name)
def get_timeseries_gross_return(symbols, startdate, enddate):
    """Return price return timeseries

    Keyword arguments:
    symbols -- Symbol or list of Symbols (string / [string])
    startdate -- timeseries start date (datetime.date(year, month, day))
    enddate -- timeseries end date (datetime.date(year, month, day))
    """
    if type(symbols) == str:
        symbols = [symbols]
    data = []
    for symbol in symbols:
        try:
            df = DataReader(symbol, 'yahoo', startdate, enddate)[['Adj Close']]
            df.columns = [symbol]
        except Exception:
            # Narrowed from a bare except: a failed symbol still yields an
            # all-NaN business-day column so the concat keeps its slot.
            df = pd.DataFrame(np.nan,
                              index=pd.bdate_range(startdate, enddate),
                              columns=[symbol])
        data.append(df)
    return pd.concat(data, axis=1, join='outer')
def p03_DAX(tomtom):
    """Download DAX (^GDAXI) history since 2000 and write it to the
    caller-managed temp CSV."""
    dax = DataReader('^GDAXI', 'yahoo', start='01/01/2000')
    dax.to_csv(tomtom.get_tmp_name('p03_DAX.csv'))
def EventStudies():
    """Run a market-model event study on a handful of S&P constituents.

    Detects days whose stock return beats/trails the S&P 500 by more than
    3%, estimates per-day market-model abnormal returns (AR) over a
    +/- 20-day window around each event, aggregates into cumulative
    abnormal return (CAR) tables, and saves four PNG plots.

    NOTE(review): uses ``DataFrame.ix`` (removed in modern pandas) and
    indexes ``dict.values()`` directly — Python 2 era code; confirm the
    target runtime before reuse.
    """
    # Define list of stocks to conduct event analysis on.
    symbols_list = ['AES', 'AET', 'AFL', 'AVP', 'CLX', 'GM', '^GSPC']
    # Start and End dates
    dt_start = dt.datetime(2012, 1,1)
    dt_end = dt.datetime(2015, 1,1)
    # Download historical Adjusted Closing prices using Pandas downloader for Yahoo
    data = DataReader(symbols_list, 'yahoo', dt_start, dt_end)['Adj Close']
    # Create dataframe data_ret which includes returns
    data_ret = data/data.shift(1) - 1
    # Define event threshold variable daily_diff
    daily_diff = 0.03
    # Positive event if daily stock return > market return by daily_diff
    # Negative event if daily stock return < market return by daily_diff
    # otherwise no event has occurred.
    # Create an events data frame data_events, where columns = names of all
    # stocks, and rows = daily dates
    events_col = symbols_list[:]  # Use [:] to deep copy the list
    events_col.remove('^GSPC')  # We dont't need to create events for the S&P500
    events_index = data_ret.index  # Copy the date index from data_ret
    data_events = pd.DataFrame(index=events_index, columns=events_col)
    # Fill in data_events with 1 for positive events, -1 for negative events,
    # and NA otherwise.
    for i in events_col:
        data_events[i] = np.where((data_ret[i] - data_ret['^GSPC']) > daily_diff, 1,
                                  np.where((data_ret[i] - data_ret['^GSPC']) < -daily_diff, -1, np.nan))
    # Calculate abnormal returns based on market model (R_it = a_i + B_i*R_mt + e_it)
    # Define estimation period L1: the greater, the more accurate the model
    L1 = 30
    # Define window for forward and backward looking period. Should be less than L1.
    window = 20
    # Create 2 dictionaries of dictionaries (for positive and negative events)
    # to store the abnormal returns (AR) values of each window day, for each stock.
    pos_dict = defaultdict(dict)
    neg_dict = defaultdict(dict)
    # For each stock, locate each event and calculate abnormal return for
    # previous window days and future window days
    for s in events_col:
        pos_event_dates = data_events[s][data_events[s] == 1].index.tolist()
        neg_event_dates = data_events[s][data_events[s] == -1].index.tolist()
        # Create dictionary for each stock to store the AR values of each
        # window day for each event
        pos_dict_s = defaultdict(dict)
        neg_dict_s = defaultdict(dict)
        for pos_event in pos_event_dates:
            date_loc = data_ret.index.get_loc(pos_event)
            # Go to beginning of backward window and calculate AR from
            # backward till forward window.
            date_loc = date_loc - window
            # Skip events too close to either end of the sample for a full
            # estimation period + window.
            if date_loc > L1 and date_loc <= len(data_ret) - (2*window+1):
                index_range = (2*window) + 1
                # Create dictionairy to store the AR values for each day of this event
                pos_dict_s_event = OrderedDict()
                for d in range(index_range):
                    date_loc2 = date_loc + d
                    # Parameters to estimate market model
                    u_i = data_ret[s][date_loc2-L1 : date_loc2-1].mean()
                    u_m = data_ret['^GSPC'][date_loc2-L1 : date_loc2-1].mean()
                    R_i = data_ret.ix[date_loc2, s]
                    R_m = data_ret.ix[date_loc2,'^GSPC']
                    beta_i = ((R_i-u_i)*(R_m - u_m))/(R_m - u_m)**2
                    alpha_i = u_i - (beta_i*u_m)
                    var_err = (1/(L1 -2))*(R_i - alpha_i - (beta_i*R_m))**2
                    # Abnormal return = actual minus market-model prediction.
                    AR_i = R_i - alpha_i - (beta_i*R_m)
                    pos_dict_s_event[date_loc2] = AR_i
                pos_dict_s[pos_event] = pos_dict_s_event
        pos_dict[s] = pos_dict_s
        for neg_event in neg_event_dates:
            date_loc = data_ret.index.get_loc(neg_event)
            # Go to beginning of backward window and calculate AR from
            # backward till forward window.
            date_loc = date_loc - window
            if date_loc > L1 and date_loc <= len(data_ret) - (2*window+1):
                index_range = (2*window) + 1
                # Create dictionairy to store the AR values for each day of this event
                neg_dict_s_event = OrderedDict()
                for d in range(index_range):
                    date_loc2 = date_loc + d
                    # Parameters to estimate market model
                    u_i = data_ret[s][date_loc2-L1 : date_loc2-1].mean()
                    u_m = data_ret['^GSPC'][date_loc2-L1 : date_loc2-1].mean()
                    R_i = data_ret.ix[date_loc2, s]
                    R_m = data_ret.ix[date_loc2, '^GSPC']
                    beta_i = ((R_i-u_i)*(R_m - u_m))/(R_m - u_m)**2
                    alpha_i = u_i - (beta_i*u_m)
                    var_err = (1/(L1 -2))*(R_i - alpha_i - (beta_i*R_m))**2
                    AR_i = R_i - alpha_i - (beta_i*R_m)
                    neg_dict_s_event[date_loc2] = AR_i
                neg_dict_s[neg_event] = neg_dict_s_event
        neg_dict[s] = neg_dict_s
    # Create empty Abnormal Returns data frame
    abret_col = symbols_list[:]  # Use [:] to deep copy the list
    abret_col.remove('^GSPC')  # No abnormal returns needed for the S&P500
    abret_index = range(-window, window+1)
    pos_data_abret = pd.DataFrame(index=abret_index, columns=abret_col)
    neg_data_abret = pd.DataFrame(index=abret_index, columns=abret_col)
    # Average AR across events for each relative window day.
    # NOTE(review): x.values()[...] requires Python 2 (py3 dict views are
    # not indexable) — confirm runtime.
    for h in abret_col:
        if h in pos_dict.keys():
            for z in abret_index:
                pos_data_abret[h][z] = np.mean([x.values()[z+window] for x in pos_dict[h].values()])
    for f in abret_col:
        if f in neg_dict.keys():
            for v in abret_index:
                neg_data_abret[f][v] = np.mean([x.values()[v+window] for x in neg_dict[f].values()])
    # Create Cumulative Abnormal Return (CAR) Tables pos_CAR and neg_CAR
    pos_CAR = pos_data_abret.cumsum()
    neg_CAR = neg_data_abret.cumsum()
    # Plot pos_CAR and neg_CAR
    plt.clf()
    plt.plot(pos_CAR)
    plt.legend(pos_CAR)
    plt.ylabel('CAR')
    plt.xlabel('Window')
    matplotlib.rcParams.update({'font.size': 8})
    plt.savefig('PositiveCAR_All.png', format='png')
    plt.clf()
    plt.plot(neg_CAR)
    plt.legend(neg_CAR)
    plt.ylabel('CAR')
    plt.xlabel('Window')
    matplotlib.rcParams.update({'font.size': 8})
    plt.savefig('NegativeCAR_All.png', format='png')
    # Sum CAR for positive and negative events to plot only the aggregate CAR
    pos_CAR['SUM'] = pos_CAR.sum(axis=1)
    neg_CAR['SUM'] = neg_CAR.sum(axis=1)
    plt.clf()
    plt.plot(pos_CAR['SUM'])
    plt.legend(pos_CAR['SUM'])
    plt.ylabel('CAR')
    plt.xlabel('Window')
    matplotlib.rcParams.update({'font.size': 8})
    plt.savefig('PositiveCAR_SUM.png', format='png')
    plt.clf()
    plt.plot(neg_CAR['SUM'])
    plt.legend(neg_CAR['SUM'])
    plt.ylabel('CAR')
    plt.xlabel('Window')
    matplotlib.rcParams.update({'font.size': 8})
    plt.savefig('NegativeCAR_SUM.png', format='png')
def p03_DAX(location):
    """Download DAX (^GDAXI) quotes since 2000, persist them under
    *location* as p03_DAX.csv, and return the DataFrame."""
    frame = DataReader('^GDAXI', 'yahoo', start='01/01/2000')
    frame.to_csv(work_directory_path(location, 'p03_DAX.csv'))
    return frame
# Look back two weeks from today when pulling quotes.
mov_dt_window = datetime.timedelta(days=14)
end_date = datetime.date.today()
start_date = end_date - mov_dt_window
port_trend = []
port_close = []
port = []
#def port_perf(portfolio_equities, start_date, end_date):
#stocks_list = 'sp500'
#tickers = retrieve_tickers.retrieve_tickers(stocks_list)
#td = datetime.datetime.now().strftime('%Y-%m-%d')
# Write 2 DataFrames
# Collect the latest bar (tail(1)) for each equity in the portfolio.
for i in portfolio_equities:
    port = DataReader(i, "yahoo", start_date, end_date)
    #port['Date'] = port.index
    port['Symbol'] = i
    port_close.append(port.tail(1))
# NOTE(review): Python 2 print statement; also 'port_close' is a list, so
# indexing it with the string 'Symbol' raises TypeError — looks like
# leftover debugging; confirm intent.
print port_close['Symbol']
#port = DataReader(portfolio_equities, "yahoo", start_date, end_date)
#print port.to_frame()
# 2. Retrieve stock data
def p03_AAPL(location):
    """Download AAPL quotes since 2000, persist them under *location* as
    p03_AAPL.csv, and return the DataFrame."""
    quotes = DataReader('AAPL', 'yahoo', start='01/01/2000')
    quotes.to_csv(work_directory_path(location, 'p03_AAPL.csv'))
    return quotes
def savetickerinfo(SPH_id, ticker, security_id):
    """Refresh stored close prices and split/adjustment events for *ticker*.

    Downloads Yahoo history, derives the Close/Adj-Close adjustment factor
    and its day-over-day ratio, replaces all ClosePrice rows for the given
    SecurityPriceHist id in one bulk write, and records new split events
    wherever the factor jumps by at least 10%.
    """
    # Yahoo uses '-' where the local convention uses '.' (e.g. BRK.B).
    rp_tkr = ticker.replace('.', '-')
    yrs_of_pricing = 10
    yrs_of_adj = 10
    enddate = date.today()
    tickerdata =\
        DataReader(rp_tkr, "yahoo",
                   datetime(enddate.year - yrs_of_adj, enddate.month,
                            enddate.day))
    # Ratio of raw close to adjusted close; jumps mark splits/adjustments.
    tickerdata['Adj Factor'] =\
        tickerdata['Close'].divide(tickerdata['Adj Close'])
    #
    tickerdata['Adj Factor Shifted'] =\
        tickerdata['Adj Factor'].shift(1)
    #
    tickerdata['Adj Factor Old/New'] =\
        tickerdata['Adj Factor Shifted'].divide(tickerdata['Adj Factor'])
    #
    # The data_to_cp does not need to exit if we are not limited on rows
    # to the extent we need past prices for the full period,
    # we may collapse these two calls into one DataFrame
    if yrs_of_pricing != yrs_of_adj:
        data_to_cp =\
            DataReader(rp_tkr, "yahoo",
                       datetime(enddate.year - yrs_of_pricing, enddate.month,
                                enddate.day))
        data_to_cp['Adj Factor'] =\
            data_to_cp['Close'].divide(data_to_cp['Adj Close'])
        #
        data_to_cp['Adj Factor Shifted'] =\
            data_to_cp['Adj Factor'].shift(1)
        #
        data_to_cp['Adj Factor Old/New'] =\
            data_to_cp['Adj Factor Shifted'].divide(data_to_cp['Adj Factor'])
    else:
        data_to_cp = tickerdata
    #
    closepricesforsave = []
    # Positional itertuples indexing: a[0]=date index, a[4]=Close,
    # a[5]=Adj Close — assumes the Yahoo column order; confirm if the
    # upstream schema ever changes.
    for a in data_to_cp.itertuples():
        newcloseprice = ClosePrice(close_price=a[4],
                                   adj_close_price=a[5],
                                   close_date=str(datetime.date(a[0])),
                                   securitypricehist_id=SPH_id)
        closepricesforsave.append(newcloseprice)
    # Replace all stored prices for this SPH id in one bulk write.
    ClosePrice.objects.filter(securitypricehist_id=SPH_id)\
        .delete()
    ClosePrice.objects.bulk_create(closepricesforsave)
    #
    # A >= 10% jump in the adjustment factor ratio is treated as a split.
    splitrecords = tickerdata.loc[tickerdata['Adj Factor Old/New'] >= 1.1]
    #
    dictforsave = splitrecords.to_dict()['Adj Factor Old/New']
    #
    # Persist only events not already recorded for this security/date.
    for key in dictforsave:
        if not SplitOrAdjustmentEvent.objects.filter(security_id=security_id)\
                .filter(event_date=str(datetime.date(key)))\
                .exists():
            SplitOrAdjustmentEvent(
                security_id=security_id,
                adjustment_factor=round(dictforsave[key], 2),
                event_date=str(datetime.date(key)))\
                .save()
def p03_AAPL(tomtom):
    """Download AAPL quotes since 2000 from Yahoo and write them to the
    caller-managed temp CSV."""
    quotes = DataReader('AAPL', 'yahoo', start='01/01/2000')
    quotes.to_csv(tomtom.get_tmp_name('p03_AAPL.csv'))
res.plot_coefficients_of_determination(figsize=(8, 2)) # ## Coincident Index # # As described above, the goal of this model was to create an # interpretable series which could be used to understand the current status # of the macroeconomy. This is what the coincident index is designed to do. # It is constructed below. For readers interested in an explanation of the # construction, see Kim and Nelson (1999) or Stock and Watson (1991). # # In essense, what is done is to reconstruct the mean of the (differenced) # factor. We will compare it to the coincident index on published by the # Federal Reserve Bank of Philadelphia (USPHCI on FRED). usphci = DataReader( 'USPHCI', 'fred', start='1979-01-01', end='2014-12-01')['USPHCI'] usphci.plot(figsize=(13, 3)) dusphci = usphci.diff()[1:].values def compute_coincident_index(mod, res): # Estimate W(1) spec = res.specification design = mod.ssm['design'] transition = mod.ssm['transition'] ss_kalman_gain = res.filter_results.kalman_gain[:, :, -1] k_states = ss_kalman_gain.shape[0] W1 = np.linalg.inv( np.eye(k_states) -