def get_data_from_remote(symbol, interval, start, end, show_1min_logging: bool = False):
    """Request price history for ``symbol`` via ``Ticker(...).history``.

    ``interval`` values ``"1m"`` and ``"1min"`` are both sent as ``"1m"``;
    any other value is forwarded unchanged.

    Returns the response with its index reset when the response is a
    ``pandas.DataFrame``.  For every other outcome the function returns
    ``None``: a dict or other unexpected response may be logged (see the
    helper below), and any exception is logged as a warning and swallowed.
    """
    # Built before ``interval`` is normalised, so it shows the caller's value.
    request_tag = f"{symbol}-{interval}-{start}-{end}"

    def _warn_about(response):
        # ``interval`` is read when this helper runs, i.e. after the
        # normalisation below.
        if interval == YahooCollector.INTERVAL_1min and show_1min_logging:
            logger.warning(f"{request_tag}:{response}")

    if interval in ["1m", "1min"]:
        interval = "1m"

    try:
        response = Ticker(symbol, asynchronous=False).history(interval=interval, start=start, end=end)
        if isinstance(response, pd.DataFrame):
            return response.reset_index()
        if not isinstance(response, dict):
            _warn_about(response)
            return None
        # Dict response: the entry for ``symbol`` is either a string or a
        # mapping that should carry ``indicators -> quote``.
        payload = response.get(symbol, {})
        if isinstance(payload, str) or payload.get("indicators", {}).get("quote", None) is None:
            _warn_about(response)
    except Exception as e:
        logger.warning(f"{request_tag}:{e}")
    return None
def get_data_from_remote(symbol, interval, start, end, show_1min_logging: bool = False):
    """Request price history for ``symbol`` via ``Ticker(...).history``.

    Args:
        symbol: Ticker symbol passed to ``Ticker``.
        interval: History interval; ``"1m"`` and ``"1min"`` are both sent
            as ``"1m"``, any other value is forwarded unchanged.
        start: Forwarded as ``start`` to ``history``.
        end: Forwarded as ``end`` to ``history``.
        show_1min_logging: When true, unexpected responses for the 1-minute
            interval are logged as warnings.

    Returns:
        The response with its index reset when it is a ``pandas.DataFrame``;
        otherwise ``None``.  Exceptions are logged as warnings, not raised.
    """
    # Built before ``interval`` is normalised, so it shows the caller's value.
    error_msg = f"{symbol}-{interval}-{start}-{end}"

    def _show_logging_func():
        # NOTE(review): ``interval`` has already been normalised to "1m" when
        # this runs; confirm YahooCollector.INTERVAL_1min matches that value.
        if interval == YahooCollector.INTERVAL_1min and show_1min_logging:
            logger.warning(f"{error_msg}:{_resp}")

    interval = "1m" if interval in ["1m", "1min"] else interval
    try:
        _resp = Ticker(symbol, asynchronous=False).history(interval=interval, start=start, end=end)
        if isinstance(_resp, pd.DataFrame):
            return _resp.reset_index()
        elif isinstance(_resp, dict):
            _temp_data = _resp.get(symbol, {})
            # A string entry or a missing ``indicators -> quote`` node means
            # no usable data came back for this symbol.
            if isinstance(_temp_data, str) or _temp_data.get("indicators", {}).get("quote", None) is None:
                _show_logging_func()
        else:
            _show_logging_func()
    except Exception:
        # The parameters are ``start``/``end``; the previous ``start_``/``end_``
        # raised NameError from inside this handler.
        logger.warning(
            f"get data error: {symbol}--{start}--{end}"
            + "Your data request fails. This may be caused by your firewall (e.g. GFW). Please switch your network if you want to access Yahoo! data"
        )
    return None
def collector_data(self):
    """Download full price history for ``self.stock_list`` and save it.

    Symbols are requested in batches of ``self.max_workers`` through
    ``Ticker(...).history(period="max")``; each resulting frame is handed to
    ``self.save_stock`` on a thread pool.  Symbols whose response is not a
    DataFrame, or whose save task raises, are collected and logged.
    Afterwards the benchmark series fetched from ``CSI300_BENCH_URL`` is
    written to ``sh000300.csv`` under ``self.save_dir``.
    """
    logger.info("start collector yahoo data......")
    error_symbol = []
    batch_size = self.max_workers
    with ThreadPoolExecutor(max_workers=self.max_workers) as worker:
        futures = {}
        # ``with`` closes the bar even if a batch request raises.
        with tqdm(total=len(self.stock_list)) as p_bar:
            for offset in range(0, len(self.stock_list), batch_size):
                symbols = self.stock_list[offset:offset + batch_size]
                resp = Ticker(
                    symbols, asynchronous=True, max_workers=self.max_workers
                ).history(period="max")
                if isinstance(resp, dict):
                    # Dict response: one entry per symbol, not all of them frames.
                    for symbol, df in resp.items():
                        if isinstance(df, pd.DataFrame):
                            task = worker.submit(
                                self.save_stock,
                                symbol,
                                df.reset_index().rename(columns={"index": "date"}),
                            )
                            futures[task] = symbol
                        else:
                            error_symbol.append(symbol)
                else:
                    # Single frame covering the batch: split it per symbol.
                    for symbol, df in resp.reset_index().groupby("symbol"):
                        futures[worker.submit(self.save_stock, symbol, df)] = symbol
                # Advance by the real batch size; the last batch may be short,
                # and advancing by max_workers overshot the bar's total.
                p_bar.update(len(symbols))

        with tqdm(total=len(futures)) as p_bar:
            for future in as_completed(futures):
                try:
                    future.result()
                except Exception as e:
                    logger.error(e)
                    error_symbol.append(futures[future])
                p_bar.update()

    logger.info(error_symbol)
    logger.info(len(error_symbol))
    logger.info(len(self.stock_list))

    # TODO: from MSN
    # NOTE(review): this request has no timeout and the response shape
    # (``data -> klines`` of comma-separated rows) is not validated.
    klines = requests.get(CSI300_BENCH_URL).json()["data"]["klines"]
    df = pd.DataFrame([row.split(",") for row in klines])
    df.columns = ["date", "open", "close", "high", "low", "volume", "money", "change"]
    df["date"] = pd.to_datetime(df["date"])
    df = df.astype(float, errors="ignore")
    df["adjclose"] = df["close"]
    df.to_csv(self.save_dir.joinpath("sh000300.csv"), index=False)
def _gen_real_prices(self):
    '''
    Look up closing prices for ``<self.code>.AX`` from ``self.dates[0]``
    up to ``TOMORROW``.

    Returns a DataFrame indexed by ``date`` with a single ``price`` column,
    or None when the history call answers with a dict instead of a frame.
    '''
    history = Ticker(f'{self.code}.AX').history(start=self.dates[0], end=TOMORROW)
    if isinstance(history, dict):
        return None

    flat = history.reset_index()
    closes = flat[['date', 'close']].set_index('date')
    closes.columns = ['price']
    return closes
def _collector(self, stock_list):
    """Download full price history for ``stock_list`` and save each symbol.

    Symbols are requested in batches of ``self.max_workers`` (after a call
    to ``self._sleep()`` per batch) and every resulting frame is handed to
    ``self.save_stock`` on a thread pool.

    Args:
        stock_list: Sliceable sequence of symbols to download.

    Returns:
        List of symbols that failed: those whose response was not a
        DataFrame, those whose save task raised, plus every key of
        ``self._mini_symbol_map``.
    """
    error_symbol = []
    batch_size = self.max_workers
    with ThreadPoolExecutor(max_workers=self.max_workers) as worker:
        futures = {}
        # ``with`` closes the bar even if a batch request raises.
        with tqdm(total=len(stock_list)) as p_bar:
            for offset in range(0, len(stock_list), batch_size):
                symbols = stock_list[offset:offset + batch_size]
                self._sleep()
                resp = Ticker(
                    symbols, asynchronous=self._asynchronous, max_workers=self.max_workers
                ).history(period="max")
                if isinstance(resp, dict):
                    # Dict response: one entry per symbol, not all of them frames.
                    for symbol, df in resp.items():
                        if isinstance(df, pd.DataFrame):
                            # NOTE(review): ``self`` is passed explicitly to a
                            # call made through ``self``; confirm the callee's
                            # signature expects that.
                            self._temp_save_small_data(self, df)
                            task = worker.submit(
                                self.save_stock,
                                symbol,
                                df.reset_index().rename(columns={"index": "date"}),
                            )
                            futures[task] = symbol
                        else:
                            error_symbol.append(symbol)
                else:
                    # Single frame covering the batch: split it per symbol.
                    for symbol, df in resp.reset_index().groupby("symbol"):
                        self._temp_save_small_data(self, df)
                        futures[worker.submit(self.save_stock, symbol, df)] = symbol
                # Advance by the real batch size; the last batch may be short,
                # and advancing by max_workers overshot the bar's total.
                p_bar.update(len(symbols))

        with tqdm(total=len(futures)) as p_bar:
            for future in as_completed(futures):
                try:
                    future.result()
                except Exception as e:
                    logger.error(e)
                    error_symbol.append(futures[future])
                p_bar.update()

    # Report through the logger like the rest of this function, not stdout.
    logger.info(error_symbol)
    logger.info(f"error symbol nums: {len(error_symbol)}")
    logger.info(f"current get symbol nums: {len(stock_list)}")
    error_symbol.extend(self._mini_symbol_map.keys())
    return error_symbol
def _get_simple(start_, end_):
    """Fetch one ``start_``..``end_`` slice of history for ``symbol``.

    ``self`` and ``symbol`` are not parameters; they are resolved from the
    surrounding scope.  Returns the response with its index reset when it is
    a ``pandas.DataFrame``, otherwise ``None``.  Exceptions are logged as
    warnings and swallowed.
    """
    self._sleep()
    request_tag = f"{symbol}-{self._interval}-{start_}-{end_}"

    def _warn_about(response):
        # Only the 1-minute interval is logged, and only when enabled.
        if self._interval == "1m" and self._show_1m_logging:
            logger.warning(f"{request_tag}:{response}")

    try:
        response = Ticker(symbol, asynchronous=False).history(
            interval=self._interval, start=start_, end=end_
        )
        if isinstance(response, pd.DataFrame):
            return response.reset_index()
        if not isinstance(response, dict):
            _warn_about(response)
            return None
        # Dict response: the entry for ``symbol`` is either a string or a
        # mapping that should carry ``indicators -> quote``.
        payload = response.get(symbol, {})
        if isinstance(payload, str) or payload.get("indicators", {}).get("quote", None) is None:
            _warn_about(response)
    except Exception as e:
        logger.warning(f"{request_tag}:{e}")
    return None
import numpy as np
from yahooquery import Ticker
import armagarch as ag
import multiprocessing as mp

# Directory that receives one ``<prediction date>.txt`` file per forecast.
VAR_FORECAST_DIR = '/home/howardwong/Desktop/Research/ARMA-GARCH-Models/data/var-forecasts'
# Number of observations in each rolling estimation window.
WINDOW_SIZE = 500

# Daily AAPL history, re-indexed by date only.
price = Ticker('AAPL').history(period='10y', interval='1d')
price.reset_index(inplace=True)
price.drop('symbol', axis=1, inplace=True)
price.set_index('date', inplace=True)

# Log returns of the adjusted close; the first (undefined) value is dropped.
log_returns = np.log(price['adjclose'] / price['adjclose'].shift(1)).dropna()
T = len(log_returns)

# Half-open windows [i - WINDOW_SIZE, i); the upper bound T - 1 keeps the
# ``t2 + 1`` lookups in ``one_step_var`` inside the series.
intervals = [(i - WINDOW_SIZE, i) for i in range(WINDOW_SIZE, T - 1)]


def one_step_var(interval):
    """Fit the VaR model on one window and write its forecast to a file.

    Args:
        interval: ``(t1, t2)`` positions into ``log_returns``; the model is
            fitted on ``log_returns.values[t1:t2]``.

    Writes ``<date>,<value at risk>,<realised log return>`` to
    ``VAR_FORECAST_DIR/<date>.txt``, where date and realised return are
    taken at position ``t2 + 1``.  Returns ``None``.
    """
    t1, t2 = interval
    X = log_returns.values[t1:t2]
    # NOTE(review): the window ends at position t2 - 1, so the next
    # observation is t2; confirm that t2 + 1 is the intended target.
    prediction_date = log_returns.index[t2 + 1].strftime('%Y-%m-%d')

    model = ag.VaRModel()
    model.fit(X, 2, 2, verbose=False, summary_stats=False)
    value_at_risk95 = model.predict(X, threshold=0.95)

    record = ','.join(
        (prediction_date, str(value_at_risk95), str(log_returns.values[t2 + 1]))
    )
    # The ``with`` block closes the file; no explicit close() is needed.
    with open('{}/{}.txt'.format(VAR_FORECAST_DIR, prediction_date), 'w') as f:
        f.write(record)
import streamlit as st
from yahooquery import Ticker


def get_default_format(data):
    """Build a ``{column: format template}`` dict for ``data``.

    Columns whose dtype compares equal to ``"int64"`` get a thousands
    separator with no decimals, ``"float64"`` columns get a thousands
    separator with two decimals, and every other column is left as is.
    """
    def _template_for(dtype):
        if dtype == "int64":
            return "{0:,.0f}"
        if dtype == "float64":
            return "{0:,.2f}"
        return "{0:}"

    return {column: _template_for(data[column].dtype) for column in data.columns}


# Annual balance sheet, shown first without any formatting.
data = Ticker("ORSTED.CO").balance_sheet(frequency="annual")
st.header("Raw Data")
st.dataframe(data)

# Same data with the index turned into columns before formatting.
st.header("Dataframe Index dropped and decimal symbol format applied")
data_reset_index = data.reset_index()
format_dict = get_default_format(data_reset_index)
styled_without_index = data_reset_index.style.format(format_dict)
st.dataframe(styled_without_index)

# Same formatting applied while keeping the original index.
st.header("Dataframe Index NOT dropped and decimal symbol format applied")
format_dict = get_default_format(data)
styled_with_index = data.style.format(format_dict)
st.dataframe(styled_with_index)