def collector_data(self):
    """Fetch the full price history of every symbol and save the benchmark CSV.

    Symbols in ``self.stock_list`` are requested in batches of
    ``self.max_workers`` through ``Ticker(...).history(period="max")``.
    Every per-symbol DataFrame is handed to ``self.save_stock`` on a thread
    pool. Symbols whose response is not a DataFrame, or whose save raises,
    are collected and logged together with summary counts.

    Afterwards the k-line payload served at ``CSI300_BENCH_URL`` is parsed
    and written to ``sh000300.csv`` inside ``self.save_dir``.
    """
    logger.info("start collector yahoo data......")
    error_symbol = []
    batch_size = self.max_workers
    with ThreadPoolExecutor(max_workers=self.max_workers) as worker:
        futures = {}
        # The context manager closes the bar even if a batch request raises.
        with tqdm(total=len(self.stock_list)) as p_bar:
            for start in range(0, len(self.stock_list), batch_size):
                symbols = self.stock_list[start:start + batch_size]
                resp = Ticker(
                    symbols,
                    asynchronous=True,
                    max_workers=self.max_workers,
                ).history(period="max")
                if isinstance(resp, dict):
                    # Dict responses map each symbol to either a DataFrame
                    # or some non-DataFrame value, which is counted as an
                    # error.
                    for symbol, df in resp.items():
                        if isinstance(df, pd.DataFrame):
                            frame = df.reset_index().rename(
                                columns={"index": "date"})
                            futures[worker.submit(
                                self.save_stock, symbol, frame)] = symbol
                        else:
                            error_symbol.append(symbol)
                else:
                    # Otherwise the response is one combined frame that is
                    # split per symbol.
                    for symbol, df in resp.reset_index().groupby("symbol"):
                        futures[worker.submit(
                            self.save_stock, symbol, df)] = symbol
                # Advance by the real batch size: the final batch may hold
                # fewer than ``max_workers`` symbols.
                p_bar.update(len(symbols))

        with tqdm(total=len(futures)) as p_bar:
            for future in as_completed(futures):
                try:
                    future.result()
                except Exception as e:
                    # Best effort: record the failing symbol and carry on.
                    logger.error(e)
                    error_symbol.append(futures[future])
                p_bar.update()

    logger.info(error_symbol)
    logger.info(len(error_symbol))
    logger.info(len(self.stock_list))

    # TODO: from MSN
    klines = requests.get(CSI300_BENCH_URL).json()["data"]["klines"]
    df = pd.DataFrame([row.split(",") for row in klines])
    df.columns = [
        "date", "open", "close", "high", "low", "volume", "money", "change"
    ]
    df["date"] = pd.to_datetime(df["date"])
    df = df.astype(float, errors="ignore")
    # The benchmark payload carries no separate adjusted close; reuse close.
    df["adjclose"] = df["close"]
    df.to_csv(self.save_dir.joinpath("sh000300.csv"), index=False)
def _collector(self, stock_list):
    """Fetch and save the full price history for ``stock_list``.

    Symbols are requested in batches of ``self.max_workers`` through
    ``Ticker(...).history(period="max")``, with ``self._sleep()`` called
    before each batch. Every per-symbol DataFrame is passed to
    ``self._temp_save_small_data`` and then handed to ``self.save_stock``
    on a thread pool.

    Args:
        stock_list: sequence of symbols to download.

    Returns:
        list: symbols whose response was not a DataFrame or whose save
        raised, followed by every key of ``self._mini_symbol_map``.
    """
    error_symbol = []
    batch_size = self.max_workers
    with ThreadPoolExecutor(max_workers=self.max_workers) as worker:
        futures = {}
        # The context manager closes the bar even if a batch request raises.
        with tqdm(total=len(stock_list)) as p_bar:
            for start in range(0, len(stock_list), batch_size):
                symbols = stock_list[start:start + batch_size]
                self._sleep()
                resp = Ticker(
                    symbols,
                    asynchronous=self._asynchronous,
                    max_workers=self.max_workers,
                ).history(period="max")
                if isinstance(resp, dict):
                    for symbol, df in resp.items():
                        if isinstance(df, pd.DataFrame):
                            # NOTE(review): ``self`` is passed explicitly to
                            # what looks like a bound method, and ``symbol``
                            # is not passed at all. Confirm against the
                            # signature of ``_temp_save_small_data``.
                            self._temp_save_small_data(self, df)
                            frame = df.reset_index().rename(
                                columns={"index": "date"})
                            futures[worker.submit(
                                self.save_stock, symbol, frame)] = symbol
                        else:
                            error_symbol.append(symbol)
                else:
                    for symbol, df in resp.reset_index().groupby("symbol"):
                        # NOTE(review): same call shape as above; confirm.
                        self._temp_save_small_data(self, df)
                        futures[worker.submit(
                            self.save_stock, symbol, df)] = symbol
                # Advance by the real batch size: the final batch may hold
                # fewer than ``max_workers`` symbols.
                p_bar.update(len(symbols))

        with tqdm(total=len(futures)) as p_bar:
            for future in as_completed(futures):
                try:
                    future.result()
                except Exception as e:
                    # Best effort: record the failing symbol and carry on.
                    logger.error(e)
                    error_symbol.append(futures[future])
                p_bar.update()

    print(error_symbol)
    logger.info(f"error symbol nums: {len(error_symbol)}")
    logger.info(f"current get symbol nums: {len(stock_list)}")
    # Symbols still tracked in ``_mini_symbol_map`` are reported alongside
    # the failed ones.
    error_symbol.extend(self._mini_symbol_map.keys())
    return error_symbol
def priceChecker(debug, df):
    """Spot-check a few random rows of ``df`` against freshly fetched prices.

    When ``debug`` is truthy, up to three rows are sampled, their
    ``"yf_ticker"`` values are looked up through ``Ticker(...).price``, and
    each returned ``regularMarketPrice`` is compared with the
    ``"regularMarketPrice"`` stored in ``df`` for that ticker.

    Args:
        debug: run the check only when truthy; otherwise do nothing.
        df: DataFrame with ``"yf_ticker"``, ``"regularMarketPrice"`` and
            ``"Trading Name"`` columns. Rows are looked up with
            ``df.loc[ticker, ...]``, so the index is presumably the ticker
            symbol; verify against the caller.

    Returns:
        ``1`` as soon as a sampled price differs from the fetched one,
        otherwise ``None`` (also when ``debug`` is falsy).
    """
    if not debug:
        return None

    # ``DataFrame.sample`` raises ValueError when ``n`` exceeds the number
    # of rows, so never ask for more rows than the frame has.
    sample_size = min(3, len(df))
    rand_stocks = df.sample(n=sample_size).loc[:, "yf_ticker"].values.tolist()
    cfm_prices = Ticker(rand_stocks).price

    for stock, value in cfm_prices.items():
        price = value['regularMarketPrice']
        listprice = df.loc[stock, "regularMarketPrice"]
        # Exact comparison is kept: the stored value is expected to be the
        # very number the price service returned.
        if listprice != price:
            print(
                "The prices fetched are not accurately updated. Please check the merge and update functions in 'Updater.py'."
            )
            return 1
        print(f"Stock: {stock} {df.loc[stock, 'Trading Name']}"
              f",\tPrices - Actual: {price},\tWorkers: {listprice}")

    print("\nSuccessful updating of df!")
    return None