def getStocksList_CHN(root_path): try: df = queryStockList(root_path, "DB_STOCK", "SHEET_CHN") df.index = df.index.astype(str).str.zfill(6) except Exception as e: df = pd.DataFrame() if df.empty == False: return df stock_info = ts.get_stock_basics() listData = pd.DataFrame(stock_info) listData['daily_update'] = '1970-07-01' listData['weekly_update'] = '1970-07-01' listData['monthly_update'] = '1970-07-01' listData['news_update'] = '1970-07-01' listData.index.name = 'symbol' listData = listData.reset_index() #listData.index.name = 'symbol' #listData.index = listData.index.astype(str).str.zfill(6) #[str(symbol).zfill(6) for symbol in listData.index] #listData.index.astype(str).str.zfill(6) #print(listData.index) #listData['symbol'] = listData['symbol'].str.strip() storeStockList(root_path, "DB_STOCK", "SHEET_CHN", listData) df = queryStockList(root_path, "DB_STOCK", "SHEET_CHN") if df.empty == False: df.index = df.index.astype(str).str.zfill(6) return df
def getStocksList_US(root_path): try: df = queryStockList(root_path, "DB_STOCK", "SHEET_US") except Exception as e: df = pd.DataFrame() if df.empty == False: return df for exchange in ["NASDAQ", "NYSE"]: print("fetching " + exchange + " stocklist...") url = "http://www.nasdaq.com/screening/companies-by-industry.aspx?exchange=%s&render=download" % exchange repeat_times = 1 # repeat downloading in case of http error for _ in range(repeat_times): try: urlData = requests.get(url, timeout=15).content if df.empty: df = pd.read_csv(io.StringIO(urlData.decode('utf-8'))) else: df = pd.concat([ df, pd.read_csv(io.StringIO(urlData.decode('utf-8'))) ]) break except Exception as e: print("exception in getStocks:" + exchange, str(e)) continue df = df[(df['MarketCap'] > 100000000)] df = df.drop_duplicates(subset=['Symbol'], keep='first') df.sort_index(ascending=True, inplace=True) listData = df[['Symbol', 'Name', 'MarketCap', 'Sector', 'Industry']].copy() listData = listData.rename( columns={ 'Symbol': 'symbol', 'Name': 'name', 'MarketCap': 'market_cap', 'Sector': 'sector', 'Industry': 'industry' }) listData.loc[len(listData)] = [ 'SPY', 'SPDR S&P 500 ETF Trust', 0.0, '', '' ] listData.loc[len(listData)] = ['^VIX', 'VOLATILITY S&P 500', 0.0, '', ''] listData['symbol'] = listData['symbol'].str.strip() listData['daily_update'] = '1970-07-01' listData['weekly_update'] = '1970-07-01' listData['monthly_update'] = '1970-07-01' listData['news_update'] = '1970-07-01' listData = listData.reset_index(drop=True) storeStockList(root_path, "DB_STOCK", "SHEET_US", listData) return queryStockList(root_path, "DB_STOCK", "SHEET_US")
def getStocksList(root_path): try: df = queryStockList(root_path, "DB_STOCK", "SHEET_CHN_DAILY") df.index = df.index.astype(str).str.zfill(6) except Exception as e: df = pd.DataFrame() if df.empty == False: return df stock_info = ts.get_stock_basics() listData = pd.DataFrame(stock_info) #listData.index.name = 'Symbol' #listData.index = listData.index.astype(str).str.zfill(6) #[str(symbol).zfill(6) for symbol in listData.index] #listData.index.astype(str).str.zfill(6) #print(listData.index) #listData['Symbol'] = listData['Symbol'].str.strip() storeStockList(root_path, "DB_STOCK", "SHEET_CHN_DAILY", listData) df = queryStockList(root_path, "DB_STOCK", "SHEET_CHN_DAILY") df.index = df.index.astype(str).str.zfill(6) return df
def updateSingleStockData(root_path, symbol, from_date, till_date, force_check): startTime = time.time() message = "" if len(symbol) == 0: return startTime, message now_date = pd.Timestamp((datetime.datetime.now()).strftime("%Y-%m-%d")) start_date = pd.Timestamp(from_date) end_date = pd.Timestamp(till_date) if end_date == now_date: end_date = end_date - datetime.timedelta(days=1) stockData, lastUpdateTime = queryStock(root_path, "DB_STOCK", "SHEET_US", "_WEEKLY", symbol, "weekly_update") if stockData.empty: stockData, message = getSingleStock(symbol, from_date, till_date) if stockData.empty == False: storeStock(root_path, "DB_STOCK", "SHEET_US", "_WEEKLY", symbol, stockData, "weekly_update") first_date = pd.Timestamp(stockData.index[0]) to_date = (first_date - datetime.timedelta(days=1)).strftime("%Y-%m-%d") if judgeNeedPreDownload(root_path, symbol, first_date, from_date, to_date): storePublishDay(root_path, "DB_STOCK", "SHEET_US", symbol, first_date.strftime("%Y-%m-%d")) message = message + ", database updated" else: print("get stock from network failed", symbol) return startTime, message modified = False savePublishDay = False first_date = pd.Timestamp(stockData.index[0]) last_date = pd.Timestamp(stockData.index[-1]) if start_date < first_date: to_date = (first_date - datetime.timedelta(days=1)).strftime("%Y-%m-%d") if judgeNeedPreDownload(root_path, symbol, first_date, from_date, to_date): message = message + ", download pre data from " + from_date + " to " + to_date moreStockData, tempMessage = getSingleStock( symbol, from_date, to_date) message = message + tempMessage if len(moreStockData) > 0: if isinstance(moreStockData.index, pd.DatetimeIndex): moreStockData.index = moreStockData.index.strftime( "%Y-%m-%d") modified = True stockData = pd.concat([moreStockData, stockData]) stockData.index.name = 'date' else: savePublishDay = True storePublishDay(root_path, "DB_STOCK", "SHEET_US", symbol, first_date.strftime("%Y-%m-%d")) message = message + ", save stock publish(IPO) day, next time won't check it again" updateOnce = now_date > lastUpdateTime if (end_date > last_date) and (updateOnce or force_check): to_date = (last_date + datetime.timedelta(days=1)).strftime("%Y-%m-%d") if judgeNeedPostDownload(now_date, to_date, till_date): message = message + ", download post data from " + to_date + " to " + till_date moreStockData, tempMessage = getSingleStock( symbol, to_date, till_date) message = message + tempMessage if len(moreStockData) > 0: if isinstance(moreStockData.index, pd.DatetimeIndex): moreStockData.index = moreStockData.index.strftime( "%Y-%m-%d") modified = True stockData = pd.concat([stockData, moreStockData]) stockData.index.name = 'date' if modified: stockData = stockData[~stockData.index.duplicated(keep='first')] storeStock(root_path, "DB_STOCK", "SHEET_US", "_WEEKLY", symbol, stockData, "weekly_update") elif updateOnce: now_date = datetime.datetime.now().strftime("%Y-%m-%d") stockList = queryStockList(root_path, "DB_STOCK", "SHEET_US") if stockList[stockList.index == symbol]['daily_update'][0] != now_date: stockList.set_value(symbol, 'weekly_update', now_date) storeStockList(root_path, "DB_STOCK", "SHEET_US", stockList, symbol) elif savePublishDay == False: message = "" return startTime, message