def GetTopVolume():
    """Fetch the goodinfo.tw ranking and return the listed stock codes.

    The ranking requested is the one named in the URL (roughly "number of
    days the daily trade volume hit a recent high").  The ``#divStockList``
    table is downloaded, reduced to its level-0 column labels, stripped of
    every row that occurs more than once, and filtered to rows whose
    ``代號`` value is exactly four characters long.  The filtered table is
    written to the Daily data folder under ``Utils.GetRootPath()`` as CSV.

    Returns:
        The ``代號`` column of the filtered table (``Series.values``).

    Raises:
        Whatever ``Utils.GetDataFrameByCssSelector`` raises if the single
        retry also fails.
    """
    css_selector = '#divStockList'
    url = 'https://goodinfo.tw/tw/StockList.asp?RPT_TIME=&MARKET_CAT=熱門排行&INDUSTRY_CAT=日成交張數創近期新高日數@@成交張數@@日成交張數創近期新高日數'
    print(url)

    try:
        table = Utils.GetDataFrameByCssSelector(url, css_selector)
    except Exception:
        # Back off 20-30 s and retry once.  ``Exception`` (not a bare
        # ``except``) so Ctrl-C / SystemExit are not swallowed by the retry.
        time.sleep(random.randint(20, 30))
        table = Utils.GetDataFrameByCssSelector(url, css_selector)
    print(table)

    table.columns = table.columns.get_level_values(0)
    # keep=False removes *every* copy of a duplicated row -- presumably the
    # header rows the site repeats inside the table body (TODO confirm).
    table = table.drop_duplicates(keep=False)

    has_four_chars = table['代號'].astype(str).map(len) == 4
    table = table[has_four_chars]

    # Backslashes are escaped explicitly; the resulting path is unchanged.
    table.to_csv(
        f'{Utils.GetRootPath()}\\Data\\Daily\\日成交張數創近期新高日數.csv',
        encoding='utf_8_sig')
    return table['代號'].values
def GetFinData(stockId):
    """Fetch the ``#txtFinBody`` table of a stock's StockFinDetail page.

    Args:
        stockId: Stock identifier interpolated into the ``STOCK_ID`` query
            parameter.

    Returns:
        Whatever ``Utils.GetDataFrameByCssSelector`` returns for the page
        (used as a DataFrame elsewhere in this file).

    Raises:
        Whatever ``Utils.GetDataFrameByCssSelector`` raises if the single
        retry also fails.
    """
    url = f'https://goodinfo.tw/StockInfo/StockFinDetail.asp?RPT_CAT=XX_M_QUAR_ACC&STOCK_ID={stockId}'
    css_selector = '#txtFinBody'
    try:
        return Utils.GetDataFrameByCssSelector(url, css_selector)
    except Exception:
        # Back off 20-30 s and retry once.  ``Exception`` (not a bare
        # ``except``) so Ctrl-C / SystemExit are not swallowed by the retry.
        time.sleep(random.randint(20, 30))
        return Utils.GetDataFrameByCssSelector(url, css_selector)
def GetDividend(stockId):
    """Summarise a stock's most recent dividend rows as a one-row table.

    Downloads the ``#divDetail`` table of the StockDividendPolicy page,
    keeps the level-3 column labels (spaces removed), drops rows whose
    ``股利發放年度`` is ``'∟'`` and, when the first remaining row's year is
    later than the current year, drops that row too.  Up to five numeric
    values are then taken from columns 0 (year), 3 (cash) and 6 (stock).

    Args:
        stockId: Stock identifier interpolated into the ``STOCK_ID`` query
            parameter.

    Returns:
        A single-row DataFrame whose column labels are the years and whose
        cells are ``"<cash> / <stock>"`` strings, each part right-justified
        to six characters.  Fewer than five columns are returned when the
        table holds fewer than five numeric rows.

    Raises:
        Whatever ``Utils.GetDataFrameByCssSelector`` raises if the single
        retry also fails.
    """
    url = f'https://goodinfo.tw/tw/StockDividendPolicy.asp?STOCK_ID={stockId}'
    css_selector = '#divDetail'
    try:
        df = Utils.GetDataFrameByCssSelector(url, css_selector)
        df.columns = df.columns.get_level_values(3)
    except Exception:
        # Back off 20-30 s and retry once.  ``Exception`` (not a bare
        # ``except``) so Ctrl-C / SystemExit are not swallowed by the retry.
        time.sleep(random.randint(20, 30))
        df = Utils.GetDataFrameByCssSelector(url, css_selector)
        df.columns = df.columns.get_level_values(3)

    # Strip spaces from the column labels.
    df.columns = df.columns.str.replace(' ', '')
    # Drop the rows marked with '∟' in the year column.
    df = df[df['股利發放年度'] != '∟']

    # If the first row's year is later than the current year, skip it.
    # Guarded so an empty table no longer raises IndexError.
    if not df.empty:
        first_row = df.iloc[0, :]
        if int(first_row['股利發放年度']) > datetime.now().year:
            df = df.iloc[1:, :]

    rows_count = 5

    def _leading_numbers(column):
        # First `rows_count` values of `column` that parse as numbers.
        return pd.to_numeric(column, errors='coerce').dropna().head(rows_count)

    # NOTE(review): each column drops its own non-numeric rows, so the three
    # series can drift out of row alignment if a row is numeric in one column
    # but not another -- confirm against real page data.
    year = _leading_numbers(df.iloc[:, 0]).astype(int).reset_index(drop=True)
    cash = _leading_numbers(df.iloc[:, 3]).reset_index(drop=True)
    stock = _leading_numbers(df.iloc[:, 6]).reset_index(drop=True)

    # Use only as many entries as all three series can supply; previously a
    # short dividend history raised KeyError at cash[index].
    available = min(len(year), len(cash), len(stock))
    data = [
        str(cash[i]).rjust(6) + ' / ' + str(stock[i]).rjust(6)
        for i in range(available)
    ]
    print(data)
    return pd.DataFrame([data], columns=year.head(available))
def GetAllDividend():
    """Download six ranking pages (``RANK`` 0-5) and store them in one CSV.

    For each page the ``#divStockList`` table is fetched after a 10-20 s
    pause, reduced to its level-0 column labels, stripped of duplicated
    rows (the repeated header rows) and filtered to rows whose ``代號``
    value is exactly four characters long.  Page 0 creates the CSV in the
    Yearly data folder under ``Utils.GetRootPath()`` with a header; later
    pages are appended without one.

    Returns:
        None.  Output is the CSV file plus progress printed to stdout.

    Raises:
        Whatever ``Utils.GetDataFrameByCssSelector`` raises if a page's
        single retry also fails.
    """
    css_selector = '#divStockList'
    # Backslashes are escaped explicitly; the resulting path is unchanged.
    file_path = f'{Utils.GetRootPath()}\\Data\\Yearly\\合計股利.csv'

    for rank_index in range(6):
        url = f'https://goodinfo.tw/tw/StockList.asp?SHEET=股利政策&MARKET_CAT=熱門排行&INDUSTRY_CAT=合計股利&RANK={rank_index}'
        print(url)

        # Pause 10-20 seconds between page requests.
        time.sleep(random.randint(10, 20))
        try:
            page = Utils.GetDataFrameByCssSelector(url, css_selector)
        except Exception:
            # Back off 20-30 s and retry once.  ``Exception`` (not a bare
            # ``except``) so Ctrl-C / SystemExit are not swallowed.
            time.sleep(random.randint(20, 30))
            page = Utils.GetDataFrameByCssSelector(url, css_selector)
        print(page)

        page.columns = page.columns.get_level_values(0)
        # Remove the repeated header rows (keep=False drops every copy).
        page = page.drop_duplicates(keep=False)
        print(page)

        has_four_chars = page['代號'].astype(str).map(len) == 4
        page = page[has_four_chars]

        # First page starts the file with a header; the rest are appended.
        is_first_page = rank_index == 0
        page.to_csv(
            file_path,
            mode='w' if is_first_page else 'a',
            header=is_first_page,
            encoding='utf_8_sig')

    print('執行完成')
def WriteData():
    """Download five ranking pages (``RANK`` 0-4) and store them in one CSV.

    Each page's ``#divStockList`` table is fetched after a 5-10 s pause and
    appended to a running DataFrame.  Rows in which every cell equals its
    own column label (repeated header rows) are dropped before the result
    is written to the Monthly data folder under ``Utils.GetRootPath()``.

    Returns:
        None.  Output is the CSV file plus progress printed to stdout.

    Raises:
        Whatever ``Utils.GetDataFrameByCssSelector`` raises if a page's
        single retry also fails.
    """
    css_selector = '#divStockList'
    sum_df = pd.DataFrame()

    for rank_index in range(5):
        url = f'https://goodinfo.tw/tw/StockList.asp?SHEET=董監持股&MARKET_CAT=熱門排行&INDUSTRY_CAT=全體董監持股比例&RANK={rank_index}'
        print(url)
        try:
            time.sleep(random.randint(5, 10))
            page = Utils.GetDataFrameByCssSelector(url, css_selector)
        except Exception:
            # Back off 20-30 s and retry once.  ``Exception`` (not a bare
            # ``except``) so Ctrl-C / SystemExit are not swallowed.
            time.sleep(random.randint(20, 30))
            page = Utils.GetDataFrameByCssSelector(url, css_selector)
        print(page)
        # Accumulate outside the try/except so a page obtained on the retry
        # is kept too; previously it was fetched and then discarded.
        sum_df = pd.concat([sum_df, page], axis=0)

    # Drop the repeated header rows: keep rows where at least one cell
    # differs from its column label.  ``axis`` must be passed by keyword;
    # pandas 2.0 no longer accepts it positionally.
    body_rows = sum_df.ne(sum_df.columns).any(axis=1)
    # Backslashes are escaped explicitly; the resulting path is unchanged.
    sum_df[body_rows].to_csv(
        f'{Utils.GetRootPath()}\\Data\\Monthly\\董監持股比例.csv',
        encoding='utf_8_sig')
def GetPE(stockId):
    """Extract six (multiple, value) pairs from a stock's PER chart table.

    Downloads the ``#divK_ChartFlowDetail`` table of the ShowK_ChartFlow
    page (``RPT_CAT=PER``, weekly) and reads the last six columns of its
    first row.  For each of them the first number found in the column
    label becomes the "倍數" entry and the cell value the "價格" entry.

    Args:
        stockId: Stock identifier interpolated into the ``STOCK_ID`` query
            parameter.

    Returns:
        A single-row DataFrame with the twelve columns
        ``本益比-級距{1..6}倍數`` / ``本益比-級距{1..6}價格``.

    Raises:
        IndexError: if one of the six column labels contains no number.
        Whatever ``Utils.GetDataFrameByCssSelector`` raises if the single
        retry also fails.
    """
    url = f'https://goodinfo.tw/StockInfo/ShowK_ChartFlow.asp?RPT_CAT=PER&STOCK_ID={stockId}&CHT_CAT=WEEK'
    css_selector = '#divK_ChartFlowDetail'
    try:
        df = Utils.GetDataFrameByCssSelector(url, css_selector)
        # First row, last six columns.
        first_row = df.iloc[0, -6:]
    except Exception:
        # Back off 20-30 s and retry once.  ``Exception`` (not a bare
        # ``except``) so Ctrl-C / SystemExit are not swallowed by the retry.
        time.sleep(random.randint(20, 30))
        df = Utils.GetDataFrameByCssSelector(url, css_selector)
        first_row = df.iloc[0, -6:]

    number_pattern = r'[0-9]+[.]?[0-9]*'
    # Flatten to [multiple1, value1, multiple2, value2, ...].
    data = []
    for label, value in first_row.items():
        data.append(re.findall(number_pattern, str(label))[0])
        data.append(value)

    headers = [
        f'本益比-級距{level}{suffix}'
        for level in range(1, 7)
        for suffix in ('倍數', '價格')
    ]
    return pd.DataFrame([data], columns=headers)
def GetTransaction(stockId):
    """Build a one-row summary of averages for four price-table columns.

    Downloads the ``#divPriceDetail`` table of the ShowK_Chart page and
    keeps its level-1 column labels.  For each of the columns ``收盤``,
    ``張數``, ``外資 持股 (%)`` and ``券資 比 (%)`` the mean of the first
    1, 5, 20 and 60 numeric values is computed, rounded to two decimals and
    joined as ``"a / b / c / d"`` (each part right-justified to 8 chars).
    Presumably the table lists the most recent day first, which would make
    these the latest value and the 5/20/60-day averages -- TODO confirm.

    Markers prepended to the text:
      * ``收盤``: '👍' when the 1-row value exceeds both the 5- and 20-row
        averages, otherwise '👎' when it is below the 60-row average.
      * ``張數``: '🏆' when the 1-row value is more than 3x the 5-row average.

    Args:
        stockId: Stock identifier interpolated into the ``STOCK_ID`` query
            parameter.

    Returns:
        A single-row DataFrame keyed by
        ``<column without spaces>(1ma / 5ma / 20ma / 60ma)``.  A column
        that cannot be processed gets an empty string.

    Raises:
        Whatever ``Utils.GetDataFrameByCssSelector`` raises if the single
        retry also fails.
    """
    url = f'https://goodinfo.tw/tw/ShowK_Chart.asp?STOCK_ID={stockId}&CHT_CAT2=DATE'
    css_selector = '#divPriceDetail'
    try:
        df = Utils.GetDataFrameByCssSelector(url, css_selector)
        df.columns = df.columns.get_level_values(1)
    except Exception:
        # Back off 20-30 s and retry once.  ``Exception`` (not a bare
        # ``except``) so Ctrl-C / SystemExit are not swallowed by the retry.
        time.sleep(random.randint(20, 30))
        df = Utils.GetDataFrameByCssSelector(url, css_selector)
        df.columns = df.columns.get_level_values(1)

    headers = ['收盤', '張數', '外資 持股 (%)', '券資 比 (%)']
    sma_periods = [1, 5, 20, 60]
    period_label = '(' + 'ma / '.join(map(str, sma_periods)) + 'ma)'

    summary = {}
    for header in headers:
        key = header.replace(' ', '') + period_label
        try:
            numeric = pd.to_numeric(df[header], errors='coerce').dropna()
            averages = [
                round(numeric.head(period).mean(), 2) for period in sma_periods
            ]
            entry = ' / '.join(str(avg).rjust(8) for avg in averages)

            # Plain Python floats: dividing by a zero average must raise
            # ZeroDivisionError (-> empty cell) rather than yield inf.
            latest, avg_5, avg_20, avg_60 = (float(avg) for avg in averages)

            if header == '收盤':
                if latest > avg_5 and latest > avg_20:
                    entry = '👍' + entry
                elif latest < avg_60:
                    entry = '👎' + entry
            elif header == '張數':
                # Volume above three times its 5-row average.
                if latest / avg_5 > 3.0:
                    entry = '🏆' + entry

            summary[key] = entry
        except Exception:
            # Best effort: a missing or unparsable column gives an empty cell.
            summary[key] = ''

    return pd.DataFrame([summary])