def GetTopVolume():
    """Fetch the daily top-volume ranking from goodinfo.tw and persist it as CSV.

    Scrapes the '日成交張數創近期新高日數' hot-ranking table, drops repeated
    header rows and non-4-digit IDs (ETFs/warrants), writes the result under
    Data\\Daily, and returns the surviving stock IDs.

    Returns:
        numpy.ndarray: values of the '代號' (stock ID) column.
    """
    cssSelector = '#divStockList'
    # Was an f-string with no placeholders; a plain literal is equivalent.
    url = 'https://goodinfo.tw/tw/StockList.asp?RPT_TIME=&MARKET_CAT=熱門排行&INDUSTRY_CAT=日成交張數創近期新高日數@@成交張數@@日成交張數創近期新高日數'
    print(url)
    try:
        df = Utils.GetDataFrameByCssSelector(url, cssSelector)
    except Exception:
        # Likely throttled by the site: back off a random interval, retry once.
        time.sleep(random.randint(20, 30))
        df = Utils.GetDataFrameByCssSelector(url, cssSelector)
    print(df)
    # Flatten the scraped MultiIndex header to its first level.
    df.columns = df.columns.get_level_values(0)
    # The table embeds repeated header rows; keep=False removes all duplicates.
    df = df.drop_duplicates(keep=False, inplace=False)
    # Keep ordinary stocks only (4-digit IDs).
    length = df['代號'].astype(str).map(len) == 4
    df = df[length]
    # Explicit backslashes (the original relied on \D etc. not being escapes).
    df.to_csv(f'{Utils.GetRootPath()}\\Data\\Daily\\日成交張數創近期新高日數.csv',
              encoding='utf_8_sig')
    return df['代號'].values
def update_with_project_id(project_id, start_nodes, relationship, config_order):
    """Update the Model row bound to *project_id* with new start nodes and config.

    Args:
        project_id: primary key used to locate the Model row.
        start_nodes: list-like string parsed via utils.list_str_to_list.
        relationship: list-like string; entries are joined with '*,' into config.
        config_order: stored verbatim inside the JSON config blob.

    Returns:
        bool: True on successful commit, False on any failure.
    """
    try:
        start_nodes = utils.list_str_to_list(start_nodes)
        relationship = utils.list_str_to_list(relationship)
        # Entries may not be strings already; normalize each one.
        relationship_item_str = [str(item) for item in relationship]
        config = json.dumps(
            {
                'config_order': config_order,
                'relationship': '*,'.join(relationship_item_str)
            },
            ensure_ascii=False)
        query = db.session.query(Model)
        query.filter(Model.project_id == project_id).update({
            Model.start_nodes: ','.join(start_nodes),
            Model.config: config
        })
        db.session.commit()
        print('更新完成')
        return True
    except Exception:
        # BUG FIX: traceback.print_exc() prints the traceback and returns
        # None, so the original `print(traceback.print_exc())` emitted a
        # spurious "None" line.
        traceback.print_exc()
        # Leave the session usable after a failed flush/commit.
        db.session.rollback()
        return False
def GetFinData(stockId):
    """Fetch the quarterly financial-statement table for *stockId*.

    Args:
        stockId: stock identifier inserted into the goodinfo.tw URL.

    Returns:
        pandas.DataFrame scraped from the '#txtFinBody' element.
    """
    url = f'https://goodinfo.tw/StockInfo/StockFinDetail.asp?RPT_CAT=XX_M_QUAR_ACC&STOCK_ID={stockId}'
    css_selector = '#txtFinBody'
    try:
        df = Utils.GetDataFrameByCssSelector(url, css_selector)
    except Exception:
        # Likely throttled: back off a random interval and retry once.
        time.sleep(random.randint(20, 30))
        df = Utils.GetDataFrameByCssSelector(url, css_selector)
    return df
def GetFinDetail(stockId):
    """Build a one-row DataFrame of profitability metrics for *stockId*.

    Reads the '獲利能力' section of the financial table for each header from
    GetFinHeaders(); the synthetic '本業收益' metric is 營業利益率 / 稅前淨利率
    expressed as a percentage. Any value that cannot be parsed becomes '0'.

    Returns:
        pandas.DataFrame: single row, one column per (renamed) header.
    """
    df = GetFinData(stockId)
    values = {}  # renamed from `dict` -- never shadow the builtin
    headers = GetFinHeaders()
    for header in headers:
        if header == '本業收益':
            try:
                # 本業收益 (core-business share) = 營業利益率 / 稅前淨利率 * 100;
                # depends on those keys already being present in `values`.
                ratio = round(
                    Decimal(values['營業利益率']) /
                    Decimal(values['稅前淨利率']) * 100, 2)
                values['本業收益'] = str(ratio)
            except Exception:
                values['本業收益'] = '0'
        else:
            key = header.replace('股東權益報酬率 (年預估)', 'ROE')
            try:
                cell = Utils.GetDataFrameValueByLabel(df, '獲利能力', header)
                values[key] = str(Decimal(cell[0]))
            except Exception:
                values[key] = '0'
    return pd.DataFrame([values])
def getDirContent(req):
    """Return the folder listing for the POSTed 'path' as a JSON response.

    Responds with an empty JSON object when no path was supplied.
    """
    path = req.POST.get('path', None)
    if path is None:
        # No path supplied: answer with an empty JSON object.
        return HttpResponse(json.dumps({}), content_type="application/json")
    folder = Utils.Folder(path)
    return HttpResponse(folder.getFolderJson(), content_type="application/json")
def decide_include_article(publishTime):
    """Return True when the article's publish time is recent enough to keep."""
    # An empty publish time means no publishedTime was found in the HTML;
    # include the article rather than silently dropping it.
    if publishTime == '':
        return True
    # minimalAllowedDateAsStr was already calculated in main()
    published = Utils.date_to_str(publishTime)
    logger.debug("publishedDateAsStr=%s minimalAllowedDateAsStr=%s",
                 published, Utils.minimalAllowedDateAsStr)
    return published >= Utils.minimalAllowedDateAsStr
def mkdir(req):
    """Create the directory named by the POSTed 'path'; acknowledge with JSON."""
    target = req.POST.get("path")
    Utils.fileOperator().mkdir(target)
    payload = json.dumps({"ok": True})
    return HttpResponse(payload, content_type="application/json")
def find_times(bs):
    """Extract (publishedAt, updatedAt) timestamps from a parsed HTML page.

    Tries, in order: <time datetime=...> elements; article:published /
    article:modified meta tags; then og:pubdate plus any <time> tag.
    Whichever value is still missing falls back to today's date.

    Args:
        bs: parsed HTML document (BeautifulSoup-style interface).

    Returns:
        tuple[str, str]: (publishedAt, updatedAt); never empty strings.
    """
    publishedAt = ""
    updatedAt = ""
    try:
        logger.debug("1")
        elements = bs.find_all('time')
        if elements:
            publishedAt = elements[0]['datetime']
            # Guard: some pages carry only a single <time> element -- the
            # original raised IndexError here and swallowed it silently.
            if len(elements) > 1:
                updatedAt = elements[1]['datetime']
        else:
            # Look up each meta tag once instead of re-querying for .attrs.
            published = bs.head.find(name='meta',
                                     attrs={"property": "article:published"})
            if published is not None:
                logger.debug("1.1")
                publishedAt = published.attrs['content']
            modified = bs.head.find(name='meta',
                                    attrs={"property": "article:modified"})
            if modified is not None:
                logger.debug("1.2")
                updatedAt = modified.attrs['content']
    except Exception:
        # Best effort: malformed markup simply leaves the fields empty.
        pass
    if publishedAt == '' and updatedAt == '':
        try:
            publishedAt = bs.head.find(name='meta',
                                       attrs={"property": "og:pubdate"
                                              }).attrs['content']
            logger.debug("1.3")
            updatedAt = bs.html.find(
                lambda tag: tag.name == "time" and "datetime" in tag.attrs
            ).attrs['datetime']
        except Exception:
            pass
    # Fall back to today's date (e.g. '2020-03-25T00:01:00+0200' style).
    if publishedAt == '':
        publishedAt = Utils.today()
    if updatedAt == '':
        updatedAt = Utils.today()
    return publishedAt, updatedAt
def deleteFiles(req):
    """Force-remove every file in the comma-separated POSTed 'deleteList'."""
    targets = req.POST.get('deleteList', None).split(",")
    operator = Utils.fileOperator()
    for target in targets:
        operator.forceRemove(target)
    return HttpResponse(json.dumps({"ok": True}),
                        content_type="application/json")
def GetDividend(stockId):
    """Summarize the last five years of cash/stock dividends for *stockId*.

    Scrapes the dividend-policy table, drops '∟' sub-rows and any
    not-yet-paid future year, then builds one row of 'cash / stock' strings
    keyed by payout year.

    Returns:
        pandas.DataFrame: single row; columns are the payout years.
    """
    url = f'https://goodinfo.tw/tw/StockDividendPolicy.asp?STOCK_ID={stockId}'
    cssSelector = '#divDetail'
    try:
        df = Utils.GetDataFrameByCssSelector(url, cssSelector)
        df.columns = df.columns.get_level_values(3)
    except Exception:
        # Likely throttled: back off and retry once.
        time.sleep(random.randint(20, 30))
        df = Utils.GetDataFrameByCssSelector(url, cssSelector)
        df.columns = df.columns.get_level_values(3)
    # Column labels contain embedded spaces; strip them for stable access.
    df.columns = df.columns.str.replace(' ', '')
    # Drop '∟' marker sub-rows.
    df = df[df['股利發放年度'] != '∟']
    # If the first row is a payout year beyond the current year, skip it.
    firstRow = df.iloc[0, :]
    if int(firstRow['股利發放年度']) > datetime.now().year:
        df = df.iloc[1:, :]
    rowsCount = 5
    # Year / cash dividend / stock dividend, first rowsCount rows, reindexed.
    year = pd.to_numeric(df.iloc[:, 0], errors='coerce').dropna(
        how='any', axis=0).head(rowsCount).astype(int).reset_index(drop=True)
    cash = pd.to_numeric(df.iloc[:, 3], errors='coerce').dropna(
        how='any', axis=0).head(rowsCount).reset_index(drop=True)
    stock = pd.to_numeric(df.iloc[:, 6], errors='coerce').dropna(
        how='any', axis=0).head(rowsCount).reset_index(drop=True)
    # BUG FIX: the original iterated a fixed range(0, rowsCount) and raised
    # KeyError for stocks with fewer than five years of dividend history.
    count = min(rowsCount, len(year), len(cash), len(stock))
    data = [
        str(cash[i]).rjust(6) + ' / ' + str(stock[i]).rjust(6)
        for i in range(count)
    ]
    print(data)
    return pd.DataFrame([data], columns=year.head(count))
def copyFiles(req):
    """Copy (or move, when 'isMove' == "true") the POSTed files to targetPath."""
    sources = req.POST.get('needCopyFileList', None).split(",")
    destination = req.POST.get('targetPath', None)
    # The original spelled this as `False if isMove != "true" else True`.
    move_flag = req.POST.get('isMove', False) == "true"
    Utils.fileOperator().copyFiles(sources, destination, move_flag)
    return HttpResponse(json.dumps({"ok": True}),
                        content_type="application/json")
def browseUrl(url):
    """Download *url* with the configured User-Agent and return its HTML.

    Logs the elapsed load time around the fetch.
    """
    started = time()
    logger.debug("[%s] loading %s...", 'id', url)
    req = Request(url, headers={'User-Agent': Constants.user_agent})
    html = Utils.load_html(req)
    logger.debug('[%s] loading completed in %s seconds', 'id', time() - started)
    return html
def main(req):
    """Render the file-browser index page in the cookie-selected language.

    BUG FIX: the original returned None when the 'language' cookie was
    missing or carried any value other than "en"/"cn" -- Django rejects a
    None view return with a 500. Unknown values now fall back to English.

    NOTE(review): relies on a module-level `rootpath`; the config-loading
    code was commented out upstream -- confirm `rootpath` is initialized.
    """
    folder = Utils.Folder(rootpath)
    dataJson = folder.getFolderJson()
    language = req.COOKIES.get('language')
    if language == "cn":
        return render(req, "index_zh-CN.html", {"dataJson": dataJson})
    # Default (covers "en", missing and unknown cookies): English template.
    return render(req, "index_en-US.html", {"dataJson": dataJson})
def GetAllDividend():
    """Crawl all dividend-ranking pages (RANK 0-5) from goodinfo.tw into one CSV.

    Each page is fetched after a randomized 10-20s delay to avoid throttling;
    repeated header rows and non-4-digit IDs are dropped. The first page
    creates the CSV, subsequent pages append without headers.
    """
    cssSelector = '#divStockList'
    filePath = f'{Utils.GetRootPath()}\\Data\\Yearly\\合計股利.csv'
    for rankIndex in range(0, 6):
        url = (f'https://goodinfo.tw/tw/StockList.asp?SHEET=股利政策'
               f'&MARKET_CAT=熱門排行&INDUSTRY_CAT=合計股利&RANK={rankIndex}')
        print(url)
        # Pause 10-20 seconds between pages to stay under the rate limit.
        time.sleep(random.randint(10, 20))
        try:
            df = Utils.GetDataFrameByCssSelector(url, cssSelector)
        except Exception:
            # Throttled anyway: back off longer and retry once.
            time.sleep(random.randint(20, 30))
            df = Utils.GetDataFrameByCssSelector(url, cssSelector)
        print(df)
        # Flatten the MultiIndex header to its first level.
        df.columns = df.columns.get_level_values(0)
        # Remove repeated header rows embedded in the table.
        df = df.drop_duplicates(keep=False, inplace=False)
        print(df)
        # Keep ordinary stocks only (4-digit IDs).
        df = df[df['代號'].astype(str).map(len) == 4]
        if rankIndex == 0:
            df.to_csv(filePath, encoding='utf_8_sig')
        else:
            df.to_csv(filePath, mode='a', header=False, encoding='utf_8_sig')
    print('執行完成')
def WriteData():
    """Crawl director/supervisor shareholding pages (RANK 0-4) into one CSV.

    Pages are concatenated into a single frame; duplicated header rows are
    filtered out before writing under Data\\Monthly.
    """
    cssSelector = '#divStockList'
    sum_df = pd.DataFrame()
    for rankIndex in range(0, 5):
        url = (f'https://goodinfo.tw/tw/StockList.asp?SHEET=董監持股'
               f'&MARKET_CAT=熱門排行&INDUSTRY_CAT=全體董監持股比例&RANK={rankIndex}')
        print(url)
        try:
            time.sleep(random.randint(5, 10))
            df = Utils.GetDataFrameByCssSelector(url, cssSelector)
        except Exception:
            # Throttled: back off and retry once.
            time.sleep(random.randint(20, 30))
            df = Utils.GetDataFrameByCssSelector(url, cssSelector)
        print(df)
        # BUG FIX: the original concatenated only inside the try block, so a
        # page fetched via the retry path was silently dropped from sum_df.
        sum_df = pd.concat([sum_df, df], axis=0)
    # Drop rows that repeat the header, then write the combined table.
    # (positional `any(1)` is deprecated; use the keyword form)
    sum_df[sum_df.ne(sum_df.columns).any(axis=1)].to_csv(
        f'{Utils.GetRootPath()}\\Data\\Monthly\\董監持股比例.csv',
        encoding='utf_8_sig')
def GetPE(stockId):
    """Fetch the six PER bracket (multiple, price) pairs for *stockId*.

    Reads the first row / last six columns of the weekly PER chart table;
    each column label embeds the PER multiple, which is extracted with a
    regex while the cell value is the corresponding price.

    Returns:
        pandas.DataFrame: one row, columns 本益比-級距N倍數 / 本益比-級距N價格.
    """
    url = (f'https://goodinfo.tw/StockInfo/ShowK_ChartFlow.asp?RPT_CAT=PER'
           f'&STOCK_ID={stockId}&CHT_CAT=WEEK')
    css_selector = '#divK_ChartFlowDetail'
    try:
        df = Utils.GetDataFrameByCssSelector(url, css_selector)
    except Exception:
        # Likely throttled: back off and retry once.
        time.sleep(random.randint(20, 30))
        df = Utils.GetDataFrameByCssSelector(url, css_selector)
    # Hoisted out of try/except (the original duplicated this slice in both
    # branches); first row, last six columns hold the PER brackets.
    firstRow = df.iloc[0, -6:]
    # Series -> list of {key: multiple-from-label, value: price}.
    dictionaries = [
        dict(key=re.findall(r'[0-9]+[.]?[0-9]*', str(k))[0], value=v)
        for k, v in firstRow.items()
    ]
    headers = ['本益比-級距1倍數', '本益比-級距1價格', '本益比-級距2倍數', '本益比-級距2價格',
               '本益比-級距3倍數', '本益比-級距3價格', '本益比-級距4倍數', '本益比-級距4價格',
               '本益比-級距5倍數', '本益比-級距5價格', '本益比-級距6倍數', '本益比-級距6價格']
    data = []
    for entry in dictionaries:
        data.append(entry['key'])
        data.append(entry['value'])
    return pd.DataFrame([data], columns=headers)
def main(req):
    """Render the index page in the cookie-selected language, defaulting to English.

    Refreshes the module-level `rootpath` from ./app/rootpath.conf when the
    file is readable; otherwise the previous value is kept.
    NOTE(review): if the file is missing and `rootpath` was never set,
    Utils.Folder raises NameError -- confirm the module initializes it.

    BUG FIX: the original returned None for unknown 'language' cookie values,
    which Django rejects with a 500; unknown values now render English.
    """
    global rootpath
    try:
        with open("./app/rootpath.conf") as root:
            rootpath = root.read()
    except Exception:
        pass  # best effort: keep the previous rootpath
    folder = Utils.Folder(rootpath)
    dataJson = folder.getFolderJson()
    language = req.COOKIES.get('language')
    if language == "cn":
        return render(req, "index_zh-CN.html", {"dataJson": dataJson})
    # Default (covers "en", missing and unknown cookies): English template.
    return render(req, "index_en-US.html", {"dataJson": dataJson})
def main(req):
    """Render the file-browser index page; web root is hard-coded per OS.

    NOTE(review): on any OS other than Windows/Linux `rootpath` is never
    assigned here -- it relies on a module-level value; confirm that it is
    initialized, otherwise the logging line raises NameError.

    BUG FIX: the original returned None for unknown 'language' cookie values,
    which Django rejects with a 500; unknown values now render English.
    """
    global rootpath
    system = platform.system()  # call once instead of twice
    if system == "Windows":
        # Raw string: the original relied on \w and \S not being escapes.
        rootpath = r"D:\web_file_root\SC-TestCase"
    elif system == "Linux":
        rootpath = "/opt/web_file_root"
    logUtil.logger.info("view.rootpath" + rootpath)
    folder = Utils.Folder(rootpath)
    dataJson = folder.getFolderJson()
    language = req.COOKIES.get('language')
    if language == "cn":
        return render(req, "index_zh-CN.html", {"dataJson": dataJson})
    # Default (covers "en", missing and unknown cookies): English template.
    return render(req, "index_en-US.html", {"dataJson": dataJson})
def test_different_ids(self):
    """Distinct category names must map to distinct generated ids."""
    id_sanidad = Utils.generate_id("Sanidad")
    id_educacion = Utils.generate_id("Educación")
    self.assertNotEqual(id_sanidad, id_educacion)
def test_equals_ids(self):
    """The same category name must always map to the same generated id."""
    first = Utils.generate_id("Sanidad")
    second = Utils.generate_id("Sanidad")
    self.assertEqual(first, second)
def GetTransaction(stockId):
    """Build a one-row DataFrame of 1/5/20/60-day SMA summaries for *stockId*.

    Columns cover close price, volume, foreign-holding % and margin-short
    ratio. Marker icons are prefixed: 👍 when the last close exceeds both the
    5- and 20-day averages, 👎 when it is below the 60-day average, and 🏆
    when the last volume is more than 3x its 5-day average. A column whose
    data cannot be parsed yields an empty string.
    """
    url = f'https://goodinfo.tw/tw/ShowK_Chart.asp?STOCK_ID={stockId}&CHT_CAT2=DATE'
    cssSelector = '#divPriceDetail'
    try:
        df = Utils.GetDataFrameByCssSelector(url, cssSelector)
        df.columns = df.columns.get_level_values(1)
    except Exception:
        # Likely throttled: back off and retry once.
        time.sleep(random.randint(20, 30))
        df = Utils.GetDataFrameByCssSelector(url, cssSelector)
        df.columns = df.columns.get_level_values(1)
    headers = ['收盤', '張數', '外資 持股 (%)', '券資 比 (%)']
    smaPeriods = [1, 5, 20, 60]  # fixed typo: was `smaPeroids`
    result = {}  # renamed from `dict` -- never shadow the builtin
    for header in headers:
        # Column key, e.g. '收盤(1ma / 5ma / 20ma / 60ma)'; computed once
        # (the original duplicated this expression in try and except).
        key = (header.replace(' ', '') + '(' +
               'ma / '.join(map(str, smaPeriods)) + 'ma)')
        try:
            entry = ''
            for period in smaPeriods:
                series = pd.to_numeric(df[header], errors='coerce').dropna(
                    how='any', axis=0).head(period)
                sma = round(series.mean(), 2)
                entry += ('' if entry == '' else ' / ') + str(sma).rjust(8)
            if header == '收盤':
                values = [x.strip() for x in entry.split('/')]
                prefixIcon = ''
                if (float(values[0]) > float(values[1])
                        and float(values[0]) > float(values[2])):
                    prefixIcon = '👍'
                elif float(values[0]) < float(values[3]):
                    prefixIcon = '👎'
                entry = prefixIcon + entry
            # Volume spike: last volume > 3x the 5-day average.
            if header == '張數':
                values = [x.strip() for x in entry.split('/')]
                if float(values[0]) / float(values[1]) > 3.0:
                    entry = '🏆' + entry
            result[key] = str(entry)
        except Exception:
            result[key] = ''
    return pd.DataFrame([result])
def downloadFiles(req):
    """Zip the comma-separated POSTed 'downloadFileList' and stream it back."""
    requested = req.POST.get("downloadFileList").split(",")
    print(requested)
    operator = Utils.fileOperator()
    return operator.zipFilesInResponse(requested)
def GetChampionStock(op):
    """Dispatch the stock-screening pipeline selected by *op*.

    op == 0: dump the filtered candidate list (過濾清單) to CSV.
    op == 1: append PER brackets for a hard-coded watch list.
    op == 2: build the full detail sheet (彙整清單) for selected stocks.
    op == 3: daily chip-side data for the module-level `stocks` list.
    op == 4: weekly shareholder distribution + PER per stock in `stocks`.
    op == 5: refresh director shareholding data.
    op == 7: chip anomalies for the top-100 volume stocks, then purge
             yesterday's temp chip folder.

    NOTE(review): relies on module-level names (`stocks`, Sleep, GetVolume,
    shareholderDistribution, directorSharehold, dailyTopVolume); any other
    `op` value is a no-op. Returns None.
    """
    # Filtered candidate list (過濾清單)
    if op == 0:
        competitors = GetBasicStockInfo(True)
        print(competitors)
        competitors.to_csv(f'{Utils.GetRootPath()}\Data\Temp\過濾清單.csv',
                           encoding='utf_8_sig')
    if op == 1:
        basicStockInfo_df = GetBasicStockInfo(True)
        # Hard-coded watch list of 4-digit stock IDs.
        for stockId in [
                '2477', '2915', '1608', '2809', '5469', '1313', '2357', '1304',
                '2855', '5533', '2891', '3036', '2505', '2816', '2905', '2461',
                '2885', '1513', '3033', '9945', '3702', '1904', '3022', '1776',
                '3028', '2535', '2353', '1308', '3048', '3312', '2387', '1305',
                '1604'
        ]:
            PE_df = GetPE(stockId)
            print(PE_df)
            Sleep()  # throttle between scrapes
            stockInfo_df = basicStockInfo_df[basicStockInfo_df['證券代號'] ==
                                             stockId]
            stockInfo_df.reset_index(drop=True, inplace=True)
            print(stockInfo_df)
            temp_df = pd.concat([stockInfo_df, PE_df], axis=1)
            # Append one row per stock, no header.
            temp_df.to_csv(f'{Utils.GetRootPath()}\Data\Temp\過濾清單(含本益比).csv',
                           mode='a',
                           header=False,
                           encoding='utf_8_sig')
    # Full detail data (明細資料)
    if op == 2:
        basicStockInfo_df = GetBasicStockInfo()
        for stockId in ['9930']:
            print(stockId)
            stockInfo_df = basicStockInfo_df[basicStockInfo_df['證券代號'] ==
                                             stockId]
            stockInfo_df.reset_index(drop=True, inplace=True)
            print(stockInfo_df)
            if not stockInfo_df.empty:
                Sleep()
                finDetail_df = GetFinDetail(stockId)
                print(finDetail_df)
                PE_df = GetPE(stockId)
                print(PE_df)
                Sleep()
                transaction_df = GetTransaction(stockId)
                print(transaction_df)
                volume_df = GetVolume(stockId)
                print(volume_df)
                Sleep()
                dividend_df = GetDividend(stockId)
                print(dividend_df)
                Sleep()
                distribution_df = shareholderDistribution.GetDistribution(
                    stockId)
                print(distribution_df)
                # Merge every section side by side into a single row.
                temp_df = pd.concat([
                    stockInfo_df, transaction_df, volume_df, PE_df,
                    distribution_df, finDetail_df, dividend_df
                ],
                                    axis=1)
                print(temp_df)
                # Append row by row to the CSV without headers.
                temp_df.to_csv(f'{Utils.GetRootPath()}\Data\Temp\彙整清單.csv',
                               mode='a',
                               header=False,
                               encoding='utf_8_sig')
    # Daily chip-side data (日常籌碼面資料)
    if op == 3:
        basicStockInfo_df = GetBasicStockInfo()
        # `stocks` is a module-level list -- TODO confirm it is defined.
        for stockId in stocks:
            print(stockId)
            stockInfo_df = basicStockInfo_df[basicStockInfo_df['證券代號'] ==
                                             stockId]
            stockInfo_df.reset_index(drop=True, inplace=True)
            print(stockInfo_df)
            if not stockInfo_df.empty:
                Sleep()
                transaction_df = GetTransaction(stockId)
                print(transaction_df)
                volume_df = GetVolume(stockId)
                print(volume_df)
                temp_df = pd.concat([stockInfo_df, transaction_df, volume_df],
                                    axis=1)
                print(temp_df)
                temp_df.to_csv(f'{Utils.GetRootPath()}\Data\Daily\籌碼面資料.csv',
                               mode='a',
                               header=False,
                               encoding='utf_8_sig')
    # Big holders & PER (大戶、本益比)
    if op == 4:
        shareholderDistribution.WriteData()
        for stockId in stocks:
            print(stockId)
            Sleep()
            distribution_df = shareholderDistribution.GetDistribution(stockId)
            print(distribution_df)
            Sleep()
            PE_df = GetPE(stockId)
            print(PE_df)
            temp_df = pd.concat([PE_df, distribution_df], axis=1)
            print(temp_df)
            # Weekly file stamped with today's date.
            temp_df.to_csv(
                f'{Utils.GetRootPath()}\Data\\Weekly\股東分布_本益比_{date.today().strftime("%Y%m%d")}.csv',
                mode='a',
                header=False,
                encoding='utf_8_sig')
    if op == 5:
        directorSharehold.WriteData()
    if op == 7:
        basicStockInfo_df = GetBasicStockInfo()
        # Top-100 stocks by daily trading volume.
        topVolumeStocks = dailyTopVolume.GetTopVolume()[:100]
        for stockId in topVolumeStocks:
            print(stockId)
            stockInfo_df = basicStockInfo_df[basicStockInfo_df['證券代號'] ==
                                             stockId]
            stockInfo_df.reset_index(drop=True, inplace=True)
            print(stockInfo_df)
            if not stockInfo_df.empty:
                volume_df = GetVolume(stockId)
                print(volume_df)
                temp_df = pd.concat([stockInfo_df, volume_df], axis=1)
                print(temp_df)
                temp_df.to_csv(
                    f'{Utils.GetRootPath()}\Data\Daily\異常籌碼資料_{date.today().strftime("%Y%m%d")}.csv',
                    mode='a',
                    header=False,
                    encoding='utf_8_sig')
        # Delete yesterday's cached chip files (刪除暫存檔案); best effort.
        try:
            folderPath = pathlib.Path(
                f'{Utils.GetRootPath()}\Data\Daily\Chip\{(date.today() - timedelta(days=1)).strftime("%Y%m%d")}'
            )
            Utils.delete_folder(folderPath)
        except Exception as ex:
            print(ex)