Esempio n. 1
0
def GetTopVolume():
    """Fetch the goodinfo.tw hot-ranking list and return its 4-character stock codes.

    The table under ``#divStockList`` is loaded through
    ``Utils.GetDataFrameByCssSelector``. If the first attempt raises, the
    function waits 20-30 seconds and tries exactly once more; a second
    failure propagates to the caller.

    Side effects:
        Prints the URL (and the fetched frame when the retry path was taken)
        and writes the filtered table to ``日成交張數創近期新高日數.csv`` in the
        ``Data`` / ``Daily`` folder below ``Utils.GetRootPath()``.

    Returns:
        The values of the ``代號`` column of the filtered table.
    """
    cssSelector = '#divStockList'
    url = 'https://goodinfo.tw/tw/StockList.asp?RPT_TIME=&MARKET_CAT=熱門排行&INDUSTRY_CAT=日成交張數創近期新高日數@@成交張數@@日成交張數創近期新高日數'
    print(url)

    try:
        df = Utils.GetDataFrameByCssSelector(url, cssSelector)
    except Exception:
        # ``except Exception`` (not a bare ``except``) so Ctrl+C / SystemExit
        # abort immediately instead of triggering the back-off and retry.
        time.sleep(random.randint(20, 30))
        df = Utils.GetDataFrameByCssSelector(url, cssSelector)
        print(df)

    # Keep only the first level of the column labels.
    df.columns = df.columns.get_level_values(0)
    # keep=False drops every row that occurs more than once.
    df = df.drop_duplicates(keep=False)
    # Keep rows whose code is exactly four characters long.
    df = df[df['代號'].astype(str).map(len) == 4]

    # Backslashes are escaped explicitly; the resulting path is unchanged.
    df.to_csv(f'{Utils.GetRootPath()}\\Data\\Daily\\日成交張數創近期新高日數.csv',
              encoding='utf_8_sig')
    return df['代號'].values
Esempio n. 2
0
def GetFinData(stockId):
    """Fetch the goodinfo.tw ``StockFinDetail`` table for one stock.

    Args:
        stockId: Stock identifier interpolated into the ``STOCK_ID`` query
            parameter.

    Returns:
        Whatever ``Utils.GetDataFrameByCssSelector`` returns for the
        ``#txtFinBody`` element (presumably a pandas DataFrame - confirm
        against ``Utils``).

    Raises:
        Any exception raised by the second fetch attempt.
    """
    url = f'https://goodinfo.tw/StockInfo/StockFinDetail.asp?RPT_CAT=XX_M_QUAR_ACC&STOCK_ID={stockId}'
    css_selector = '#txtFinBody'
    try:
        return Utils.GetDataFrameByCssSelector(url, css_selector)
    except Exception:
        # ``except Exception`` (not a bare ``except``) so Ctrl+C / SystemExit
        # are not swallowed. Back off 20-30 seconds, then retry exactly once.
        time.sleep(random.randint(20, 30))
        return Utils.GetDataFrameByCssSelector(url, css_selector)
Esempio n. 3
0
def GetDividend(stockId):
    """Summarise the most recent dividend rows of one stock from goodinfo.tw.

    The ``#divDetail`` table of the dividend-policy page is fetched (with a
    single retry after a 20-30 second pause if the first attempt raises).
    Up to five rows are then condensed into a one-row DataFrame whose columns
    are the years and whose cells read ``"<cash> / <stock>"``.

    Rows in which the year, cash or stock value is not numeric are skipped as
    a whole, so the three values always come from the same table row. When
    fewer than five usable rows exist, the result simply has fewer columns.

    Args:
        stockId: Stock identifier interpolated into the ``STOCK_ID`` query
            parameter.

    Returns:
        A pandas DataFrame with one row and one column per year.

    Side effects:
        Prints the list of formatted ``cash / stock`` strings.
    """
    url = f'https://goodinfo.tw/tw/StockDividendPolicy.asp?STOCK_ID={stockId}'
    cssSelector = '#divDetail'

    def fetch():
        # Flattening stays part of the attempt: a page with an unexpected
        # header layout raises here and therefore also triggers the retry.
        frame = Utils.GetDataFrameByCssSelector(url, cssSelector)
        frame.columns = frame.columns.get_level_values(3)
        return frame

    try:
        df = fetch()
    except Exception:
        # ``except Exception`` (not a bare ``except``) so Ctrl+C / SystemExit
        # are not swallowed.
        time.sleep(random.randint(20, 30))
        df = fetch()

    # Remove spaces from the column labels.
    df.columns = df.columns.str.replace(' ', '')

    # Drop the rows whose year cell is the '∟' marker.
    df = df[df['股利發放年度'] != '∟']

    # If the first row belongs to a year later than the current one, skip it.
    # A non-numeric first cell coerces to NaN, which compares False.
    if not df.empty:
        firstYear = pd.to_numeric(df.iloc[0]['股利發放年度'], errors='coerce')
        if firstYear > datetime.now().year:
            df = df.iloc[1:, :]

    rowsCount = 5
    # Positional columns 0 / 3 / 6 hold year / cash / stock according to the
    # original comments. Coerce them together and drop incomplete rows in one
    # step so the three series cannot drift out of alignment.
    numeric = pd.DataFrame({
        'year': pd.to_numeric(df.iloc[:, 0], errors='coerce'),
        'cash': pd.to_numeric(df.iloc[:, 3], errors='coerce'),
        'stock': pd.to_numeric(df.iloc[:, 6], errors='coerce'),
    }).dropna(how='any', axis=0).head(rowsCount).reset_index(drop=True)

    year = numeric['year'].astype(int)
    data = [
        str(cash).rjust(6) + ' / ' + str(stock).rjust(6)
        for cash, stock in zip(numeric['cash'], numeric['stock'])
    ]

    print(data)
    return pd.DataFrame([data], columns=year)
Esempio n. 4
0
def GetAllDividend():
    """Download six ranking pages of the goodinfo.tw dividend list into one CSV.

    For ``RANK`` 0 through 5 the ``#divStockList`` table is fetched after a
    10-20 second pause. If a fetch raises, the function waits a further 20-30
    seconds and retries that page exactly once; a second failure propagates.
    Each page is reduced to rows whose ``代號`` is four characters long. The
    first page creates ``合計股利.csv`` (with header) in the ``Data`` /
    ``Yearly`` folder below ``Utils.GetRootPath()``; later pages are appended
    without a header.

    Side effects:
        Prints each URL and frame, plus a completion message, and writes the
        CSV file described above.

    Returns:
        None.
    """
    cssSelector = '#divStockList'
    # Loop-invariant; backslashes escaped explicitly (resulting path unchanged).
    filePath = f'{Utils.GetRootPath()}\\Data\\Yearly\\合計股利.csv'

    for rankIndex in range(0, 6):
        url = f'https://goodinfo.tw/tw/StockList.asp?SHEET=股利政策&MARKET_CAT=熱門排行&INDUSTRY_CAT=合計股利&RANK={rankIndex}'
        print(url)

        # Pause 10-20 seconds between page requests.
        time.sleep(random.randint(10, 20))

        try:
            df = Utils.GetDataFrameByCssSelector(url, cssSelector)
        except Exception:
            # ``except Exception`` (not a bare ``except``) so Ctrl+C /
            # SystemExit stop the run instead of starting another wait.
            time.sleep(random.randint(20, 30))
            df = Utils.GetDataFrameByCssSelector(url, cssSelector)
            print(df)

        # Keep only the first level of the column labels.
        df.columns = df.columns.get_level_values(0)
        # keep=False drops every row that occurs more than once
        # (the repeated header rows, per the original comment).
        df = df.drop_duplicates(keep=False)
        print(df)
        # Keep rows whose code is exactly four characters long.
        df = df[df['代號'].astype(str).map(len) == 4]

        if rankIndex == 0:
            df.to_csv(filePath, encoding='utf_8_sig')
        else:
            df.to_csv(filePath, mode='a', header=False, encoding='utf_8_sig')

    print('執行完成')
Esempio n. 5
0
def WriteData():
    """Download five ranking pages of the goodinfo.tw director-holdings list into one CSV.

    For ``RANK`` 0 through 4 the ``#divStockList`` table is fetched after a
    5-10 second pause. If a fetch raises, the function waits a further 20-30
    seconds and retries that page exactly once; a second failure propagates.
    The frame obtained from either attempt is kept, so a page that only
    succeeds on the retry is no longer lost. All pages are concatenated
    row-wise and written to ``董監持股比例.csv`` in the ``Data`` / ``Monthly``
    folder below ``Utils.GetRootPath()``.

    Side effects:
        Prints each URL and fetched frame and writes the CSV file.

    Returns:
        None.
    """
    cssSelector = '#divStockList'
    frames = []

    for rankIndex in range(0, 5):
        url = f'https://goodinfo.tw/tw/StockList.asp?SHEET=董監持股&MARKET_CAT=熱門排行&INDUSTRY_CAT=全體董監持股比例&RANK={rankIndex}'
        print(url)

        time.sleep(random.randint(5, 10))
        try:
            df = Utils.GetDataFrameByCssSelector(url, cssSelector)
        except Exception:
            # ``except Exception`` (not a bare ``except``) so Ctrl+C /
            # SystemExit are not swallowed.
            time.sleep(random.randint(20, 30))
            df = Utils.GetDataFrameByCssSelector(url, cssSelector)
        print(df)
        frames.append(df)

    # One concat at the end instead of growing a frame inside the loop.
    sum_df = pd.concat(frames, axis=0)

    # Drop repeated header rows: keep a row only if at least one cell differs
    # from its column label. ``axis`` must be a keyword on pandas 2.x.
    sum_df[sum_df.ne(sum_df.columns).any(axis=1)].to_csv(
        f'{Utils.GetRootPath()}\\Data\\Monthly\\董監持股比例.csv', encoding='utf_8_sig')
Esempio n. 6
0
def GetPE(stockId):
    """Build a one-row table of the six P/E bands shown on a stock's weekly PER chart page.

    The ``#divK_ChartFlowDetail`` table is fetched and the last six cells of
    its first row are taken (with a single retry after a 20-30 second pause
    if fetching or slicing raises). For each of those six columns the first
    number found in the column label and the cell value are emitted as a
    pair, filling the twelve ``本益比-級距N倍數`` / ``本益比-級距N價格`` columns.

    Args:
        stockId: Stock identifier interpolated into the ``STOCK_ID`` query
            parameter.

    Returns:
        A pandas DataFrame with one row and twelve columns.

    Raises:
        IndexError: If one of the six column labels contains no number.
    """
    url = f'https://goodinfo.tw/StockInfo/ShowK_ChartFlow.asp?RPT_CAT=PER&STOCK_ID={stockId}&CHT_CAT=WEEK'
    css_selector = '#divK_ChartFlowDetail'

    def fetch_first_row():
        # First row, last six columns. Slicing stays part of the attempt so
        # an unexpectedly empty table also triggers the retry.
        frame = Utils.GetDataFrameByCssSelector(url, css_selector)
        return frame.iloc[0, -6:]

    try:
        firstRow = fetch_first_row()
    except Exception:
        # ``except Exception`` (not a bare ``except``) so Ctrl+C / SystemExit
        # are not swallowed.
        time.sleep(random.randint(20, 30))
        firstRow = fetch_first_row()

    headers = ['本益比-級距1倍數', '本益比-級距1價格',
               '本益比-級距2倍數', '本益比-級距2價格',
               '本益比-級距3倍數', '本益比-級距3價格',
               '本益比-級距4倍數', '本益比-級距4價格',
               '本益比-級距5倍數', '本益比-級距5價格',
               '本益比-級距6倍數', '本益比-級距6價格']

    # Flatten (label, cell) pairs into [number-from-label, cell, ...].
    data = []
    for label, value in firstRow.items():
        data.append(re.findall(r'[0-9]+[.]?[0-9]*', str(label))[0])
        data.append(value)

    return pd.DataFrame([data], columns=headers)
Esempio n. 7
0
def GetTransaction(stockId):
    """Summarise 1/5/20/60-row averages of selected columns of a stock's price table.

    The ``#divPriceDetail`` table is fetched (with a single retry after a
    20-30 second pause if the first attempt raises). For each of the columns
    ``收盤``, ``張數``, ``外資  持股  (%)`` and ``券資  比  (%)`` the numeric values
    are averaged over the first 1, 5, 20 and 60 rows (presumably the most
    recent trading days - confirm the table order), rounded to two decimals
    and joined as ``"a / b / c / d"``.

    Markers prepended to an entry:
        * ``收盤``: ``👍`` when the 1-row value exceeds both the 5- and 20-row
          averages, otherwise ``👎`` when it is below the 60-row average.
        * ``張數``: ``🏆`` when the 1-row value is more than three times the
          5-row average. A zero 5-row average now just omits the marker; it
          used to raise ZeroDivisionError, which blanked the whole entry.

    A column that cannot be processed (e.g. it is missing from the table) is
    reported as an empty string rather than failing the whole call.

    Args:
        stockId: Stock identifier interpolated into the ``STOCK_ID`` query
            parameter.

    Returns:
        A pandas DataFrame with one row and one column per header, named
        ``<header without spaces>(1ma / 5ma / 20ma / 60ma)``.
    """
    url = f'https://goodinfo.tw/tw/ShowK_Chart.asp?STOCK_ID={stockId}&CHT_CAT2=DATE'
    cssSelector = '#divPriceDetail'

    def fetch():
        # Flattening stays part of the attempt: a page with an unexpected
        # header layout raises here and therefore also triggers the retry.
        frame = Utils.GetDataFrameByCssSelector(url, cssSelector)
        frame.columns = frame.columns.get_level_values(1)
        return frame

    try:
        df = fetch()
    except Exception:
        # ``except Exception`` (not a bare ``except``) so Ctrl+C / SystemExit
        # are not swallowed.
        time.sleep(random.randint(20, 30))
        df = fetch()

    headers = ['收盤', '張數', '外資  持股  (%)', '券資  比  (%)']
    smaPeroids = [1, 5, 20, 60]
    keySuffix = '(' + 'ma / '.join(map(str, smaPeroids)) + 'ma)'

    summary = {}
    for header in headers:
        key = header.replace(' ', '') + keySuffix
        try:
            column = pd.to_numeric(df[header], errors='coerce').dropna()
            smas = [round(column.head(period).mean(), 2)
                    for period in smaPeroids]
            entry = ' / '.join(str(sma).rjust(8) for sma in smas)

            # Plain Python floats for the comparisons below; unpacking relies
            # on smaPeroids holding exactly four periods.
            latest, ma5, ma20, ma60 = (float(sma) for sma in smas)

            if header == '收盤':
                if latest > ma5 and latest > ma20:
                    entry = '👍' + entry
                elif latest < ma60:
                    entry = '👎' + entry

            # Volume more than three times its 5-row average.
            if header == '張數' and ma5 != 0 and latest / ma5 > 3.0:
                entry = '🏆' + entry

            summary[key] = entry
        except Exception:
            # Deliberate best effort: one unusable column must not prevent
            # the remaining columns from being reported.
            summary[key] = ''

    return pd.DataFrame([summary])