def craw(date: str) -> pd.DataFrame:
    """Build the per-security quote table from the 'data1'/'fields1' payload.

    The raw response is obtained from ``get_dict(date)``.  Thousands
    separators are stripped, ``'--'`` placeholders become NaN, a leading
    '年月日' column (taken from the response's own 'date' field) is added,
    and the numeric columns are handed to ``ast.to_int`` / ``ast.to_float``.

    Raises:
        crawler.NoData: when the response's 'stat' field carries the
            "no matching data" message.
    """
    payload = get_dict(date)
    no_data_msg = '很抱歉,沒有符合條件的資料!'
    if 'stat' in payload and payload['stat'] == no_data_msg:
        raise crawler.NoData(no_data_msg)

    rows = payload['data1']
    header = payload['fields1']
    stamp = payload['date']
    iso_day = '-'.join((stamp[0:4], stamp[4:6], stamp[6:]))

    frame = pd.DataFrame(rows, columns=header)
    frame = frame.replace(',', '', regex=True)
    frame = frame.replace('--', np.nan)
    frame.insert(0, '年月日', iso_day)
    frame['年月日'] = pd.to_datetime(frame['年月日']).astype(str)

    # The sign column arrives as HTML snippets; map them to +1 / -1.
    sign = frame['漲跌(+/-)']
    sign = sign.replace("<p style= color:red>+</p>", 1)
    sign = sign.replace("<p style= color:green>-</p>", -1)
    sign = sign.replace('X', np.nan)
    sign = sign.replace(' ', 0)
    frame['漲跌(+/-)'] = sign

    # Flag column: empty -> 0, '*' -> 1.
    # NOTE(review): the '*' replacement appears twice in the original; kept.
    limit_flag = frame['牛熊證觸及限制價格']
    limit_flag = limit_flag.replace('', 0)
    limit_flag = limit_flag.replace('*', 1)
    limit_flag = limit_flag.replace('*', 1)
    frame['牛熊證觸及限制價格'] = limit_flag.fillna(np.nan)

    frame['本益比'] = frame['本益比'].replace('', np.nan).fillna(np.nan)

    intColumns = ['成交股數', '成交筆數', '最後揭示買量', '最後揭示賣量']
    float_candidates = [
        '成交金額', '開盤價', '最高價', '最低價', '收盤價', '漲跌(+/-)', '漲跌價差',
        '最後揭示買價', '最後揭示賣價', '本益比', '牛熊證觸及限制價格',
        '標的證券收盤價/指數',
    ]
    # Only the float columns actually present in this response are converted.
    floatColumns = [name for name in float_candidates if name in frame.columns]

    numeric_cols = intColumns + floatColumns
    frame[numeric_cols] = frame[numeric_cols].replace('', 0).fillna(np.nan)

    frame = ast.to_int(intColumns, frame)
    frame = ast.to_float(floatColumns, frame)
    return frame
def craw_hugeDeal(date: str) -> pd.DataFrame:
    """Build the table from the 'data'/'fields' payload of ``get_dict(date)``.

    Rows whose '證券代號' equals '總計' are dropped, a '年月日' column is
    prepended and a '第幾筆' column (initialised to 1) is appended.  The
    result is grouped by ('年月日', '證券代號') and each group is passed
    through ``addNumberF``.

    Side effect: when the response contains no rows, the formatted date is
    appended to the module-level ``empty`` list (rebinding the global).

    Raises:
        crawler.NoData: when the response's 'stat' field carries the
            "no matching data" message.
    """
    global empty

    payload = get_dict(date)
    no_data_msg = '很抱歉,沒有符合條件的資料!'
    if 'stat' in payload and payload['stat'] == no_data_msg:
        raise crawler.NoData(no_data_msg)

    rows = payload['data']
    header = payload['fields']
    stamp = payload['date']
    iso_day = '-'.join((stamp[0:4], stamp[4:6], stamp[6:]))

    if rows == []:
        # Remember days that came back without any rows.
        empty = empty + [iso_day]

    frame = pd.DataFrame(rows, columns=header)
    frame = frame.replace(',', '', regex=True)
    frame = frame.replace('--', np.nan)
    frame = frame.replace('', np.nan)

    frame = frame[frame['證券代號'] != '總計']
    frame.insert(0, '年月日', iso_day)
    frame.insert(len(frame.columns), '第幾筆', 1)
    frame['年月日'] = pd.to_datetime(frame['年月日']).astype(str)

    intColumns = ['第幾筆']
    # The set of numeric columns varies, so keep only those present.
    floatColumns = [
        name
        for name in ['成交價', '成交股數', '成交金額', '成交量']
        if name in frame.columns
    ]

    frame[floatColumns] = frame[floatColumns].astype(float)
    frame[intColumns] = frame[intColumns].astype(int)

    grouped = frame.groupby(['年月日', '證券代號'])
    return grouped.apply(addNumberF)
def craw(date: str) -> pd.DataFrame:
    """Build the table from the 'data4'/'fields4' payload of ``get_dict(date)``.

    The first two raw rows hold cells of the form ``"a(b)"``.  Each of them
    is expanded into two output rows: one with the text before ``'('`` and
    one with the text inside the parentheses.  Raw rows 2-4 are appended
    unchanged, so the resulting frame has seven rows.

    Raises:
        crawler.NoData: when the response's 'stat' field carries the
            "no matching data" message.
    """
    payload = get_dict(date)
    no_data_msg = '很抱歉,沒有符合條件的資料!'
    if 'stat' in payload and payload['stat'] == no_data_msg:
        raise crawler.NoData(no_data_msg)

    data = payload['data4']
    fields = payload['fields4']
    stamp = payload['date']
    iso_day = stamp[0:4] + '-' + stamp[4:6] + '-' + stamp[6:]

    rows = []
    for packed in (data[0], data[1]):
        # "a(b)" -> one row of the "a" parts, one row of the "b" parts.
        rows.append([cell.split('(')[0] for cell in packed])
        rows.append([cell.split('(')[1].replace(')', '') for cell in packed])
    rows.extend([data[2], data[3], data[4]])

    df = pd.DataFrame(rows, columns=fields)
    df = df.replace(',', '', regex=True).replace('--', np.nan)
    df.insert(0, '年月日', iso_day)
    df['年月日'] = pd.to_datetime(df['年月日']).astype(str)

    intColumns = ['整體市場', '股票']
    df = ast.to_int(intColumns, df)
    return df
def craw_priceEarning(date: str) -> pd.DataFrame:
    """Build the yield / P-E / P-B table from ``get_dict(date)``.

    The response's 'data'/'fields' table is cleaned (thousands separators
    removed, ``'--'`` and ``'-'`` turned into NaN, code and name stripped)
    and returned with a fixed column layout:
    年月日, 證券代號, 證券名稱, 殖利率(%), 股利年度, 本益比, 股價淨值比, 財報年/季.

    Three response layouts are handled:

    * both '股利年度' and '財報年/季' present: both get 1911 added to the year;
    * only '財報年/季' present: '股利年度' is filled with NaN;
    * '財報年/季' absent: both columns are filled with NaN.

    Raises:
        crawler.NoData: when the response's 'stat' field carries the
            "no matching data" message.
    """
    payload = get_dict(date)
    no_data_msg = '很抱歉,沒有符合條件的資料!'
    if 'stat' in payload and payload['stat'] == no_data_msg:
        raise crawler.NoData(no_data_msg)

    data = payload['data']
    fields = payload['fields']
    stamp = payload['date']
    iso_day = stamp[0:4] + '-' + stamp[4:6] + '-' + stamp[6:]

    df = pd.DataFrame(data, columns=fields).replace(',', '', regex=True)
    df = df.replace('--', np.nan).replace('-', np.nan)
    df['證券代號'] = df['證券代號'].str.strip()
    df['證券名稱'] = df['證券名稱'].str.strip()
    df.insert(0, '年月日', iso_day)
    df['年月日'] = pd.to_datetime(df['年月日']).astype(str)

    floatColumns = ['殖利率(%)', '本益比', '股價淨值比']
    df[floatColumns] = df[floatColumns].astype(float)

    columns = ['年月日', '證券代號', '證券名稱', '殖利率(%)', '股利年度', '本益比',
               '股價淨值比', '財報年/季']

    # Each column must be tested on its own: the former
    # `'股利年度' and '財報年/季' in ...` only checked the second name,
    # because a non-empty string literal is always truthy.
    has_dividend_year = '股利年度' in df.columns
    has_report_period = '財報年/季' in df.columns

    if has_report_period:
        if has_dividend_year:
            # Years are shifted by 1911 (presumably ROC -> Gregorian).
            df['股利年度'] = df['股利年度'].astype(int) + 1911
        else:
            df['股利年度'] = np.nan
        # "YYY/Q" -> "<YYY + 1911>/Q"
        period_parts = df['財報年/季'].str.split('/')
        shifted_year = (period_parts.str[0].astype(int) + 1911).astype(str)
        df['財報年/季'] = shifted_year + '/' + period_parts.str[1]
    else:
        df['股利年度'] = np.nan
        df['財報年/季'] = np.nan

    return df[columns]
def craw(date: str) -> pd.DataFrame:
    """Build the table from the 'data3'/'fields3' payload of ``get_dict(date)``.

    Thousands separators are removed, ``'--'`` becomes NaN, a leading
    '年月日' column is added from the response's 'date' field, and the
    amount / volume / count columns are converted by ``ast.to_float``.

    Raises:
        crawler.NoData: when the response's 'stat' field carries the
            "no matching data" message.
    """
    payload = get_dict(date)
    no_data_msg = '很抱歉,沒有符合條件的資料!'
    if 'stat' in payload and payload['stat'] == no_data_msg:
        raise crawler.NoData(no_data_msg)

    rows = payload['data3']
    header = payload['fields3']
    stamp = payload['date']
    iso_day = '-'.join((stamp[0:4], stamp[4:6], stamp[6:]))

    frame = pd.DataFrame(rows, columns=header)
    frame = frame.replace(',', '', regex=True)
    frame = frame.replace('--', np.nan)

    frame.insert(0, '年月日', iso_day)
    frame['年月日'] = pd.to_datetime(frame['年月日']).astype(str)

    floatColumns = ['成交金額(元)', '成交股數(股)', '成交筆數']
    return ast.to_float(floatColumns, frame)
def craw_margin(date: str) -> pd.DataFrame:
    """Build the short-sale table from the 'data'/'fields' payload.

    The raw '證券名稱' cell holds whitespace-separated tokens; the first
    token becomes the new '證券代號' column and the second replaces
    '證券名稱'.  The '合計' row is dropped, a '年月日' column is prepended,
    amount columns are cast to float and quantity columns to int.

    Raises:
        crawler.NoData: when the response's 'stat' field carries the
            "no matching data" message.
    """
    payload = get_dict(date)
    no_data_msg = '很抱歉,沒有符合條件的資料!'
    if 'stat' in payload and payload['stat'] == no_data_msg:
        raise crawler.NoData(no_data_msg)

    rows = payload['data']
    header = payload['fields']
    stamp = payload['date']
    iso_day = '-'.join((stamp[0:4], stamp[4:6], stamp[6:]))

    frame = pd.DataFrame(rows, columns=header)
    frame = frame.replace(',', '', regex=True)
    frame = frame.replace('--', np.nan)
    frame = frame.replace('-', np.nan)

    frame = frame[frame['證券名稱'] != '合計']

    # Split "<code> <name>" once and reuse the tokens for both columns.
    tokens = frame['證券名稱'].str.split()
    frame.insert(0, '證券代號', tokens.str[0].str.strip())
    frame['證券名稱'] = tokens.str[1].str.strip()

    frame.insert(0, '年月日', iso_day)
    frame['年月日'] = pd.to_datetime(frame['年月日']).astype(str)

    floatColumns = ['融券賣出成交金額', '借券賣出成交金額']
    frame[floatColumns] = frame[floatColumns].astype(float)

    intColumns = ['融券賣出成交數量', '借券賣出成交數量']
    frame[intColumns] = frame[intColumns].astype(int)
    return frame
def craw(date: str) -> pd.DataFrame:
    """Build the index table from the 'data2'/'fields2' payload.

    The HTML-encoded sign column is mapped to +1 / -1 (``'X'`` and a single
    space become 0), the '報酬指數' column is renamed to '指數', a leading
    '年月日' column is added, and the numeric columns are converted by
    ``ast.to_float``.

    Raises:
        crawler.NoData: when the response's 'stat' field carries the
            "no matching data" message.
    """
    payload = get_dict(date)
    no_data_msg = '很抱歉,沒有符合條件的資料!'
    if 'stat' in payload and payload['stat'] == no_data_msg:
        raise crawler.NoData(no_data_msg)

    rows = payload['data2']
    header = payload['fields2']
    stamp = payload['date']
    iso_day = '-'.join((stamp[0:4], stamp[4:6], stamp[6:]))

    frame = pd.DataFrame(rows, columns=header)
    frame = frame.replace(',', '', regex=True)
    frame = frame.replace('--', np.nan)

    sign = frame['漲跌(+/-)']
    sign = sign.replace("<p style ='color:red'>+</p>", 1)
    sign = sign.replace("<p style ='color:green'>-</p>", -1)
    sign = sign.replace('X', 0)
    sign = sign.replace(' ', 0)
    frame['漲跌(+/-)'] = sign

    frame.insert(0, '年月日', iso_day)
    frame = frame.rename(columns={'報酬指數': '指數'})
    frame['年月日'] = pd.to_datetime(frame['年月日']).astype(str)

    floatColumns = ['收盤指數', '漲跌(+/-)', '漲跌點數', '漲跌百分比(%)']
    return ast.to_float(floatColumns, frame)
def craw(date: str) -> pd.DataFrame:
    """Build the quote table from the 'data5'/'fields5' payload.

    Thousands separators are removed, ``'--'`` and empty strings become
    NaN, the HTML-encoded sign column is mapped to +1 / -1 (``'X'`` and a
    single space become 0), a leading '年月日' column is added, and every
    numeric column is converted by ``ast.to_float``.

    Raises:
        crawler.NoData: when the response's 'stat' field carries the
            "no matching data" message.
    """
    payload = get_dict(date)
    no_data_msg = '很抱歉,沒有符合條件的資料!'
    if 'stat' in payload and payload['stat'] == no_data_msg:
        raise crawler.NoData(no_data_msg)

    rows = payload['data5']
    header = payload['fields5']
    stamp = payload['date']
    iso_day = '-'.join((stamp[0:4], stamp[4:6], stamp[6:]))

    frame = pd.DataFrame(rows, columns=header)
    frame = frame.replace(',', '', regex=True)
    frame = frame.replace('--', np.nan)
    frame = frame.replace('', np.nan)

    sign = frame['漲跌(+/-)']
    sign = sign.replace('<p style= color:red>+</p>', 1)
    sign = sign.replace('<p style= color:green>-</p>', -1)
    sign = sign.replace('X', 0)
    sign = sign.replace(' ', 0)
    frame['漲跌(+/-)'] = sign

    frame.insert(0, '年月日', iso_day)
    frame['年月日'] = pd.to_datetime(frame['年月日']).astype(str)

    floatColumns = [
        '成交股數', '成交筆數', '成交金額', '開盤價', '最高價', '最低價', '收盤價',
        '漲跌(+/-)', '漲跌價差', '最後揭示買價', '最後揭示買量', '最後揭示賣價',
        '最後揭示賣量', '本益比',
    ]
    return ast.to_float(floatColumns, frame)
def craw_institutional(date: str) -> pd.DataFrame:
    """Build the institutional-trading table from the 'data'/'fields' payload.

    ``'</br>'`` markup is removed from both the field names and the cell
    values, thousands separators are stripped, ``'--'`` becomes NaN and a
    leading '年月日' column is added.  Every column other than 年月日,
    證券代號 and 證券名稱 is cast to float.

    Raises:
        crawler.NoData: when the response's 'stat' field carries the
            "no matching data" message.
    """
    payload = get_dict(date)
    no_data_msg = '很抱歉,沒有符合條件的資料!'
    if 'stat' in payload and payload['stat'] == no_data_msg:
        raise crawler.NoData(no_data_msg)

    rows = payload['data']
    header = [name.replace('</br>', '') for name in payload['fields']]
    stamp = payload['date']
    iso_day = '-'.join((stamp[0:4], stamp[4:6], stamp[6:]))

    frame = pd.DataFrame(rows, columns=header)
    frame = frame.replace(',', '', regex=True)
    frame = frame.replace('--', np.nan)
    frame = frame.replace('</br>', '', regex=True)

    frame.insert(0, '年月日', iso_day)
    frame['年月日'] = pd.to_datetime(frame['年月日']).astype(str)
    frame['證券名稱'] = frame['證券名稱'].str.strip()

    # Cast everything that is not an identifying text column, rather than
    # listing the numeric columns explicitly, so the conversion does not
    # depend on which set of field names the response uses.
    text_cols = ['年月日', '證券代號', '證券名稱']
    numeric_cols = [name for name in list(frame) if name not in text_cols]
    frame[numeric_cols] = frame[numeric_cols].astype(float)
    return frame