def monthly_report(path, year, month):
    """Download one monthly revenue report page and save it as a CSV file.

    The page for the requested year/month is fetched from mops.twse.com.tw,
    parsed into tables with pandas, reduced to the rows that carry a numeric
    value in the u'當月營收' (monthly revenue) column, written to
    ``path + "<year>_<month>_MonthlyRevenue.csv"`` and returned.

    Args:
        path: Prefix of the output file name. It is concatenated as-is, so
            it must end with a path separator when it names a directory.
        year: Year of the report. Values above 1990 are treated as Gregorian
            and converted to the ROC calendar (minus 1911); smaller values
            are used unchanged as ROC years.
        month: Month of the report; used verbatim in the URL and file name.

    Returns:
        pandas.DataFrame holding the cleaned report rows.

    Raises:
        Exception: Whatever the download or the HTML parsing raised; the
            error is logged and then re-raised unchanged.
    """
    # Convert a Gregorian year to the ROC calendar. A year that is already
    # ROC is kept as-is (previously this path hit an unbound variable).
    roc_year = int(year)
    if roc_year > 1990:
        roc_year -= 1911

    # Reports up to ROC year 98 use a URL without the "_0" suffix.
    url_stem = ('http://mops.twse.com.tw/nas/t21/sii/t21sc03_'
                + str(roc_year) + '_' + str(month))
    if roc_year <= 98:
        url = url_stem + '.html'
    else:
        url = url_stem + '_0.html'
    print(url)

    # Download the page for that year/month and let pandas parse its tables.
    try:
        html_report = htmlRequest(url, "get", "")
        html_report.encoding = 'big5'
        html_df = pd.read_html(StringIO(html_report.text))
    except Exception as e:
        print("exception happened")
        print(e)
        # Without the parsed tables nothing below can work, so surface the
        # real failure instead of failing later on an undefined name.
        raise
    print("DataFrame create success")

    # Either the first table already holds the whole report, or the report
    # is spread over several narrow tables that have to be stitched together.
    if html_df[0].shape[0] > 500:
        df = html_df[0].copy()
    else:
        df = pd.concat([table for table in html_df if table.shape[1] <= 11])

    # Keep the first ten columns and promote the row that holds the
    # u'公司代號' (company code) header text to be the column labels.
    df = df[list(range(0, 10))]
    column_index = df.index[(df[0] == u'公司代號')][0]
    df.columns = df.iloc[column_index]

    # Rows without a numeric monthly revenue (repeated headers, separators)
    # become NaN and are dropped; the u'合計' (total) rows are dropped too.
    df[u'當月營收'] = pd.to_numeric(df[u'當月營收'], errors='coerce')
    df = df[~df[u'當月營收'].isnull()]
    df = df[df[u'公司代號'] != u'合計'].reset_index(drop=True)

    # Deliberate pause (presumably to throttle successive requests).
    time.sleep(5)

    df.to_csv(path + str(year) + "_" + str(month) + "_MonthlyRevenue.csv",
              index=False, encoding="utf-8")
    return df
def margin_purchase_and_short_sale_query(date, mode):
    """Fetch the HTML tables for a date/mode and return them concatenated.

    The request URL is built from the module-level ``url`` and
    ``stock_type`` values (defined outside this function) combined with
    ``date`` and ``mode``.

    Args:
        date: Date component of the query; converted with ``str()``.
        mode: Mode component of the query; converted with ``str()``.

    Returns:
        pandas.DataFrame: every table found in the response, concatenated.

    Raises:
        Exception: If the request or the HTML parsing fails. The message
            names the URL and the underlying error.
    """
    target_url = url + str(date) + stock_type + str(mode)
    print("margin_purchase_and_short_sale_query: URL [" + target_url + "]")
    try:
        html_report = htmlRequest(target_url, "get", "")
        DataFrame_form = pd.read_html(html_report.text.encode('utf8'))
    except Exception as e:
        print("margin_purchase_and_short_sale_query (ERROR): Exception")
        print(e)
        # Same exception type as before, but now it says what went wrong.
        raise Exception(
            "margin_purchase_and_short_sale_query: failed to load tables from ["
            + target_url + "]: " + str(e))
    return pd.concat(DataFrame_form)
def stock_query_html_table_by_type(date, mode):
    """Fetch the HTML tables for a date/mode and return them concatenated.

    The request URL is built from the module-level ``url`` and
    ``stock_type`` values (defined outside this function) combined with
    ``date`` and ``mode``.

    Args:
        date: Date component of the query; converted with ``str()``.
        mode: Mode component of the query; converted with ``str()``.

    Returns:
        pandas.DataFrame: every table found in the response, concatenated.

    Raises:
        Exception: If the request or the HTML parsing fails. The message
            names the URL and the underlying error.
    """
    target_url = url + str(date) + stock_type + str(mode)
    print("stock_query_html_table_by_type: URL [" + target_url + "]")
    try:
        html_report = htmlRequest(target_url, "get", "")
        DataFrame_form = pd.read_html(html_report.text.encode('utf8'))
    except Exception as e:
        print("stock_query_html_table_by_type (ERROR): Exception")
        print(e)
        # Same exception type as before, but now it says what went wrong.
        raise Exception(
            "stock_query_html_table_by_type: failed to load tables from ["
            + target_url + "]: " + str(e))
    print("stock_query_html_table_by_type: return HTML result ")
    return pd.concat(DataFrame_form)
def invest_table_by_type(date, mode):
    """Fetch the HTML tables for a date/mode and return them concatenated.

    The request URL is built from the module-level ``url`` and
    ``stock_type`` values (defined outside this function) combined with
    ``date`` and ``mode``.

    Args:
        date: Date component of the query; converted with ``str()``.
        mode: Mode component of the query; converted with ``str()``.

    Returns:
        pandas.DataFrame: every table found in the response, concatenated.

    Raises:
        Exception: If the request or the HTML parsing fails. The message
            names the URL and the underlying error.
    """
    target_url = url + str(date) + stock_type + str(mode)
    print("invest_table_by_type: URL [" + target_url + "]")
    try:
        # htmlRequest parameters: url, restful, payload
        html_report = htmlRequest(target_url, "get", "")
        DataFrame_form = pd.read_html(html_report.text.encode('utf8'))
        print("invest_table_by_type: DataFrame_form obtained")
    except Exception as e:
        print("invest_table_by_type (ERROR): Exception")
        print(e)
        # Same exception type as before, but now it says what went wrong.
        raise Exception(
            "invest_table_by_type: failed to load tables from ["
            + target_url + "]: " + str(e))
    return pd.concat(DataFrame_form)
def fetch_data(url, cmp_id, cmp_name, group):
    """Scan the table rows of the page at *url* and collect matching entries.

    Every ``<tr>`` after the first is flattened into the list of ``.text``
    values of the nodes it contains. A flattened row of exactly four items is
    treated as a section header whose third item is the current type label;
    any other row is passed to ``make_row_tuple`` together with that label.
    When field 7 of the resulting tuple equals u'ESVUFR', fields 1, 2 and 6
    are appended to ``cmp_id``, ``cmp_name`` and ``group`` respectively.

    Args:
        url: Address of the page to download.
        cmp_id: List extended in place with field 1 of each matching row.
        cmp_name: List extended in place with field 2 of each matching row.
        group: List extended in place with field 6 of each matching row.

    Returns:
        None. Results are delivered through the three list arguments.
    """
    response = htmlRequest(url, "get", "")
    document = etree.HTML(response.text)
    current_type = ''

    # The first <tr> is skipped (presumably the table header -- confirm).
    for row_node in document.xpath('//tr')[1:]:
        cells = [node.text for node in row_node.iter()]

        if len(cells) == 4:
            # Section row: remember its label for the data rows that follow.
            current_type = cells[2].strip(' ')
            continue

        # Data row.
        record = make_row_tuple(current_type, cells)
        if record[7] != u'ESVUFR':
            continue
        cmp_id.append(record[1])
        cmp_name.append(record[2])
        group.append(record[6])
def _fill_sheet_values(sheet_table, sheet_layout, fetched):
    """Copy the value of every item named in sheet_layout from sheet_table into fetched."""
    row_count = sheet_table.shape[0]
    # The search only moves forward: each item is looked for starting at the
    # row where the previous item was found.
    search_start = 0
    for members in sheet_layout.values():
        for cell in members:
            for row_item in range(search_start, row_count):
                row = sheet_table.loc[row_item]
                if row[0] == cell:
                    fetched[cell] = row[1]
                    search_start = row_item
                    break
            else:
                # Not present in the remaining rows (or the table is empty):
                # record 0 so later lookups of this item never fail.
                fetched[cell] = 0


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 (with a log line) when the denominator is zero."""
    if denominator == 0:
        print("exception error happened")
        return 0.0
    return float(numerator / denominator)


def fetch_entire_finacialStatement(year, section, company_name, company_id, company_group, report_url_mode, balance_sheet_fetch, cash_flow_sheet_fetch, income_statement_sheet_fetch):
    """Download one financial-statement page and build a one-row summary frame.

    The page URL is assembled from module-level URL fragments (``web_url``,
    ``company_url``, ``year_url``, ``q_url``, defined outside this function)
    plus the arguments. Tables 1, 2 and 3 of the parsed page are used as the
    balance sheet, the income statement and the cash-flow statement. For each
    of them the items listed by ``balance_sheet()``,
    ``income_statement_sheet()`` and ``cash_flow_sheet()`` are looked up and
    stored in the corresponding ``*_fetch`` dictionary; items that cannot be
    found are stored as 0. Nine ratios are then derived and printed.

    Args:
        year: Year of the report; converted with ``str()`` for the URL.
        section: Quarter/section of the report; converted with ``str()``.
        company_name: Stored in the "Name" column of the result.
        company_id: Used in the URL and stored in the "ID" column.
        company_group: Stored in the "Group" column of the result.
        report_url_mode: URL suffix appended verbatim.
        balance_sheet_fetch: Dict updated in place with balance-sheet items.
        cash_flow_sheet_fetch: Dict updated in place with cash-flow items.
        income_statement_sheet_fetch: Dict updated in place with
            income-statement items.

    Returns:
        pandas.DataFrame with a single row (index 0) whose columns are, in
        order: Name, ID, Group, the balance-sheet items, the
        income-statement items, the cash-flow items and the nine ratios.

    Raises:
        Exception: Whatever the download or the HTML parsing raised; the
            error is logged and then re-raised unchanged.
        RuntimeError: If the page yields fewer than the four tables that the
            extraction needs.
        KeyError, ValueError, TypeError: If an item required by a ratio is
            absent from the fetched dictionaries or is not numeric.
    """
    print(report_url_mode)
    target_url = (web_url + company_url + str(company_id) + year_url
                  + str(year) + q_url + str(section) + report_url_mode)
    print(target_url)

    try:
        html_report = htmlRequest(target_url, "get", "")
        html_report.encoding = 'big5'
        DataFrame_form = pd.read_html(StringIO(html_report.text))
    except Exception as e:
        print("exception happened")
        print(e)
        # Nothing below can run without the parsed tables; surface the real
        # failure instead of failing later on an undefined name.
        raise
    print("DataFrame create success")

    # Tables 1..3 are indexed below, so at least four tables are required.
    if len(DataFrame_form) < 4:
        print("hteml file not exist")
        print(DataFrame_form)
        raise RuntimeError(
            "fetch_entire_finacialStatement: expected at least 4 tables at ["
            + target_url + "], got " + str(len(DataFrame_form)))

    print("fetch data start")
    balance_sheet_table = DataFrame_form[1]
    income_statement_sheet_table = DataFrame_form[2]
    cash_flow_sheet_table = DataFrame_form[3]

    print("fetch balance_sheet_fetch start")
    _fill_sheet_values(balance_sheet_table, balance_sheet(), balance_sheet_fetch)

    print("fetch income_statement_sheet_table start")
    _fill_sheet_values(income_statement_sheet_table, income_statement_sheet(),
                       income_statement_sheet_fetch)

    print("fetch cash_flow_sheet_fetch start")
    _fill_sheet_values(cash_flow_sheet_table, cash_flow_sheet(), cash_flow_sheet_fetch)

    # Inputs shared by the ratios below.
    operating_income = float(income_statement_sheet_fetch[u'營業利益(損失)'])
    non_operating = float(income_statement_sheet_fetch[u'營業外收入及支出合計'])
    inventory = float(balance_sheet_fetch[u'存貨'])
    operating_cost = float(income_statement_sheet_fetch[u'營業成本合計'])
    revenue = float(income_statement_sheet_fetch[u'營業收入合計'])
    gross_profit = float(income_statement_sheet_fetch[u'營業毛利(毛損)淨額'])
    net_income = float(income_statement_sheet_fetch[u'本期淨利(淨損)'])
    total_equity = float(balance_sheet_fetch[u'權益總額'])
    total_liabilities_and_equity = float(balance_sheet_fetch[u'負債及權益總計'])

    # (column label, numerator, denominator), in output column order.
    ratio_specs = [
        ("母公司淨利比例", operating_income, operating_income + non_operating),
        ("業外占營收比例", non_operating, operating_income + non_operating),
        ("存貨周轉率", operating_cost, inventory),
        ("毛利率", gross_profit, revenue),
        ("營業利益率", operating_income, revenue),
        ("淨利率", net_income, revenue),
        # "Original" ROE: net income excluding non-operating items.
        ("ROE_Org", net_income - non_operating, total_equity),
        ("ROE", net_income, total_equity),
        ("ROA", net_income, total_liabilities_and_equity),
    ]

    ratio_frames = []
    for label, numerator, denominator in ratio_specs:
        value = _safe_ratio(numerator, denominator)
        if label == "ROE_Org" and not numerator > 0.0:
            # Existing log convention: a non-positive numerator is always
            # shown with a leading minus sign.
            print("ROE_Org : -" + str(abs(value)))
        else:
            print(label + " : " + str(value))
        ratio_frames.append(pd.DataFrame({label: value}, index=[0]))

    leading_frames = [
        pd.DataFrame({"Name": str(company_name)}, index=[0]),
        pd.DataFrame({"ID": str(company_id)}, index=[0]),
        pd.DataFrame({"Group": str(company_group)}, index=[0]),
        pd.DataFrame(data=balance_sheet_fetch, index=[0]),
        pd.DataFrame(data=income_statement_sheet_fetch, index=[0]),
        pd.DataFrame(data=cash_flow_sheet_fetch, index=[0]),
    ]
    frame_table = pd.concat(leading_frames + ratio_frames, axis=1, sort=False)
    return frame_table