def monthly_report(path, year, month):
    """Download one month of revenue figures, clean them, and save them as CSV.

    Args:
        path: Prefix that the output file name is appended to (plain string
            concatenation, so include a trailing separator if it is a
            directory).
        year: Gregorian year (> 1990) or ROC (Minguo) year; anything
            accepted by ``int()``.
        month: Month, inserted into the URL and file name via ``str()``.

    Returns:
        The cleaned DataFrame, which is also written to
        ``path + "<year>_<month>_MonthlyRevenue.csv"``.

    Raises:
        Exception: Whatever the download or HTML parsing raised; it is
            logged and re-raised unchanged.
    """
    int_year = int(year)

    # Gregorian years are converted to ROC years; any other value is taken
    # to be an ROC year already.
    china_year = int_year - 1911 if int_year > 1990 else int_year

    url_prefix = 'http://mops.twse.com.tw/nas/t21/sii/t21sc03_' + str(
        china_year) + '_' + str(month)
    # Reports up to ROC year 98 use a URL without the "_0" suffix.
    if china_year <= 98:
        url = url_prefix + '.html'
    else:
        url = url_prefix + '_0.html'

    print(url)

    # Download the page for that year/month and let pandas parse its tables.
    try:
        html_report = htmlRequest(url, "get", "")
        html_report.encoding = 'big5'
        html_df = pd.read_html(StringIO(html_report.text))
        print("DataFrame create success")
    except Exception as e:
        print("exception happened")
        print(e)
        # Without the parsed tables nothing below can work, so surface the
        # real cause instead of failing later on an undefined name.
        raise

    # Either the first table already holds everything, or the data is split
    # across several narrow tables that have to be stitched together.
    if html_df[0].shape[0] > 500:
        df = html_df[0].copy()
    else:
        df = pd.concat([table for table in html_df if table.shape[1] <= 11])
    df = df[list(range(0, 10))]

    # Promote the row whose first cell is the company-code caption to header.
    # NOTE(review): the value found is an index label but is used
    # positionally with iloc; after concat the two may differ - confirm.
    column_index = df.index[(df[0] == u'公司代號')][0]
    df.columns = df.iloc[column_index]

    # Rows whose monthly revenue is not numeric (captions, repeated headers)
    # become NaN and are dropped, as is the totals row.
    df[u'當月營收'] = pd.to_numeric(df[u'當月營收'], errors='coerce')
    df = df[~df[u'當月營收'].isnull()]
    df = df[df[u'公司代號'] != u'合計'].reset_index(drop=True)

    # Deliberate pause before returning, presumably to throttle repeated
    # requests made by the caller.
    time.sleep(5)
    df.to_csv(path + str(year) + "_" + str(month) + "_MonthlyRevenue.csv",
              index=False,
              encoding="utf-8")
    return df
def margin_purchase_and_short_sale_query(date, mode):
    """Fetch the HTML tables for *date* and *mode* and return them concatenated.

    The request URL is ``url + str(date) + stock_type + str(mode)``, where
    ``url`` and ``stock_type`` are names resolved outside this function.

    Args:
        date: Value converted with ``str()`` and inserted into the URL.
        mode: Value converted with ``str()`` and appended to the URL.

    Returns:
        A single DataFrame made by concatenating every table that
        ``pd.read_html`` found in the response.

    Raises:
        Exception: If the request or the HTML parsing fails; the message
            names the URL and the underlying error.
    """
    target_url = url + str(date) + stock_type + str(mode)
    print("margin_purchase_and_short_sale_query: URL [" + target_url + "]")

    try:
        html_report = htmlRequest(target_url, "get", "")
        DataFrame_form = pd.read_html(html_report.text.encode('utf8'))
    except Exception as e:
        print("margin_purchase_and_short_sale_query (ERROR): Exception")
        print(e)
        # Keep raising a plain Exception for existing callers, but say what
        # failed instead of raising an empty one.
        raise Exception(
            "margin_purchase_and_short_sale_query failed for [%s]: %s"
            % (target_url, e))

    return pd.concat(DataFrame_form)
def stock_query_html_table_by_type(date, mode):
    """Fetch the HTML tables for *date* and *mode* and return them concatenated.

    The request URL is ``url + str(date) + stock_type + str(mode)``, where
    ``url`` and ``stock_type`` are names resolved outside this function.

    Args:
        date: Value converted with ``str()`` and inserted into the URL.
        mode: Value converted with ``str()`` and appended to the URL.

    Returns:
        A single DataFrame made by concatenating every table that
        ``pd.read_html`` found in the response.

    Raises:
        Exception: If the request or the HTML parsing fails; the message
            names the URL and the underlying error.
    """
    target_url = url + str(date) + stock_type + str(mode)
    print("stock_query_html_table_by_type: URL [" + target_url + "]")

    try:
        html_report = htmlRequest(target_url, "get", "")
        DataFrame_form = pd.read_html(html_report.text.encode('utf8'))
    except Exception as e:
        print("stock_query_html_table_by_type (ERROR): Exception")
        print(e)
        # Keep raising a plain Exception for existing callers, but say what
        # failed instead of raising an empty one.
        raise Exception(
            "stock_query_html_table_by_type failed for [%s]: %s"
            % (target_url, e))

    print("stock_query_html_table_by_type: return HTML result ")
    return pd.concat(DataFrame_form)
# Example #4
# 0
def invest_table_by_type(date, mode):
    """Fetch the HTML tables for *date* and *mode* and return them concatenated.

    The request URL is ``url + str(date) + stock_type + str(mode)``, where
    ``url`` and ``stock_type`` are names resolved outside this function.

    Args:
        date: Value converted with ``str()`` and inserted into the URL.
        mode: Value converted with ``str()`` and appended to the URL.

    Returns:
        A single DataFrame made by concatenating every table that
        ``pd.read_html`` found in the response.

    Raises:
        Exception: If the request or the HTML parsing fails; the message
            names the URL and the underlying error.
    """
    target_url = url + str(date) + stock_type + str(mode)
    print("invest_table_by_type: URL [" + target_url + "]")

    try:
        # htmlRequest parameters: url, restful, payload
        html_report = htmlRequest(target_url, "get", "")
        DataFrame_form = pd.read_html(html_report.text.encode('utf8'))
        print("invest_table_by_type: DataFrame_form obtained")
    except Exception as e:
        print("invest_table_by_type (ERROR): Exception")
        print(e)
        # Keep raising a plain Exception for existing callers, but say what
        # failed instead of raising an empty one.
        raise Exception(
            "invest_table_by_type failed for [%s]: %s" % (target_url, e))

    return pd.concat(DataFrame_form)
def fetch_data(url, cmp_id, cmp_name, group):
    """Scan the table rows of the page at *url* and collect matching entries.

    Every ``<tr>`` except the first is flattened into the list of ``.text``
    values of the row element and its descendants. A row that flattens to
    exactly four entries sets the current type (its third entry, with
    surrounding spaces stripped). Any other row is passed to
    ``make_row_tuple`` together with the current type; when element 7 of the
    result equals ``'ESVUFR'``, elements 1, 2 and 6 are appended to
    *cmp_id*, *cmp_name* and *group* respectively.

    Args:
        url: Address of the page to download.
        cmp_id: List extended in place with element 1 of each matching row.
        cmp_name: List extended in place with element 2 of each matching row.
        group: List extended in place with element 6 of each matching row.

    Returns:
        None; results are delivered through the three list arguments.
    """
    response = htmlRequest(url, "get", "")
    document = etree.HTML(response.text)

    current_type = ''
    # The first <tr> is skipped.
    for row in document.xpath('//tr')[1:]:
        texts = [node.text for node in row.iter()]

        if len(texts) == 4:
            # Four entries means a type row rather than a data row.
            current_type = texts[2].strip(' ')
            continue

        fields = make_row_tuple(current_type, texts)
        if fields[7] != u'ESVUFR':
            continue

        cmp_id.append(fields[1])
        cmp_name.append(fields[2])
        group.append(fields[6])
def _fill_sheet_values(sheet_table, layout, fetched):
    """Copy the value of every item listed in *layout* from *sheet_table* into *fetched*.

    Column 0 of *sheet_table* is compared against each item name and column 1
    supplies the value. Items that are not found (including when the table
    has no rows at all) are stored as 0.
    """
    row_count = sheet_table.shape[0]
    search_start = 0
    for members in layout.values():
        for cell in members:
            for row_item in range(search_start, row_count):
                row = sheet_table.loc[row_item]
                if row[0] == cell:
                    fetched[cell] = row[1]
                    # Later items resume from the last hit instead of
                    # rescanning from the top of the table.
                    search_start = row_item
                    break
            else:
                # Scanned to the end (or nothing to scan) without a match.
                fetched[cell] = 0


def _ratio_or_zero(label, numerator, denominator):
    """Return numerator / denominator (0.0 for a zero denominator) and log it under *label*."""
    if denominator == 0:
        print("exception error happened")
        ratio = 0.0
    else:
        ratio = float(numerator / denominator)
    print(label + " : " + str(ratio))
    return ratio


def fetch_entire_finacialStatement(year,
                                   section,
                                   company_name,
                                   company_id,
                                   company_group,
                                   report_url_mode,
                                   balance_sheet_fetch,
                                   cash_flow_sheet_fetch,
                                   income_statement_sheet_fetch):
    """Download one company's financial statements and return them as one row.

    The page is parsed with ``pd.read_html``; tables 1, 2 and 3 are used as
    the balance sheet, income statement and cash-flow statement. The items
    named by ``balance_sheet()``, ``income_statement_sheet()`` and
    ``cash_flow_sheet()`` are looked up in those tables and written into the
    three ``*_fetch`` dicts (0 when an item is missing). Nine derived ratios
    are then computed from the fetched values.

    Args:
        year: Year, inserted into the URL via ``str()``.
        section: Quarter/section, inserted into the URL via ``str()``.
        company_name: Stored (as ``str``) in the ``Name`` column.
        company_id: Used in the URL and stored (as ``str``) in ``ID``.
        company_group: Stored (as ``str``) in the ``Group`` column.
        report_url_mode: String appended to the end of the URL.
        balance_sheet_fetch: Dict updated in place with balance-sheet items.
        cash_flow_sheet_fetch: Dict updated in place with cash-flow items.
        income_statement_sheet_fetch: Dict updated in place with
            income-statement items.

    Returns:
        A one-row DataFrame: Name, ID, Group, the three fetched dicts, then
        the derived ratio columns.

    Raises:
        Exception: Whatever the download or HTML parsing raised; it is
            logged and re-raised unchanged.
        RuntimeError: If the page contains fewer than the four tables that
            are needed.
        KeyError: If an item required by a ratio is absent from the fetched
            dicts.
    """
    print(report_url_mode)
    target_url = (web_url + company_url + str(company_id) + year_url +
                  str(year) + q_url + str(section) + report_url_mode)
    print(target_url)
    try:
        html_report = htmlRequest(target_url, "get", "")
        html_report.encoding = 'big5'
        DataFrame_form = pd.read_html(StringIO(html_report.text))
        print("DataFrame create success")
    except Exception as e:
        print("exception happened")
        print(e)
        # Nothing below can run without the parsed tables; surface the real
        # cause instead of failing later on an undefined name.
        raise

    # Tables 1..3 are read below, so at least four tables must be present.
    if len(DataFrame_form) < 4:
        print("hteml file not exist")
        print(DataFrame_form)
        raise RuntimeError(
            "expected at least 4 tables but found %d at [%s]"
            % (len(DataFrame_form), target_url))

    print("fetch data start")
    balance_sheet_table = DataFrame_form[1]
    income_statement_sheet_table = DataFrame_form[2]
    cash_flow_sheet_table = DataFrame_form[3]

    print("fetch balance_sheet_fetch start")
    _fill_sheet_values(balance_sheet_table, balance_sheet(),
                       balance_sheet_fetch)

    print("fetch income_statement_sheet_table start")
    _fill_sheet_values(income_statement_sheet_table, income_statement_sheet(),
                       income_statement_sheet_fetch)

    print("fetch cash_flow_sheet_fetch start")
    _fill_sheet_values(cash_flow_sheet_table, cash_flow_sheet(),
                       cash_flow_sheet_fetch)

    identity_frames = [
        pd.DataFrame({"Name": str(company_name)}, index=[0]),
        pd.DataFrame({"ID": str(company_id)}, index=[0]),
        pd.DataFrame({"Group": str(company_group)}, index=[0]),
    ]
    sheet_frames = [
        pd.DataFrame(data=balance_sheet_fetch, index=[0]),
        pd.DataFrame(data=income_statement_sheet_fetch, index=[0]),
        pd.DataFrame(data=cash_flow_sheet_fetch, index=[0]),
    ]

    # Inputs shared by the ratios below.
    operating_income = float(income_statement_sheet_fetch[u'營業利益(損失)'])
    non_operating = float(income_statement_sheet_fetch[u'營業外收入及支出合計'])
    inventory = float(balance_sheet_fetch[u'存貨'])
    operating_cost = float(income_statement_sheet_fetch[u'營業成本合計'])
    revenue = float(income_statement_sheet_fetch[u'營業收入合計'])
    gross_profit = float(income_statement_sheet_fetch[u'營業毛利(毛損)淨額'])
    net_income = float(income_statement_sheet_fetch[u'本期淨利(淨損)'])
    total_equity = float(balance_sheet_fetch[u'權益總額'])
    total_liabilities_and_equity = float(balance_sheet_fetch[u'負債及權益總計'])

    # (column label, numerator, denominator), in output column order.
    ratio_specs = [
        ("母公司淨利比例", operating_income, operating_income + non_operating),
        ("業外占營收比例", non_operating, operating_income + non_operating),
        ("存貨周轉率", operating_cost, inventory),
        ("毛利率", gross_profit, revenue),
        ("營業利益率", operating_income, revenue),
        ("淨利率", net_income, revenue),
        # ROE computed on net income with the non-operating part removed.
        ("ROE_Org", net_income - non_operating, total_equity),
        ("ROE", net_income, total_equity),
        ("ROA", net_income, total_liabilities_and_equity),
    ]
    ratio_frames = [
        pd.DataFrame({label: _ratio_or_zero(label, numerator, denominator)},
                     index=[0])
        for label, numerator, denominator in ratio_specs
    ]

    frame_table = pd.concat(identity_frames + sheet_frames + ratio_frames,
                            axis=1,
                            sort=False)
    return frame_table