Ejemplo n.º 1
0
def _html_to_pe_df(lst):
    del lst[0]
    del lst[0]
    del lst[0]
    lst.pop()
    
    l = len(lst) 
    dates = np.empty(l, dtype = 'datetime64[D]')
    arr = np.empty([3,l])
    c_cnt = 0
    r_cnt = 0
    for r in lst:
        c_cnt = 0
        for c in r.find_all('td'):
            try:
                val = float(c.get_text().replace(',','').replace(' ',''))
            except:
                val = np.NaN
            if c_cnt == 0:
                dates[r_cnt] = str_to_date(c.text)
            elif c_cnt > 0 and c_cnt <= 3 :
                arr[c_cnt - 1][r_cnt] = val
            c_cnt += 1
        r_cnt += 1
    df = pd.DataFrame()
    
    df['P/E'] = arr[0]
    df['P/B'] = arr[1]
    df['Div Yield'] = arr[2]
    
    
    df.index = dates
    return df
Ejemplo n.º 2
0
def _html_to_pe_df(lst):
    del lst[0]
    del lst[0]
    del lst[0]
    lst.pop()
    
    l = len(lst) 
    dates = np.empty(l, dtype = 'datetime64[D]')
    arr = np.empty([3,l])
    c_cnt = 0
    r_cnt = 0
    for r in lst:
        c_cnt = 0
        for c in r.find_all('td'):
            try:
                val = float(c.get_text().replace(',','').replace(' ',''))
            except:
                val = np.NaN
            if c_cnt == 0:
                dates[r_cnt] = str_to_date(c.text)
            elif c_cnt > 0 and c_cnt <= 3 :
                arr[c_cnt - 1][r_cnt] = val
            c_cnt += 1
        r_cnt += 1
    df = pd.DataFrame()
    
    df['P/E'] = arr[0]
    df['P/B'] = arr[1]
    df['Div Yield'] = arr[2]
    
    
    df.index = dates
    return df
Ejemplo n.º 3
0
def _html_to_index_df(lst):
    ''' delete top 3 rows which contain text headers'''    
    del lst[0]
    del lst[0]
    del lst[0]
    lst.pop()
    l = len(lst) 
    dates = np.empty(l, dtype = 'datetime64[D]')
    arr = np.empty([6,l])
    c_cnt = 0
    r_cnt = 0
    for r in lst:
        c_cnt = 0
        for c in r.find_all('td'):
            try:
                val = float(c.get_text().replace(',','').replace(' ',''))
            except:
                val = np.NaN
            if c_cnt == 0:
                dates[r_cnt] = str_to_date(c.text)
            elif c_cnt > 0 and c_cnt <= 6 :
                arr[c_cnt - 1][r_cnt] = val
            c_cnt += 1
        r_cnt += 1
    df = pd.DataFrame()
    
    df['Open'] = arr[0]
    df['High'] = arr[1]
    df['Low'] = arr[2]
    df['Close'] = arr[3]
    df['Shares Traded'] = arr[4]
    df['Turnover'] = arr[5]
    df.index = dates
    return df
Ejemplo n.º 4
0
def _html_to_index_df(lst):
    ''' delete top 3 rows which contain text headers'''    
    del lst[0]
    del lst[0]
    del lst[0]
    lst.pop()
    l = len(lst) 
    dates = np.empty(l, dtype = 'datetime64[D]')
    arr = np.empty([6,l])
    c_cnt = 0
    r_cnt = 0
    for r in lst:
        c_cnt = 0
        for c in r.find_all('td'):
            try:
                val = float(c.get_text().replace(',','').replace(' ',''))
            except:
                val = np.NaN
            if c_cnt == 0:
                dates[r_cnt] = str_to_date(c.text)
            elif c_cnt > 0 and c_cnt <= 6 :
                arr[c_cnt - 1][r_cnt] = val
            c_cnt += 1
        r_cnt += 1
    df = pd.DataFrame()
    
    df['Open'] = arr[0]
    df['High'] = arr[1]
    df['Low'] = arr[2]
    df['Close'] = arr[3]
    df['Shares Traded'] = arr[4]
    df['Turnover'] = arr[5]
    df.index = dates
    return df