Example #1
0
def scrape_latest(soup, url, trading_date, formatted_output):
    """Scrape the i3investor latest-insider table into {stock: [rows]}.

    Returns None when the page or the 'nc' table is missing.  Rows are
    newest-first, so scanning stops at the first row announced before
    trading_date.  11-column rows are director/shareholder transactions,
    10-column rows are company (share-buyback style) transactions.
    """
    if soup is None:
        print('Insider ERR: no result for <' + url + '>')
        return None
    table = soup.find('table', {'class': 'nc'})
    if table is None:
        if S.DBG_ALL:
            print('INFO: No insider data is available for <' + url + '>')
        return None
    results = {}
    is_director = "director" in url
    # Iterate every row; non-data rows simply have too few cells.
    for row in table.findAll('tr'):
        cells = row.findAll('td')
        if S.DBG_INSIDER:
            print("DBG:")
            for cell in cells:
                print(repr(cell))
        # u'\u2019' is the last char in DATO' which can't be encoded to ascii
        fields = [printable(c.text.replace(u'\u2019', '').encode("ascii")).strip()
                  for c in cells]
        if len(fields) < 10:
            continue
        is_person = len(fields) == 11
        if is_person:
            stock, announce_date, name, chg_date, chg_type, shares, price, direct, indirect, total = \
                unpack_latest_td(*fields)
            view = S.I3_KLSE_URL + cells[10].find('a').get('href').encode("ascii")
            if S.DBG_ALL or S.DBG_INSIDER:
                print("%s, %s, %s, %s, %s, %s, %s, %s, %s, %s" %
                      (stock, announce_date, chg_date, chg_type, shares, price, direct, indirect, total, view))
        else:
            stock, announce_date, from_date, to_date, chg_type, shares, min_price, max_price, total = \
                unpack_company_td(*fields)
            view = S.I3_KLSE_URL + cells[9].find('a').get('href').encode("ascii")
            if S.DBG_ALL or S.DBG_INSIDER:
                print("%s, %s, %s, %s, %s, %s, %s, %s, %s" %
                      (stock, announce_date, from_date, to_date, chg_type, shares, min_price, max_price, total))
        ann_date = change2KlseDateFmt(announce_date, "%d-%b-%Y")
        trd_date = change2KlseDateFmt(trading_date, "%d-%b-%Y")
        if S.DBG_QR:
            print("DBG:dates:{0}:{1}".format(ann_date, trd_date))
        if ann_date < trd_date:
            # Rows are ordered newest-first: everything after this is older.
            break
        bucket = results.setdefault(stock, [])
        if is_person:
            bucket.append(format_insider(
                formatted_output, is_director, stock, announce_date, name,
                chg_date, chg_type, shares, price, view))
        else:
            bucket.append(format_company(
                formatted_output, stock, announce_date, from_date, to_date,
                chg_type, shares, min_price, max_price, total, view))
    return results
Example #2
0
def scrape_entitlement(soup, url, trading_date, formatted_output):
    """Scrape the entitlements table into {stock: formatted_row}.

    Returns None when the page or the 'nc' table is missing.  7-column rows
    are dividends, 8-column rows are "others" (bonus/split style) entries;
    both are rendered through format_dividend.  Scanning stops at the first
    row announced before trading_date.
    """
    if soup is None:
        print('Insider ERR: no result for <' + url + '>')
        return None
    table = soup.find('table', {'class': 'nc'})
    if table is None:
        if S.DBG_ALL:
            print('INFO: No insider data is available for <' + url + '>')
        return None
    entitlements = {}
    is_others = "others" in url
    # Iterate every row; non-data rows simply have too few cells.
    for row in table.findAll('tr'):
        cells = row.findAll('td')
        if S.DBG_INSIDER:
            print("DBG:")
            for cell in cells:
                print(repr(cell))
        # u'\u2019' is the last char in DATO' which can't be encoded to ascii
        fields = [printable(c.text.replace(u'\u2019', '').encode("ascii")).strip()
                  for c in cells]
        if len(fields) < 7:
            continue
        is_dividend = len(fields) == 7
        if is_dividend:
            announce_date, stock, open_price, current_price, dividend, ex_date = \
                unpack_dividend_td(*fields)
            link_cell = cells[6]
        else:
            announce_date, stock, subject, open_price, current_price, ratio, ex_date = \
                unpack_others_td(*fields)
            link_cell = cells[7]
        view = S.I3_KLSE_URL + link_cell.find('a').get('href').encode("ascii")
        if S.DBG_ALL or S.DBG_INSIDER:
            print("view: {}".format(view))
        ann_date = change2KlseDateFmt(announce_date, "%d-%b-%Y")
        trd_date = change2KlseDateFmt(trading_date, "%d-%b-%Y")
        if S.DBG_QR:
            print("DBG:dates:{0}:{1}".format(ann_date, trd_date))
        if ann_date < trd_date:
            # Rows are ordered newest-first: everything after this is older.
            break
        if is_dividend:
            entitlements[stock] = format_dividend(
                formatted_output, is_others, announce_date, stock, "",
                open_price, current_price, dividend, ex_date, view)
        else:
            entitlements[stock] = format_dividend(
                formatted_output, is_others, announce_date, stock, subject,
                open_price, current_price, ratio, ex_date, view)
    return entitlements
Example #3
0
def scrape_latest_ar(soup, trading_date):
    """Scrape the latest Annual Report table into {stock: [details]}.

    Only rows announced exactly on trading_date are collected; once a row
    older than trading_date appears, scanning stops.  Duplicate stock
    announcements are reported and skipped.  Returns None on no data.
    """
    if soup is None or len(soup) <= 0:
        print('LatestAR ERR: no result')
        return None
    table = soup.find('table', {'class': 'nc'})
    if table is None:
        if S.DBG_ALL:
            print('INFO: No Latest AR data is available')
        return None
    ar_list = {}
    for row in table.findAll('tr'):
        cells = row.findAll('td')
        values = [c.text.strip().replace('&nbsp; ', '').encode("ascii")
                  for c in cells]
        if S.DBG_QR:
            print("DBG:")
            for v in values:
                print(repr(v))
        if not values:
            continue
        stock, fy, ann_date, announce_date, latest_ann = unpack_latest_ar(*values)
        if announce_date != trading_date:
            ann_dt = change2KlseDateFmt(announce_date, "%d-%b-%Y")
            trd_dt = change2KlseDateFmt(trading_date, "%d-%b-%Y")
            if S.DBG_QR:
                print("DBG:dates:{0}:{1}".format(ann_dt, trd_dt))
            if ann_dt < trd_dt:
                # Rows are newest-first: everything after this is older still.
                break
            continue
        if stock in ar_list:
            print("INFO: Duplicated announcement: " + stock + ":" +
                  latest_ann + ":" + announce_date)
            continue
        # Pick the YoY link set from the row's "annual" anchor, if any.
        jsp_link = ""
        for anchor in row.findAll('a'):
            jsp_link = anchor.get('href')
            if "annual" in jsp_link:
                jsp_link = get_yoy_links(jsp_link)
                if len(jsp_link) > 0:
                    break
        ar_list[stock] = [fy, ann_date, announce_date, latest_ann, jsp_link]
    return ar_list
Example #4
0
def scrape_latest_qr(soup, trading_date):
    """Scrape the latest Quarterly Report table into {stock: [details]}.

    Only rows announced exactly on trading_date are collected; once a row
    older than trading_date appears, scanning stops.  Duplicate stock
    announcements are reported and skipped.  Returns None on no data.
    """
    if soup is None or len(soup) <= 0:
        print('LatestQR ERR: no result')
        return None
    table = soup.find('table', {'class': 'nc'})
    if table is None:
        if S.DBG_ALL:
            print('INFO: No Latest QR data is available')
        return None
    qr_list = {}
    pdf_list = {}  # reserved for the commented-out review_pdf step below
    for row in table.findAll('tr'):
        cells = row.findAll('td')
        values = [c.text.strip().replace('&nbsp; ', '').encode("ascii")
                  for c in cells]
        if S.DBG_QR:
            print("DBG:")
            for v in values:
                print(repr(v))
        if not values:
            continue
        (stock, announcementDate, qd, qn, rev, pbt, np, div, roe,
         eps, dps, qoq, yoy) = unpack_latest_qr(*values)
        if announcementDate != trading_date:
            ann_dt = change2KlseDateFmt(announcementDate, "%d-%b-%Y")
            trd_dt = change2KlseDateFmt(trading_date, "%d-%b-%Y")
            if S.DBG_QR:
                print("DBG:dates:{0}:{1}".format(ann_dt, trd_dt))
            if ann_dt < trd_dt:
                # Rows are newest-first: everything after this is older still.
                break
            continue
        if stock in qr_list:
            print("INFO: Duplicated announcement: " + stock + ":" + qd + ":Q" + qn)
            continue
        # Pick the QoQ link set from the row's "QoQ" anchor, if any.
        jsp_link = ""
        for anchor in row.findAll('a'):
            jsp_link = anchor.get('href')
            if "QoQ" in jsp_link:
                jsp_link = get_qoq_links(jsp_link)
                if len(jsp_link) > 0:
                    break
        qr_list[stock] = [announcementDate, qd, qn, rev, pbt, np, div,
                          roe, eps, dps, qoq, yoy, jsp_link]
        # pdf_list[stock] = review_pdf(jsp_link.keys())
    return qr_list
Example #5
0
def scrape_listing(soup, trading_date, formatted_output):
    """Scrape the new-listings table into {stock: formatted_row}.

    Returns None when the page or the 'nc' table is missing.  Scanning
    stops at the first row announced before trading_date.
    """
    if soup is None:
        print('Insider ERR: no result for <' + I3_LISTING_URL + '>')
        return None
    table = soup.find('table', {'class': 'nc'})
    if table is None:
        if S.DBG_ALL:
            print('INFO: No insider data is available for <' + I3_LISTING_URL +
                  '>')
        return None
    listings = {}
    # Iterate every row; non-data rows simply have too few cells.
    for row in table.findAll('tr'):
        cells = row.findAll('td')
        if S.DBG_INSIDER:
            print("DBG:")
            for cell in cells:
                print(repr(cell))
        # u'\u2019' is the last char in DATO' which can't be encoded to ascii
        fields = [printable(c.text.replace(u'\u2019', '').encode("ascii")).strip()
                  for c in cells]
        if len(fields) < 7:
            continue
        stock, announce_date, listing_date, listing_type, units, price = \
            unpack_listing_td(*fields)
        view = S.I3_KLSE_URL + cells[6].find('a').get('href').encode("ascii")
        if S.DBG_ALL or S.DBG_INSIDER:
            print("view: {}".format(view))
        ann_date = change2KlseDateFmt(announce_date, "%d-%b-%Y")
        trd_date = change2KlseDateFmt(trading_date, "%d-%b-%Y")
        if S.DBG_QR:
            print("DBG:dates:{0}:{1}".format(ann_date, trd_date))
        if ann_date < trd_date:
            # Rows are ordered newest-first: everything after this is older.
            break
        listings[stock] = format_listing(formatted_output, stock,
                                         announce_date, listing_date,
                                         listing_type, units, price, view)
    return listings
Example #6
0
def scrape_target(soup, trading_date, formatted_output):
    """Scrape the price-target table into {stock: formatted_row}.

    Returns None when the page or the 'nc' table is missing.  Scanning
    stops at the first row announced before trading_date.
    """
    if soup is None:
        print('Insider ERR: no result for <' + I3_TARGET_URL + '>')
        return None
    table = soup.find('table', {'class': 'nc'})
    if table is None:
        if S.DBG_ALL:
            print('INFO: No insider data is available for <' + I3_TARGET_URL +
                  '>')
        return None
    targets = {}
    # Iterate every row; non-data rows simply have too few cells.
    for row in table.findAll('tr'):
        cells = row.findAll('td')
        if S.DBG_INSIDER:
            print("DBG:")
            for cell in cells:
                print(repr(cell))
        # u'\u2019' is the last char in DATO' which can't be encoded to ascii
        fields = [printable(c.text.replace(u'\u2019', '').encode("ascii")).strip()
                  for c in cells]
        if len(fields) < 7:
            continue
        # NOTE(review): this unpacks 7 values from unpack_listing_td while
        # scrape_listing unpacks 6 from the same helper -- one of the two
        # call sites (or the helper name here; unpack_target_td?) looks
        # wrong.  Confirm against the unpack_* definitions before relying
        # on this path.
        announce_date, stock, last_price, target, upside_down, call, source = \
            unpack_listing_td(*fields)
        # This page uses slash-formatted dates, unlike the other scrapers.
        ann_date = change2KlseDateFmt(announce_date, "%d/%m/%Y")
        trd_date = change2KlseDateFmt(trading_date, "%d-%b-%Y")
        if S.DBG_QR:
            print("DBG:dates:{0}:{1}".format(ann_date, trd_date))
        if ann_date < trd_date:
            # Rows are ordered newest-first: everything after this is older.
            break
        targets[stock] = format_target(formatted_output, announce_date,
                                       stock, last_price, target,
                                       upside_down, call, source)
    return targets
Example #7
0
def unpackTD(dt, price_open, price_range, price_close, change, volume):
    '''Unpack one price-history table row.

    `price_range` arrives as "low - high" (e.g. "2.92 - 2.98"); the tuple
    returned is (date, open, high, low, close, volume), with the date run
    through change2KlseDateFmt using the page's dd/mm/YYYY format.  The
    `change` column is accepted to mirror the table layout but is unused.

    Sample row:
    <tr>
        <td class="left">13/04/2018</td>
        <td class="right">2.92</td>
        <td class="right">2.92 - 2.98</td>
        <td class="right">2.98</td>
        <td class="right" nowrap="nowrap"><span class="up">0.00 (0.00%)</span></td>
        <td class="right">10,500</td>
    </tr>
    '''
    trade_date = change2KlseDateFmt(dt, "%d/%m/%Y")
    parts = [piece.strip() for piece in price_range.split('-')]
    low, high = parts[0], parts[1]
    return trade_date, price_open, high, low, price_close, volume
def scrapeFinancials(soup, counter, term, report):
    """Collect the report periods available in a WSJ-style financials table.

    Args:
        soup:    parsed document (or None / empty container on fetch failure).
        counter: stock counter name, used as the top-level result key.
        term:    'Q' reformats header dates via change2KlseDateFmt
                 ('%d-%b-%Y'); any other value keeps the header text as-is.
        report:  report name, used only in the error message here.

    Returns:
        {counter: {period: True, ...}} mapping each table header period to
        True, or None when there is no result / no data table.
    """
    if soup is None or len(soup) <= 0:
        print('ERR: no result')
        return None

    table = soup.find('table', {'class': 'cr_dataTable'})
    if table is None:
        print("ERR: {} {} {}".format(counter, term, report))
        return None

    # Bug fix: qrs[counter] was never initialised, so every assignment
    # below raised KeyError.
    qrs = {counter: {}}
    for tr in table.findAll('tr'):
        td = tr.findAll('td')
        if len(td) == 0:
            # Header row: each <th> carries a period label.
            for th in table.findAll('th'):
                tht = th.text
                if len(tht) <= 0:
                    continue
                if tht[0].isalpha() or tht[-1].isalpha():
                    # Skip "fiscalYr" class and 5-year trend columns
                    continue
                if term == 'Q':
                    qrs[counter][change2KlseDateFmt(tht, '%d-%b-%Y')] = True
                else:
                    qrs[counter][tht] = True
            continue
        # Data rows carry no period information; nothing to collect.
        # Bug fix: the original had an unfinished statement here
        # (`qrs[counter][]`) which was a SyntaxError -- removed.
    return qrs