Example #1
0
        return f'{self.__class__.__name__} - {self.section}'


if __name__ == '__main__':
    # Ad-hoc driver: fetch filing links through HKEX_API and, for each one,
    # try to reach the audit-fee part of its corporate governance report.
    from hkex_api import HKEX_API
    # Sample filings kept around for manual debugging (original note: "concat number"):
    # https://www1.hkexnews.hk/listedco/listconews/gem/2020/0929/2020092901098.pdf #concat number
    # https://www1.hkexnews.hk/listedco/listconews/sehk/2020/0929/2020092900604.pdf #concat number
    query = HKEX_API()
    # Collect the `file_link` of every record returned by `get_data()`.
    urls = [data.file_link for data in query.get_data()]
    # Uncomment to run against a single known filing instead of the API results:
    # urls = ['https://www1.hkexnews.hk/listedco/listconews/sehk/2020/0923/2020092300374.pdf']
    for url in urls:
        # Alternative hard-coded inputs used while debugging
        # (NOTE(review): `p` is presumably a page number - confirm):
        # url = data.file_link
        # url, p = 'https://www1.hkexnews.hk/listedco/listconews/sehk/2020/0721/2020072100713.pdf', 61
        # url, p = 'https://www1.hkexnews.hk/listedco/listconews/sehk/2020/0721/2020072100653.pdf', 94
        print(url)
        pdf = PDF.create(url)
        # Ask the PDF for outline entries matching CorporateGovReport.title_regex;
        # a falsy result means there is nothing to process for this URL.
        corp_gov_report = pdf.get_outline(CorporateGovReport.title_regex)
        if not corp_gov_report:
            continue
        # Only the first match is used. The name is rebound here from the
        # outline lookup result to the object built by CorporateGovReport.create.
        corp_gov_report = CorporateGovReport.create(corp_gov_report[0])
        if not corp_gov_report:
            continue
        # Skip reports whose `audit_fee` attribute is falsy.
        if not corp_gov_report.audit_fee:
            continue
        try:
            # Take the first page, section and table of the audit-fee part.
            # NOTE(review): these three names are not used in the visible lines;
            # presumably consumed by code following this chunk - confirm.
            page = corp_gov_report.audit_fee.pages[0]
            sec = corp_gov_report.audit_fee.sections[0]
            table = corp_gov_report.audit_fee.tables[0]
        except Exception as e:
            # Best-effort processing: print the error and move on to the next URL.
            # NOTE(review): the catch is broad; if only empty collections are
            # expected here, a narrower exception type may be appropriate - confirm.
            print(e)
            continue