return f'{self.__class__.__name__} - {self.section}' if __name__ == '__main__': from hkex_api import HKEX_API # https://www1.hkexnews.hk/listedco/listconews/gem/2020/0929/2020092901098.pdf #concat number # https://www1.hkexnews.hk/listedco/listconews/sehk/2020/0929/2020092900604.pdf #concat number query = HKEX_API() urls = [data.file_link for data in query.get_data()] # urls = ['https://www1.hkexnews.hk/listedco/listconews/sehk/2020/0923/2020092300374.pdf'] for url in urls: # url = data.file_link # url, p = 'https://www1.hkexnews.hk/listedco/listconews/sehk/2020/0721/2020072100713.pdf', 61 # url, p = 'https://www1.hkexnews.hk/listedco/listconews/sehk/2020/0721/2020072100653.pdf', 94 print(url) pdf = PDF.create(url) corp_gov_report = pdf.get_outline(CorporateGovReport.title_regex) if not corp_gov_report: continue corp_gov_report = CorporateGovReport.create(corp_gov_report[0]) if not corp_gov_report: continue if not corp_gov_report.audit_fee: continue try: page = corp_gov_report.audit_fee.pages[0] sec = corp_gov_report.audit_fee.sections[0] table = corp_gov_report.audit_fee.tables[0] except Exception as e: print(e) continue