def handle_download(download_page, params):
    """Resolve the PDF hidden behind two meta-refresh redirects and download it.

    Parameters
    ----------
    download_page : str
        URL of the portal page whose ``<meta http-equiv="REFRESH">`` tag
        points (via a second identical hop) at the real PDF.
    params : dict
        Must contain ``'issue_type'`` (a Greek volume label present in the
        ``volumes`` map below) and ``'issue_date'`` (an object with a
        ``.year`` attribute, presumably a ``datetime``/``date`` — TODO confirm).

    Returns
    -------
    str or None
        Full path of the downloaded file; ``None`` if resolving the redirect
        chain failed; bare ``return`` (None) if the file already exists.

    Raises
    ------
    KeyError
        If ``params['issue_type']`` is not a known volume label.
    """
    global output_dir
    print(params)
    filename = archive_format(params) + ".pdf"
    # Greek issue-type labels -> ASCII directory names.
    volumes = {
        'Α': 'A',
        'Β': 'B',
        'Γ': 'C',
        'Δ': 'D',
        'Α.ΕΙ.Δ.': 'A.EI.D',
        'Α.Σ.Ε.Π.': 'A.S.E.P',
        'Δ.Δ.Σ.': 'D.D.S',
        'Α.Π.Σ.': 'A.P.S',
        'Υ.Ο.Δ.Δ.': 'Y.O.D.D',
        'Α.Α.Π.': 'A.A.P',
        'Ν.Π.Δ.Δ.': 'N.P.D.D',
        'ΠΑΡΑΡΤΗΜΑ': 'APPENDIX',
        'Δ.Ε.Β.Ι.': 'D.E.B.I',
        'ΑΕ-ΕΠΕ': 'AE-EPE',
        'Ο.Π.Κ.': 'O.P.K',
    }
    vol = volumes[params['issue_type']]
    year = params['issue_date'].year
    # filename[6:9] presumably carries the issue number used as the leaf
    # directory — TODO confirm against archive_format()'s layout.
    dirs = '{}/{}/{}'.format(vol, year, filename[6:9])
    # Fixed: os.system('mkdir -p ...') was shell-injection-prone and
    # non-portable; os.makedirs with exist_ok=True is the safe equivalent.
    os.makedirs(os.path.join(output_dir, dirs), exist_ok=True)
    outfile = '{}/{}/{}'.format(output_dir, dirs, filename)
    if os.path.isfile(outfile):
        logging.info('{} already a file'.format(filename))
        return
    try:
        # The portal issues two consecutive meta-refresh redirects before
        # exposing the real file URL; follow both hops.
        html = Helper.get_url_contents(download_page)
        beautiful_soup = BeautifulSoup(html, "html.parser")
        meta = beautiful_soup.find("meta", {"http-equiv": "REFRESH"})
        download_link = meta['content'].replace("0;url=", "")
        beautiful_soup = BeautifulSoup(
            Helper.get_url_contents(download_link), "html.parser")
        meta = beautiful_soup.find("meta", {"http-equiv": "REFRESH"})
        download_link = meta['content'].replace("0;url=", "")
    # Fixed: narrowed from BaseException so KeyboardInterrupt/SystemExit
    # are no longer swallowed.
    except Exception as e:
        logging.error("Exception occurred while processing a link", exc_info=True)
        print(e)
        return None
    print(filename)
    Helper.download(download_link, filename, output_dir + '/' + dirs)
    # Fixed: log only after the download call, not before it.
    logging.info('Downloaded {}'.format(filename))
    return outfile
# NOTE(review): this is a second, reduced definition of handle_download; if
# both live at module level it shadows the earlier one. Consider deleting
# one of the two — kept here to avoid breaking whichever is actually used.
def handle_download(download_page, params):
    """Resolve the PDF behind two meta-refresh redirects and download it.

    Reduced variant: only volume labels 'Α' and 'Β' are supported and
    progress is reported via print() rather than logging.

    Parameters
    ----------
    download_page : str
        URL of the page whose meta-refresh chain leads to the PDF.
    params : dict
        Needs ``'issue_type'`` ('Α' or 'Β') and ``'issue_date'`` (object
        with a ``.year`` attribute — presumably a date; TODO confirm).

    Returns
    -------
    str or None
        The bare ``filename`` on success; ``None`` if redirect resolution
        failed; bare ``return`` (None) if the file already exists.

    Raises
    ------
    KeyError
        If ``params['issue_type']`` is not 'Α' or 'Β'.
    """
    global output_dir
    print(params)
    filename = archive_format(params) + ".pdf"
    volumes = {'Α': 'A', 'Β': 'B'}
    vol = volumes[params['issue_type']]
    year = params['issue_date'].year
    # filename[6:9] presumably holds the issue number — TODO confirm.
    dirs = '{}/{}/{}'.format(vol, year, filename[6:9])
    # Fixed: os.system('mkdir -p ...') was shell-injection-prone and
    # non-portable; os.makedirs with exist_ok=True is the safe equivalent.
    os.makedirs(os.path.join(output_dir, dirs), exist_ok=True)
    outfile = '{}/{}/{}'.format(output_dir, dirs, filename)
    if os.path.isfile(outfile):
        print('Already a file')
        return
    try:
        # Two consecutive meta-refresh redirects precede the real file URL.
        html = Helper.get_url_contents(download_page)
        beautiful_soup = BeautifulSoup(html, "html.parser")
        meta = beautiful_soup.find("meta", {"http-equiv": "REFRESH"})
        download_link = meta['content'].replace("0;url=", "")
        beautiful_soup = BeautifulSoup(
            Helper.get_url_contents(download_link), "html.parser")
        meta = beautiful_soup.find("meta", {"http-equiv": "REFRESH"})
        download_link = meta['content'].replace("0;url=", "")
    # Fixed: narrowed from BaseException so KeyboardInterrupt/SystemExit
    # are no longer swallowed.
    except Exception as e:
        print(e)
        return None
    print(filename)
    Helper.download(download_link, filename, output_dir + '/' + dirs)
    return filename